Iter #50: [tensor([-0.1028,  0.3674, -0.0416,  0.1550, -0.1525,  0.3630, -0.0542, -0.0562,
        -0.1442, -0.0258,  0.0749,  0.0462,  0.0856, -0.0773, -0.0111, -0.1637,
         0.0138,  0.1131, -0.1745, -0.0197,  0.1113,  0.0208,  0.3991, -0.4210,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3361, -1.3334,  0.0276, -0.1242, -0.0842, -0.7885,  0.1704, -0.5961,
        -0.9389,  0.0113, -0.5610,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2573, -0.9687, -2.1243, -0.3422,  0.0834,  0.1723, -0.1253,  0.0892,
        -0.1415, -0.3351,  0.0780, -0.2093, -0.0462,  0.3093,  0.0624, -0.0165,
        -0.0514,  0.3695, -0.1294, -0.0866,  0.4263,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7295,  0.2704, -0.0339,  0.1864, -0.5348, -0.0076,  0.1667, -0.1263,
        -0.0850,  0.0211, -0.1242,  0.2302,  0.1375,  0.0780,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3336,  0.4766, -0.3728,  0.3541, -0.0759,  0.4693,  0.1854,  0.0710,
        -0.3359, -0.1821,  0.6406, -1.2684,  0.3993,  0.2556, -0.1672,  0.2734,
         0.1628, -0.1045,  0.0517, -0.7212,  0.5751,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1631, -0.0006, -0.0101, -0.0673,  0.0579,  0.0380,  0.0524, -0.0136,
         0.0422,  0.0465,  0.1305, -0.2179,  0.0230,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0039, -0.0026, -0.0176,  0.0066, -0.0224, -0.0521, -0.0315, -0.0150,
         0.1342, -0.0055,  0.0029, -0.0006, -0.0964, -0.0679, -0.0222,  0.0169,
         0.0311, -0.0373, -0.0341,  0.0449, -0.0349,  0.0409,  0.0411, -0.0403,
         0.0544,  0.0254, -0.0083,  0.0160, -0.0061,  0.0546, -0.0205,  0.0036,
         0.1141,  0.0017,  0.0076, -0.0292, -0.0115,  0.0137,  0.0171, -0.0102,
        -0.0275, -0.0011, -0.0059, -0.0197, -0.0491], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2251e-01,  4.1048e-02, -5.9235e-02, -1.3624e-02, -3.4936e-03,
         3.3692e-02, -1.2845e-02,  3.0442e-02,  2.3450e-02,  6.9041e-03,
        -7.2344e-02, -2.5239e-02,  1.3238e-02,  1.3199e-04,  7.8244e-02,
         1.2217e-01, -7.0052e-02,  3.3083e-02, -4.6891e-02, -4.2912e-04,
        -1.8997e-02, -7.9487e-03, -1.9118e-02,  9.3745e-02, -7.9207e-03,
         2.4357e-01,  1.5765e-02, -6.4577e-02, -3.3928e-02,  9.0638e-02,
        -7.0306e-02,  8.7711e-02,  2.5722e-01,  1.4204e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0212, -0.3388, -0.0824, -0.0105, -0.0123, -0.1566, -0.0286, -0.0064,
        -0.0479, -0.0201,  0.0105,  0.0508, -0.0203,  0.0280,  0.0370,  0.0810,
        -0.0520, -0.0036, -0.0540, -0.0167, -0.0319, -0.0811, -0.0950, -0.0255,
         0.1431, -0.1685,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2138, -0.0070,  0.0958,  0.0733, -0.4161,  0.1472, -0.0342,  0.0771,
         0.0165,  0.0324,  0.0387,  0.0863,  0.1169,  0.1782,  0.0590, -0.0146,
        -0.0021, -0.1046,  0.0077,  0.0855,  0.0181,  0.2945,  0.0119,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0650, -0.0540, -0.0933, -0.0003,  0.0042, -0.0115,  0.1350, -0.0401,
        -0.0456, -0.0799, -0.0291,  0.0214, -0.0983, -0.0196, -0.0463, -0.0968,
         0.0634,  0.0125, -0.0153, -0.0194, -0.0011, -0.0330,  0.0131, -0.0198,
         0.0187,  0.0523, -0.0191, -0.0383,  0.0257,  0.0495,  0.0155,  0.0532,
         0.0245,  0.0696, -0.0272,  0.0085,  0.0086, -0.0448, -0.0092, -0.1229,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6201,  0.0092,  0.0220,  0.0284, -0.0356,  0.0079, -0.2384, -0.0707,
        -0.0336, -0.1302,  0.0459, -0.0298, -0.0345, -0.0748, -0.0385, -0.0775,
        -0.0650, -0.0632,  0.0678,  0.0033,  0.0498, -0.0123,  0.0560, -0.0384,
        -0.0554, -0.1280, -0.0015,  0.0421, -0.0162, -0.2165,  0.0377, -0.1752,
         0.0351,  0.1209,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0026,  0.1792, -0.0805, -0.0468, -0.1890, -0.0481, -0.1225, -0.0256,
        -0.0457, -0.0051,  0.0023, -0.0040,  0.1182, -0.0220, -0.0758, -0.0554,
        -0.0517,  0.0075, -0.0208, -0.0189, -0.0933,  0.0019,  0.0541, -0.0143,
        -0.0420, -0.0516, -0.0123, -0.0179, -0.1442, -0.0483, -0.0136, -0.0437,
        -0.0043, -0.0248, -0.0127, -0.0407, -0.0447,  0.0396,  0.0136,  0.0155,
         0.0144, -0.0451,  0.0004, -0.0068, -0.0011, -0.0332, -0.0230,  0.0228,
         0.0422, -0.0158,  0.1031,  0.0999], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0682, -0.0135, -0.0064, -0.0807,  0.0095,  0.0130,  0.0054, -0.0094,
        -0.0165, -0.0335, -0.0025, -0.0022,  0.0235,  0.0172, -0.0038, -0.0019,
         0.0040,  0.0305,  0.0020, -0.0615, -0.0275, -0.0309, -0.0149,  0.0018,
        -0.0080,  0.0028, -0.0469,  0.0229,  0.0864,  0.0207,  0.0924,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3823,  0.4033, -0.8239, -0.0978, -0.0471,  0.0628,  0.0301, -0.0213,
         0.0392, -0.1163, -0.0541,  0.0051,  0.2017, -0.1428,  0.0171, -0.0086,
         0.2919,  0.1195,  0.0118,  0.0940, -0.1020, -0.0201, -0.2259, -0.3472,
        -0.7057,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2395,  0.1294, -0.0698, -0.0900,  0.1947,  0.0059,  0.1350, -0.0399,
        -0.0887,  0.0355,  0.1236,  0.0110,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0760, -0.0028,  0.0314, -0.0281,  0.0408,  0.0227, -0.0180,  0.0290,
         0.0490, -0.0734, -0.1026, -0.0080, -0.0158,  0.0205,  0.1310, -0.0012,
         0.0403,  0.0007, -0.0222,  0.0396, -0.0289, -0.0721,  0.0059,  0.1106,
         0.0378,  0.0410, -0.0287, -0.0445, -0.0518,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0328, -0.0450, -0.4420, -0.0574,  0.2379, -0.3444, -0.2493,  2.1845,
        -0.2300,  0.1433, -0.3790, -0.8711,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3097, -0.2977, -0.2083, -0.0865, -0.0689, -0.0648, -0.0633, -0.5116,
         0.2252, -0.0873, -0.1689,  0.9139,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7478e+00,  5.9258e-01, -1.9182e+00,  8.9788e-01,  6.7558e-01,
         9.2935e+00, -1.7863e+00,  1.2088e+00, -1.2532e+00, -7.1485e-01,
         3.0193e-01,  1.7668e+00, -1.0082e+00, -9.7133e-02, -5.0110e-04,
        -6.0535e-02, -3.1716e+00, -1.0703e+00, -1.0754e+00,  2.0049e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0334, -0.0044,  0.0135,  0.0077,  0.0117, -0.0487,  0.0649,  0.0672,
         0.0168, -0.0052, -0.0096, -0.0070, -0.0227, -0.0142,  0.0212, -0.0662,
         0.0375, -0.0296, -0.0265, -0.0058, -0.0009, -0.0196,  0.0289,  0.0776,
         0.0233, -0.0301, -0.0085, -0.0015, -0.0097,  0.0338,  0.0053,  0.0035,
        -0.0210, -0.0112, -0.0071,  0.0534,  0.0702,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1420, -0.0189,  0.1238,  0.3840,  0.0435, -0.0919, -0.1192, -0.2261,
        -0.1241,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0509,  0.8358,  2.0360,  0.3640,  0.0094, -0.3320,  0.2181,  0.0544,
        -0.1609, -0.0998, -0.2998,  0.0115,  0.1860, -0.1942,  0.1817,  0.1496,
         0.0971, -0.0255,  0.0996,  0.2044, -0.4227,  0.0835, -0.0126, -0.2006,
         0.1692,  0.3123,  0.2480,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0203,  0.0143, -0.1509, -0.1268,  0.1350,  0.0091, -0.0882,  0.0598,
        -0.0021,  0.1039, -0.3255, -0.0402,  0.0026, -0.0540, -0.0635,  0.1346,
        -0.0352, -0.0299, -0.1550, -0.0507, -0.0110, -0.1026,  0.0109, -0.2408,
        -0.0620,  0.0644,  0.0258,  0.0314, -0.0012, -0.0472, -0.1265,  0.0535,
        -0.2275,  0.2040,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.1190, -0.0799, -0.0476, -0.0529,  0.0329, -0.0600, -0.0195,  0.0590,
         0.0058, -0.0193,  0.1453, -0.0477, -0.0277,  0.0208, -0.0049,  0.0246,
         0.0380,  0.0704,  0.0085,  0.0155,  0.0192,  0.0068, -0.0328,  0.0069,
         0.0498, -0.0207,  0.0445, -0.0582, -0.0426, -0.1560, -0.0234,  0.0008,
        -0.0195, -0.0566,  0.0256, -0.0338, -0.0234, -0.0191,  0.0320, -0.0601,
         0.0052, -0.0069,  0.0105,  0.0500, -0.2199,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4309, -0.7536, -0.8425, -0.9369,  0.6520,  0.1371,  0.5134,  0.0679,
         0.6108,  0.4434,  0.9693,  0.3366,  1.3130, -0.5739,  1.7902,  1.3624,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9380, -1.9839, -1.6499, -0.0031,  0.1948,  0.6360, -0.2396, -0.2249,
         0.1429, -0.1146, -0.1186,  0.1731,  0.3342, -0.0586, -0.1840,  0.1981,
        -0.2154, -0.0254, -0.1888,  0.1720, -0.0368, -0.0540, -0.6134,  0.6068,
        -0.6636,  0.7043,  0.7678,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0425,  0.6759,  0.0528, -0.0197,  0.0241, -0.0289, -0.0302, -0.0016,
        -0.0138,  0.0052,  0.0049, -0.0155,  0.0110, -0.0148, -0.0205,  0.0233,
         0.0102, -0.0132, -0.0264, -0.0414,  0.0522, -0.0137,  0.0498,  0.0007,
        -0.0466, -0.0262, -0.0409,  0.0277,  0.0400, -0.0120,  0.0689, -0.0103,
         0.0248,  0.1497,  0.1118,  0.1996, -0.0021,  0.0317,  0.0025,  0.0709,
        -0.0224,  0.0078, -0.0252, -0.0429,  0.0362,  0.0462], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0832,  0.1605,  0.0110, -0.0011, -0.0142, -0.0370, -0.0286, -0.0699,
         0.0285,  0.0288,  0.0173, -0.0881,  0.0075, -0.0646,  0.0506,  0.0463,
        -0.0173,  0.0237,  0.0290, -0.0414,  0.0427,  0.0065, -0.0036, -0.0215,
        -0.0115,  0.0243, -0.0932,  0.2750,  0.0097,  0.0044,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0032,  0.0360, -0.2442, -0.1840, -0.0107,  0.0789,  0.0031,  0.0173,
         0.2896, -0.0189, -0.4454,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1844, -0.0234,  0.0599, -0.0080, -0.0009,  0.0250, -0.0134,  0.0414,
         0.0090, -0.0277, -0.0476, -0.0968, -0.0373,  0.0043, -0.0587, -0.0929,
        -0.0307, -0.0446,  0.0080, -0.1367,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0413, -0.0584,  0.0183,  0.0312,  0.0502, -0.0153, -0.0420, -0.0581,
        -0.0434, -0.1109, -0.1681,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0598, -0.0604, -0.0562, -0.0022,  0.1085,  0.0133, -0.0759, -0.1237,
        -0.0572, -0.1975, -0.0561, -0.0425, -0.1298, -0.0576, -0.0239,  0.0144,
         0.0329, -0.0240, -0.0161, -0.0275, -0.1555,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0285,  0.0096,  0.0946,  0.0669, -0.0347, -0.0304, -0.1987, -0.2308,
        -0.0265, -0.0408, -0.0881,  0.1116,  0.0183,  0.0171,  0.0257,  0.1148,
         0.0418,  0.0544,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0069,  0.0561,  0.0511,  0.0276,  0.0208,  0.0440, -0.1193, -0.0300,
         0.0354, -0.0145,  0.0135, -0.0212,  0.0530,  0.0196, -0.0108, -0.0133,
        -0.0280,  0.0060,  0.0067,  0.0252,  0.0271, -0.0111,  0.0362, -0.0205,
        -0.0006,  0.0729,  0.0204,  0.0198, -0.0163,  0.0266, -0.0002, -0.0062,
         0.0143, -0.0327, -0.0154,  0.0169, -0.0308,  0.1879,  0.0353,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0186,  0.0115,  0.0691,  0.0319,  0.0292,  0.0225, -0.0684,  0.0998,
         0.0491,  0.0642, -0.0428,  0.0386,  0.0722,  0.0456, -0.0371,  0.0290,
         0.0061, -0.0359, -0.0013,  0.0533, -0.1321, -0.0301, -0.0010,  0.0149,
         0.0310,  0.0122, -0.0190, -0.0833,  0.0198,  0.0117, -0.0440,  0.0003,
        -0.1343,  0.0501,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.3028, -0.2593,  0.0607,  0.0181,  0.1655, -0.1356,  0.2238,  0.0294,
         0.0198,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2942,  0.0010,  0.0660, -0.0465, -0.0458, -0.0457, -0.0140, -0.0153,
         0.0636, -0.0590, -0.0131,  0.0022,  0.0392, -0.0226,  0.0954, -0.0029,
        -0.0177,  0.0021, -0.0281, -0.0120,  0.0254, -0.0061, -0.0691,  0.0917,
         0.0648, -0.0305, -0.1001,  0.0215,  0.0392,  0.0135, -0.0281, -0.0162,
        -0.0431, -0.0615,  0.0048,  0.0241,  0.0079, -0.0093,  0.0724, -0.0478,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3466,  0.1556, -0.0132, -0.0512, -0.0045, -0.0105, -0.0688,  0.0702,
        -0.0105,  0.0212,  0.0337,  0.0116, -0.0150,  0.0357,  0.0626, -0.0004,
        -0.0687,  0.0183,  0.0272,  0.0042, -0.0500, -0.0334,  0.0401, -0.0728,
         0.0079, -0.0536, -0.0618, -0.0176,  0.0643,  0.0638,  0.2452, -0.0390,
         0.0739, -0.0319, -0.0165, -0.0289, -0.0213,  0.0107,  0.0121,  0.0495,
        -0.0028, -0.0112,  0.0541,  0.0004, -0.0263,  0.0129, -0.0680,  0.0032,
         0.0234,  0.0272, -0.1361, -0.0784,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1003, -0.0003,  0.0211, -0.0100,  0.0595, -0.0491,  0.0274, -0.0474,
        -0.0117,  0.0498,  0.2357, -0.0204, -0.0600, -0.1034, -0.1215, -0.0955,
        -0.0143, -0.0031,  0.0087, -0.0372, -0.0874,  0.0423,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0010, -0.0630,  0.0081, -0.0004, -0.0937, -0.0544,  0.0874,  0.0579,
        -0.0497, -0.0814, -0.1417,  0.0569,  0.0315,  0.0323,  0.0229,  0.0026,
         0.0181, -0.0227,  0.0245,  0.0573, -0.0179,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0433, -0.0141,  0.0113, -0.0399, -0.0332, -0.0083, -0.0811,  0.0476,
        -0.0137, -0.0187,  0.0006, -0.0088,  0.0368, -0.0143, -0.0125,  0.0080,
         0.0575,  0.0205,  0.0126,  0.0391,  0.0158,  0.0609,  0.0013,  0.0843,
        -0.0015,  0.0270,  0.0164,  0.0117,  0.0187,  0.0166,  0.0411,  0.0022,
        -0.0078,  0.0011, -0.0073,  0.0287,  0.0238, -0.0173,  0.0062, -0.0280,
         0.0445,  0.0029, -0.1034, -0.0662, -0.0220, -0.0180,  0.0146,  0.0016,
         0.0302, -0.0335, -0.0008, -0.0169,  0.0459, -0.1132, -0.0231],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0718, -0.1102, -0.0398, -0.0182, -0.0668, -0.0076, -0.0156, -0.0033,
         0.0263,  0.0462,  0.0385,  0.0509, -0.0325, -0.0204,  0.0049,  0.0360,
         0.0035,  0.0375,  0.0292, -0.0031, -0.0507, -0.0344, -0.0092,  0.0266,
        -0.0230,  0.0084, -0.0626, -0.0843,  0.0179, -0.0170, -0.2099, -0.0784,
        -0.0022, -0.0645,  0.0556, -0.0104, -0.0079,  0.0327, -0.0244, -0.0064,
        -0.0110, -0.0300,  0.0194, -0.0244, -0.0830,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1407,  0.0337,  0.0054,  0.0622,  0.0725,  0.0760, -0.0483,  0.0232,
         0.0956,  0.0020,  0.0333, -0.0312, -0.0140,  0.0900,  0.0159, -0.0145,
        -0.0072,  0.0203,  0.0439,  0.0022, -0.0113,  0.0483,  0.2443,  0.0808,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2104,  0.0760, -0.0502, -0.0896, -0.1191,  0.0810, -0.1448,  0.1780,
        -0.0882, -0.0716,  0.0442, -0.0448,  0.0992, -0.0977, -0.0111, -0.0059,
         0.1487,  0.0775, -0.0122,  0.0244, -0.0432,  0.0157, -0.0149, -0.0007,
         0.0280,  0.0073,  0.0452, -0.0305,  0.0072,  0.0145, -0.0813, -0.2741,
         0.0238,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0503, -0.0236, -0.0208,  0.0450, -0.0099,  0.1835,  0.0229,  0.0136,
        -0.0395,  0.0611, -0.0909,  0.0768, -0.0644,  0.0140,  0.0020, -0.0732,
         0.0839,  0.0214, -0.0476,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0939,  0.3025, -0.1706, -0.0532,  0.0647, -0.0441,  0.0287,  0.0234,
        -0.0511,  0.0706,  0.0230, -0.0116,  0.0761, -0.0234,  0.0125,  0.0180,
         0.0878, -0.0028,  0.0096,  0.0220,  0.0151,  0.0339,  0.0660, -0.0744,
        -0.0762,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0844, -0.1302, -0.0258, -0.0123, -0.1305,  0.0531, -0.1101, -0.3599,
         0.0743, -0.0429, -0.0073,  0.1215, -0.0109,  0.0274,  0.0358,  0.1188,
        -0.3661,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.0059, -0.0851,  0.0286, -0.1133,  0.0654, -0.0201, -0.0497,  0.0776,
        -0.0204, -0.0499,  0.0259, -0.0202, -0.0134, -0.0976, -0.0266,  0.0115,
         0.0043, -0.0116,  0.0388,  0.0250,  0.0238,  0.0167, -0.0081,  0.0156,
         0.0698,  0.0256,  0.0612, -0.0285,  0.0742,  0.1167,  0.0105, -0.0511,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1926, -0.0101,  0.0225, -0.0250, -0.0016,  0.0046, -0.0109, -0.0027,
        -0.0013,  0.0211, -0.0342, -0.0017,  0.0467,  0.0273,  0.0417,  0.0987,
         0.0104, -0.0376,  0.0342,  0.0901, -0.0022, -0.0147, -0.0081, -0.0195,
         0.0025, -0.0144,  0.0268,  0.0425,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0018,  0.0931,  0.1870,  0.3378, -0.1687,  0.0042,  0.0860,  0.0058,
         0.0105,  0.0450, -0.0535,  0.0338, -0.0176, -0.0198, -0.0241, -0.0280,
        -0.0528,  0.0034, -0.0772,  0.1588, -0.0074, -0.0234, -0.0213, -0.1549,
         0.0161,  0.0380, -0.2257, -0.1127, -0.1465,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0529, -0.0575,  0.0415,  0.0283, -0.0089,  0.0243,  0.1840, -0.0045,
         0.0109, -0.0214, -0.0325,  0.0121, -0.0252, -0.0307, -0.2036, -0.0499,
        -0.0050, -0.0037,  0.0109,  0.0686,  0.0763, -0.0040,  0.0261,  0.0262,
         0.0057, -0.0229, -0.0371, -0.0196,  0.0405,  0.0586,  0.0239, -0.0474,
        -0.0671,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1777, -0.0356,  0.0164,  0.0195, -0.0489, -0.0447, -0.0861, -0.0252,
         0.0085, -0.0243,  0.0189, -0.0158,  0.0174,  0.0018,  0.0192, -0.0151,
        -0.0418, -0.0359,  0.0075, -0.1771, -0.0008,  0.1836, -0.0549, -0.0731,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0267, -0.0432, -0.0250, -0.0169, -0.0528,  0.0195, -0.0456, -0.0021,
         0.0356, -0.0239,  0.0084, -0.0042,  0.0086, -0.0050, -0.0260,  0.0055,
         0.0055, -0.0058,  0.0014, -0.0150, -0.0137, -0.0439, -0.0032,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4627e-01, -4.3877e-02,  2.0486e-02, -2.5681e-02, -3.6715e-02,
        -3.1007e-02, -2.4475e-01,  1.5811e-03, -3.2313e-02, -1.0740e-01,
         9.1522e-02, -3.7627e-02, -1.1865e-04, -9.2375e-03,  3.1953e-02,
        -3.7335e-02, -9.3984e-04, -1.3161e-02, -1.8035e-02,  6.4552e-02,
        -5.0895e-03, -1.0028e-01,  3.5105e-02, -1.4337e-02, -4.6066e-02,
         1.4479e-02,  3.2837e-02, -5.0021e-03,  1.9513e-02, -2.8835e-03,
        -5.1785e-02,  3.3181e-02,  1.5298e-02,  8.1742e-02,  1.1807e-02,
        -2.9879e-02, -8.0432e-03, -1.4795e-01, -3.8116e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0572, -0.1284, -0.0030, -0.0107,  0.0256, -0.0659,  0.0560,  0.0671,
        -0.0366,  0.0096, -0.0105, -0.0074,  0.0137, -0.0838,  0.0271,  0.0285,
         0.0728, -0.0048,  0.0227, -0.0159, -0.0480,  0.0569, -0.0298, -0.2470,
         0.0850, -0.0280,  0.0888,  0.0898, -0.0355,  0.0065,  0.0502,  0.1219,
         0.0692,  0.1470, -0.1239, -0.0071,  0.0068,  0.0016,  0.0035,  0.0136,
        -0.0085,  0.0823, -0.0892,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0276,  0.0099,  0.0045,  0.0192,  0.0315, -0.0036, -0.0315, -0.0135,
         0.0187,  0.0732,  0.0341,  0.0038, -0.0281, -0.0168, -0.0031, -0.0225,
        -0.0071,  0.0211,  0.0115, -0.0334, -0.0103, -0.0212, -0.0340, -0.0199,
         0.0197, -0.0177, -0.0063, -0.0022, -0.0321, -0.0246, -0.0154,  0.0400,
        -0.0083,  0.0270, -0.0614, -0.0709, -0.2317, -0.0402, -0.0141,  0.0497,
         0.0199,  0.0261, -0.0083,  0.0935, -0.0078,  0.0341,  0.0006,  0.0056,
         0.0006,  0.0161,  0.0224,  0.0310, -0.0113,  0.0082,  0.0407, -0.0970,
        -0.1087], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3290,  0.0503, -0.0545, -0.0658, -0.0125, -0.0159, -0.0056, -0.1167,
        -0.0169,  0.0389, -0.0387,  0.0039,  0.0260, -0.0152,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4200e-01,  8.3675e-02, -1.0567e-01,  1.9476e-01,  3.5832e-02,
        -9.8126e-04, -1.5536e-01,  9.8338e-03, -3.5564e-02, -2.1142e-02,
        -8.4899e-06,  3.1304e-02,  3.3698e-02,  4.7513e-02, -8.0063e-02,
        -2.4651e-01, -1.5470e-01,  2.2713e-02,  1.3209e-01, -4.5122e-03,
         4.7850e-02,  1.6323e-02, -3.3821e-02,  1.5864e-02, -1.8583e-01,
        -3.3233e-02, -3.1982e-02, -1.5649e-02, -5.1416e-02, -6.3523e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1797,  0.3863,  0.1503, -0.1781, -0.0942,  0.0443,  0.1604, -0.0306,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.0202,  0.0432, -0.0208,  0.0335,  0.0241, -0.0432,  0.0106, -0.0170,
         0.0300,  0.0693, -0.0292, -0.0744,  0.0097, -0.0006, -0.0014, -0.0364,
         0.0549, -0.0078,  0.0670, -0.0215,  0.0258,  0.0240, -0.0468,  0.0458,
        -0.0078, -0.0094, -0.0675,  0.0651, -0.0032, -0.0128, -0.1482, -0.2932,
        -0.1119,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8864e-02,  5.9843e-02,  4.1345e-02,  8.8673e-04, -3.4791e-02,
        -6.8406e-03,  2.4516e-02, -1.2877e-02,  6.2031e-02,  3.4577e-02,
         1.6232e-02,  2.6078e-02,  4.3447e-02,  1.8331e-02, -1.1748e-02,
         3.4149e-02,  3.9152e-03,  3.0154e-02,  3.8886e-02,  3.0873e-02,
         1.6464e-02,  5.8283e-02, -7.7074e-02,  1.6832e-02, -8.4976e-03,
         7.7540e-02, -1.5515e-04, -3.1010e-02, -2.4002e-02,  7.5519e-03,
        -1.2387e-02,  1.0417e-02,  2.0246e-03, -3.1598e-02, -1.3226e-02,
         4.4352e-02,  2.4725e-02,  3.6932e-02,  2.9278e-02, -1.6009e-02,
        -1.7053e-02, -4.9546e-02, -5.2079e-02,  9.0346e-02,  1.4385e-01,
         1.2163e-01,  4.3571e-02,  7.1396e-02,  2.6733e-02, -2.9862e-02,
         1.3701e-02,  7.5660e-02,  1.7281e-02,  2.3961e-02,  3.1941e-02,
         1.6989e-02,  4.4737e-02, -4.3012e-03,  1.5120e-02, -4.5063e-02,
         1.7501e-02, -6.0792e-03, -7.4611e-02,  7.3096e-02,  4.1360e-03,
        -1.0610e-02, -1.5966e-02,  2.9378e-02,  4.3287e-02,  1.6544e-01,
         8.4712e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0486, -0.0216, -0.0277, -0.0057, -0.0546, -0.0198, -0.0227, -0.0113,
        -0.0120,  0.0284,  0.0050, -0.0024, -0.0278,  0.0475,  0.0473,  0.0218,
         0.0111,  0.0066, -0.0216, -0.0193, -0.0518,  0.0200,  0.0120, -0.0346,
        -0.0005, -0.0161, -0.0548,  0.0076,  0.0059,  0.0711,  0.0023, -0.0082,
         0.0079,  0.0092, -0.0200, -0.0258, -0.0007, -0.0184,  0.0124, -0.0056,
         0.0260,  0.1166,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0572, -0.0756, -0.0406, -0.0741, -0.0648, -0.0078, -0.0702, -0.0433,
         0.0009, -0.0090,  0.0013, -0.0015,  0.0208, -0.0051, -0.0006,  0.0193,
         0.0174, -0.0107,  0.0142, -0.0425, -0.0045, -0.0523,  0.0044, -0.0376,
        -0.0231,  0.0159, -0.0741, -0.0141,  0.0289, -0.0714, -0.0488, -0.0395,
         0.0040,  0.0071,  0.0885, -0.0249, -0.0675,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0457,  0.0657,  0.1585,  0.1365,  0.0797,  0.1474,  0.1315,  0.1322,
        -0.2764,  0.0346,  0.1367, -0.0408, -0.0518,  0.2950, -0.0285,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3588,  0.0920, -0.0573,  0.0422,  0.0311,  0.0018, -0.0025,  0.0246,
         0.0041, -0.0258, -0.0646, -0.1378,  0.0372, -0.0390, -0.0310,  0.1127,
         0.0704,  0.2107,  0.0352,  0.2907,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0087, -0.0219, -0.0427, -0.0461, -0.0024, -0.0200, -0.0150, -0.0093,
        -0.0125,  0.0029, -0.0020, -0.0122,  0.0278,  0.0032,  0.0214,  0.0387,
         0.0303,  0.0095, -0.0511,  0.0318, -0.0218,  0.0023,  0.0363,  0.0166,
         0.0072, -0.0175, -0.0019, -0.0191, -0.0224,  0.0538, -0.0107, -0.0448,
         0.0035, -0.0036,  0.0245,  0.0064,  0.0570, -0.0039, -0.0270, -0.0063,
         0.0354, -0.0472,  0.0222,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0974,  0.0778, -0.1171, -0.1111, -0.0431, -0.0605, -0.0120, -0.0780,
        -0.0454,  0.0471, -0.0625, -0.0215,  0.0195,  0.0136,  0.0332,  0.0398,
        -0.0222, -0.0212,  0.0101, -0.0146,  0.0149,  0.0300,  0.0256, -0.0051,
        -0.0269,  0.0340,  0.0266,  0.0814, -0.0125,  0.0727, -0.0115,  0.0294,
         0.0723,  0.0025,  0.0170,  0.0288, -0.0459,  0.1176,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1972,  0.0347, -0.0331,  0.0907, -0.0474,  0.0034,  0.1878,  0.0019,
         0.0193,  0.0963,  0.0068, -0.0750,  0.0831,  0.0032, -0.0007,  0.0273,
        -0.0757,  0.0156, -0.1406,  0.0373, -0.2249,  0.0112,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2272, -0.0207, -0.0176, -0.0757, -0.0465,  0.1318,  0.0109, -0.0663,
        -0.1778,  0.0203, -0.0110, -0.0205,  0.0079, -0.0109, -0.0012, -0.0003,
         0.0261, -0.0471, -0.0287, -0.0071,  0.0306,  0.0047,  0.0488,  0.0230,
        -0.0466,  0.0240, -0.0290,  0.0148, -0.0102, -0.0359, -0.0325, -0.0714,
        -0.0351, -0.0111, -0.0161,  0.0461,  0.0693, -0.0217, -0.0921, -0.0340,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0239,  0.0013,  0.1191,  0.2373, -0.0393,  0.0328, -0.0019,  0.0152,
         0.0291, -0.0637, -0.0217,  0.0161,  0.0331, -0.0220, -0.0065, -0.0693,
         0.0032,  0.0885, -0.0210,  0.0187,  0.0006, -0.0069, -0.0598,  0.0102,
         0.0403,  0.0022, -0.0290,  0.0136,  0.0190, -0.0078,  0.0106,  0.0854,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5198e-02, -8.1432e-02, -1.2953e-02,  2.4281e-02, -1.1865e-04,
        -1.1655e-02, -1.0692e-01, -1.7019e-02, -1.8322e-02,  2.8984e-02,
        -2.6096e-03,  1.5598e-02,  4.4875e-02,  7.6857e-03,  7.2675e-02,
         1.9220e-02, -1.1594e-02,  4.3177e-02, -3.7476e-03,  6.7474e-05,
         1.9775e-02,  4.1642e-02,  4.1357e-02,  2.5903e-02,  9.3507e-02,
         1.4094e-01,  1.9438e-02, -4.3238e-03, -2.9646e-03, -1.6061e-02,
         9.1896e-03,  2.4276e-04,  9.8680e-04, -2.5586e-01, -2.4460e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.0343,  0.0054,  0.0869,  0.0735,  0.0692, -0.0240,  0.0144,  0.0439,
        -0.0098,  0.0208,  0.0089, -0.0013,  0.0567,  0.0006, -0.0356, -0.0502,
         0.0036, -0.0070,  0.0031,  0.0148, -0.0719,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1548, -0.0740, -0.0640, -0.0365, -0.0123, -0.1060,  0.0067, -0.0544,
         0.0278, -0.0032, -0.0466, -0.0266,  0.0470,  0.1461, -0.0180, -0.0430,
         0.0266, -0.0895, -0.0248,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0478,  0.3377, -0.1095, -0.0147, -0.0054, -0.0081,  0.0408, -0.1534,
         0.0085,  0.2408,  0.0384,  0.0212,  0.0019,  0.0990, -0.0146,  0.0049,
         0.1072,  0.0181,  0.1025, -0.0666,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0389, -0.2739, -0.1633,  0.0683, -0.1026, -0.0286, -0.0119,  0.1115,
         0.0788, -0.0534,  0.0055, -0.0350, -0.1058, -0.0416, -0.0814,  0.0476,
        -0.0397,  0.0957,  0.0812,  0.0388, -0.0345,  0.3145, -0.0355,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0969, -0.0007,  0.0105, -0.0461,  0.0414,  0.0205, -0.0091, -0.0106,
         0.0612, -0.0528, -0.0275,  0.0513,  0.0659,  0.0050,  0.0860, -0.0389,
         0.0012, -0.0138, -0.0657, -0.1823,  0.0057, -0.0614,  0.1232, -0.1273,
        -0.0471,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0011,  0.0603,  0.0233,  0.0402,  0.0031,  0.0085,  0.0325, -0.0610,
        -0.0264,  0.2313,  0.0220, -0.0071,  0.0186, -0.0280, -0.0655, -0.0276,
        -0.0190,  0.0822, -0.0470,  0.0258, -0.0336, -0.0205,  0.0239,  0.0251,
         0.0552, -0.0229, -0.0050, -0.0441,  0.0096,  0.0815,  0.0306, -0.0690,
        -0.0122, -0.0788,  0.0065,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2188, -0.0260,  0.2046,  0.0690,  0.0823, -0.0079,  0.0475,  0.0043,
         0.0239,  0.1020,  0.0811,  0.0267,  0.1174,  0.0438, -0.1110,  0.0272,
         0.0241, -0.0362,  0.1124,  0.0899, -0.0634, -0.0576,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1504,  0.0716,  0.1946,  0.0125, -0.0213, -0.0469,  0.0379, -0.0065,
         0.0342,  0.0104,  0.0766,  0.0118,  0.0943, -0.0737, -0.1063,  0.0157,
        -0.0765,  0.0223, -0.0030, -0.0006,  0.1379,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0714, -0.1813, -0.0069, -0.0490,  0.0097, -0.0739, -0.0241,  0.0163,
        -0.1572, -0.0222, -0.1360, -0.0021,  0.0624, -0.0691, -0.1884,  0.0144,
        -0.0447, -0.0719, -0.0552,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7403e-01, -5.9744e-01,  1.3207e-01,  1.4182e-01, -2.6922e-02,
         6.3526e-02,  5.8538e-03, -8.5244e-02, -7.8660e-02, -4.1623e-02,
        -7.4257e-02,  4.3787e-02,  2.2696e-02,  5.3425e-02,  4.3476e-02,
        -5.0309e-02, -4.5706e-02,  3.1278e-02, -1.2227e-02,  1.0764e-01,
        -1.0373e-01,  1.7891e-01,  6.3940e-02, -8.9373e-02,  1.1731e-01,
         4.5800e-02,  6.6564e-01,  6.5635e-02,  8.0912e-02,  3.3939e-02,
         5.1239e-04, -5.5376e-02, -1.3575e-02, -2.1335e-02,  2.3350e-03,
         1.4720e-02, -2.4060e-01,  1.7900e-03, -5.3268e-02,  2.5084e-02,
         7.1115e-02, -5.7007e-02,  5.6048e-02,  1.0799e-01,  4.8286e-04,
        -5.5236e-02, -1.9764e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2145,  0.0140, -0.0591, -0.0068,  0.0454,  0.0066, -0.0079,  0.1564,
         0.0017,  0.0119, -0.0328,  0.0131, -0.0271, -0.0988,  0.0125,  0.0790,
        -0.0012,  0.0015, -0.0826, -0.0284, -0.0653, -0.0351,  0.0456, -0.0491,
        -0.0134,  0.1158, -0.0368, -0.0045,  0.0447,  0.1163,  0.0196, -0.0734,
        -0.1220, -0.1375,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1447,  0.0006,  0.0633, -0.0285, -0.0083,  0.0231,  0.0320, -0.0940,
         0.0183,  0.0326,  0.0844,  0.0066, -0.0194, -0.0239,  0.0172, -0.0693,
        -0.0802, -0.0217,  0.1183,  0.0170,  0.0431,  0.0023, -0.0252, -0.0600,
        -0.0896,  0.0051, -0.0283, -0.0486, -0.0025, -0.0362, -0.0399, -0.0293,
         0.0482,  0.0199,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.1042, -0.0433,  0.1898,  0.0008, -0.0026,  0.0085, -0.0591, -0.0548,
        -0.0226, -0.0284, -0.0722, -0.0318,  0.0051, -0.1139, -0.1801, -0.0136,
         0.0173,  0.0170,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0851, -0.0284, -0.0976, -0.0792, -0.0053,  0.0485,  0.0312, -0.0675,
        -0.0069,  0.0109, -0.0210,  0.1589,  0.1018, -0.1054,  0.1176,  0.2597,
        -0.0794, -0.0148, -0.0435,  0.0386,  0.0108,  0.1081,  0.0052, -0.0159,
         0.0169,  0.0828,  0.0199,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2007, -0.0924, -0.1658,  0.0656, -0.2363,  0.4558, -0.5219, -0.4553,
        -0.2139, -0.2148, -0.0611,  0.0574, -0.1542, -0.6963,  0.0431, -0.5903,
        -0.1871, -0.1286, -0.2433,  0.1632, -0.1808,  0.0415, -0.1052,  0.1288,
         0.0960, -0.2714,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1276,  0.0535,  0.0060,  0.0337, -0.0140,  0.1134,  0.0595, -0.0623,
        -0.0664, -0.0053, -0.1474,  0.0058,  0.1450,  0.0509, -0.0719, -0.0357,
         0.0540, -0.0305, -0.0961,  0.1495,  0.1711,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2249,  0.0477, -0.0187, -0.0040,  0.0154, -0.0336,  0.0700, -0.0141,
         0.0524, -0.0698, -0.0928, -0.0773, -0.0879, -0.0398, -0.0392,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1304,  0.0281,  0.0684,  0.1039, -0.3324, -0.1671, -0.2011,  0.0834,
        -0.1055,  0.0284, -0.1536, -0.0261, -0.0666, -0.0831, -0.1221, -0.1309,
        -0.0186,  0.1496,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0433,  0.0252, -0.0113,  0.0287,  0.0925,  0.2478,  0.0790, -0.0228,
         0.0229, -0.0316, -0.0079,  0.0425,  0.0387, -0.0657, -0.0602,  0.0223,
         0.0210,  0.1979,  0.0842,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4170e-01, -5.3505e-02,  1.7608e-02, -1.2357e-02,  2.4920e-03,
        -2.7089e-02, -9.8667e-02,  4.2905e-02, -1.9309e-02,  1.4437e-03,
         1.5222e-02,  1.1083e-01, -1.4055e-04, -1.9762e-02, -1.5845e-02,
        -1.1603e-01,  3.0904e-02, -7.4552e-03,  9.3554e-02, -2.6699e-03,
        -3.4477e-02,  6.3608e-02, -2.6827e-02,  2.3421e-02, -3.2619e-02,
        -4.9505e-02, -7.5586e-02, -4.9302e-02,  1.1968e-02, -3.8891e-03,
        -8.5498e-03, -6.3612e-02, -4.5882e-02, -2.8680e-02, -3.4421e-02,
         1.1786e-01, -6.1005e-03,  8.2743e-02, -3.4602e-02,  3.6744e-03,
        -5.0701e-02,  3.4790e-02,  1.2692e-01, -6.3433e-03], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0249, -0.0122, -0.0489, -0.1578,  0.0236,  0.0964,  0.0387,  0.1058,
        -0.0300,  0.0461, -0.0088, -0.0838,  0.0510, -0.0136, -0.0250, -0.0582,
         0.1132, -0.0821, -0.0321, -0.0050, -0.0550, -0.0030, -0.0361,  0.0225,
        -0.0409, -0.0290, -0.1070, -0.0337,  0.0223,  0.0297, -0.0161,  0.0146,
        -0.0513,  0.0064, -0.1535,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0813, -0.1895, -0.1190, -0.0028,  0.1559,  0.1998, -0.0756,  0.0392,
        -0.0505, -0.0107,  0.0520, -0.1087, -0.0853, -0.0518,  0.1114,  0.0024,
        -0.0696, -0.0189,  0.0523,  0.0118, -0.0733,  0.0209, -0.1335, -0.2090,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0270, -0.0510,  0.0120, -0.0290,  0.0038, -0.0010,  0.0041, -0.0317,
        -0.0015,  0.0030, -0.0371, -0.0326, -0.0130,  0.0052, -0.0444,  0.0048,
        -0.0163,  0.0268, -0.0072, -0.0835, -0.0074, -0.0540, -0.0161, -0.0380,
         0.0383, -0.0346,  0.0209, -0.0019,  0.0354,  0.0080,  0.0437,  0.0005,
        -0.0154,  0.1538,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0262, -0.4054, -0.2167, -0.1523, -0.0191,  0.0082, -0.0480,  0.0894,
        -0.0259, -0.0756, -0.0042,  0.0241, -0.0025,  0.0241, -0.0062, -0.0197,
        -0.0057, -0.0043, -0.1287, -0.0825, -0.0146, -0.0134,  0.0144,  0.0161,
        -0.0166,  0.0804,  0.0190, -0.0310, -0.1021, -0.0600,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.0368, -0.4716, -0.1302, -0.0307, -0.0523, -0.0418, -0.0147, -0.0153,
        -0.0126, -0.1981, -0.2481,  0.0483, -0.0831,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ -0.4157,   7.4314,  -5.6716, -11.3378,   5.6260,   0.5437,  -4.0170,
         -1.6675,  -0.8051,  -0.2545,   0.5589,   1.5457,  -4.0502,  -2.8121,
          1.6447,   5.1724,   9.1710,   7.6388,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0898, -0.0053, -0.0633, -0.0392,  0.0048,  0.0675,  0.0488, -0.0129,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1187, -0.0115, -0.0245,  0.0309, -0.0402,  0.0104, -0.0035, -0.0511,
         0.0330, -0.0189, -0.0569,  0.0115, -0.0042, -0.0733, -0.0087, -0.0249,
         0.0215, -0.0543,  0.0081,  0.0077, -0.1382,  0.0228,  0.0400, -0.0679,
         0.0133, -0.0183, -0.0200,  0.0390,  0.0282, -0.0164,  0.0756,  0.0255,
         0.0265,  0.0345,  0.0393,  0.0409, -0.0532,  0.0462,  0.0663, -0.0093,
         0.0079,  0.0025, -0.0176, -0.0288, -0.1372, -0.0495, -0.1124],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4927, -0.0186,  0.2983,  0.0484, -0.2099,  0.0477, -0.0221,  0.0811,
        -0.1853, -0.1150,  0.1678,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2542,  0.0493, -0.0116, -0.0443,  0.0443,  0.0262, -0.0249, -0.1234,
         0.0238, -0.0113, -0.0011,  0.0488, -0.0762, -0.0094,  0.1594, -0.0744,
        -0.1938,  0.0913,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3727,  0.0976, -0.0324, -0.1218, -0.0761, -0.0951,  0.0050,  0.0451,
        -0.0688,  0.0178, -0.0344,  0.0358, -0.1676,  0.0549, -0.3760,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4164,  0.1157, -0.1050, -0.2131, -0.2383,  0.1062, -0.0017, -0.6831,
        -0.2579, -0.0794, -0.1414,  0.1025,  0.0597, -0.3377, -0.1206,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0402, -0.1441,  0.0016, -0.0308, -0.1563,  0.0879,  0.0777,  0.0766,
        -0.0234,  0.0865, -0.0605,  0.0253, -0.0199, -0.1017, -0.0216, -0.0054,
        -0.0471,  0.1174, -0.0956,  0.0066,  0.0515, -0.1048, -0.0294, -0.2131,
         0.0257,  0.1224,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0667,  0.0246, -0.1563, -0.1838,  0.0543, -0.0438,  0.1409, -0.0225,
        -0.0269,  0.0012,  0.0362, -0.0340,  0.0171,  0.1955,  0.0054,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0881, -0.0115,  0.0013,  0.0108, -0.0129, -0.0534,  0.0192, -0.0185,
        -0.0022,  0.0078, -0.0482, -0.0113,  0.0011, -0.0048,  0.0632,  0.0052,
        -0.0302, -0.0233,  0.0116, -0.0420, -0.0102, -0.0431,  0.0258, -0.0067,
        -0.0706,  0.1219,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1958, -0.1934, -0.0249,  0.1555, -0.0168,  0.0128, -0.0412,  0.0935,
         0.0123,  0.0186,  0.1396, -0.0207, -0.0025, -0.0644, -0.0363, -0.0023,
        -0.2127,  0.0457,  0.0117,  0.0549, -0.0731,  0.0354, -0.0286,  0.0339,
        -0.0261,  0.2557,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.0437, -0.0214, -0.0463, -0.0567, -0.0378,  0.0090, -0.0338, -0.0160,
        -0.0103, -0.0356, -0.0904, -0.0131, -0.0136,  0.0736, -0.0203,  0.0314,
        -0.0632,  0.0951, -0.0048, -0.0426,  0.0329, -0.0438, -0.0499, -0.0284,
        -0.0304, -0.0480,  0.0049, -0.0253, -0.0180, -0.0127, -0.0480,  0.0404,
         0.0104, -0.0083, -0.0922,  0.1036,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1374, -0.2432, -0.2150, -0.2026,  0.0429, -0.0390,  0.0294,  0.0265,
        -0.0197, -0.0096, -0.0430,  0.0529,  0.0516, -0.0947,  0.0377, -0.0818,
        -0.1095,  0.0340,  0.0842, -0.2843, -0.3185,  0.0020,  0.1361, -0.0898,
        -0.0501, -0.0066, -0.0585,  0.0217, -0.0111,  0.1362, -0.0269,  0.0438,
        -0.1145,  0.2048, -0.8288,  0.1357,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0355,  0.0022,  0.0468,  0.0090, -0.1428,  0.1127,  0.0692,  0.0110,
        -0.1172,  0.0579,  0.0330,  0.0030,  0.0267, -0.0092, -0.0276,  0.0286,
         0.0258,  0.1291, -0.0021, -0.0905,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0832, -0.1184,  0.0089,  0.0216, -0.0505, -0.0568,  0.0290, -0.0210,
         0.0168, -0.0251, -0.0381, -0.0249,  0.0168, -0.0381,  0.0011, -0.0419,
        -0.0132, -0.0351,  0.0468,  0.0205,  0.0117,  0.0337, -0.0006, -0.0357,
        -0.1234, -0.0566, -0.0518,  0.1011, -0.0063,  0.0730, -0.1323, -0.0178,
        -0.0835, -0.0129,  0.0645, -0.0973, -0.2054, -0.0163,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0429e-01,  1.7473e-02,  2.7493e-02,  1.4063e-02,  2.0231e-03,
         2.2126e-02, -2.5162e-02, -1.5549e-02,  8.2916e-05, -5.4017e-02,
         1.4253e-02, -1.9923e-02, -1.2046e-03, -5.1968e-03,  6.5536e-02,
        -1.3182e-02,  2.3591e-02, -1.0961e-04, -1.3675e-02, -3.1261e-02,
        -4.3144e-02, -1.1651e-02,  1.3520e-02, -7.6394e-02, -3.4184e-02,
        -3.7446e-02, -2.1621e-02,  7.5156e-03,  6.5137e-04,  5.7207e-02,
         3.3352e-03, -3.4808e-02, -5.4077e-03, -2.3685e-02,  5.0643e-03,
         4.7379e-02,  1.9996e-02,  8.4209e-02, -1.8896e-02,  4.8064e-02,
         9.7464e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6384, -0.6197, -0.1450, -0.2331,  0.3135,  0.0872,  0.0065,  0.0134,
        -0.0680,  0.0168,  0.0309,  0.0457,  0.0687, -0.5019,  0.2567, -0.0986,
         0.0310,  0.0123,  0.0832,  0.0038,  0.2542, -0.7301,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0587,  0.0382,  0.0259, -0.0272,  0.0179, -0.0201, -0.0494, -0.0588,
         0.0016, -0.0167, -0.1113, -0.0027,  0.0264,  0.0018,  0.0521,  0.0134,
        -0.0037, -0.0424,  0.0423,  0.0365,  0.0573, -0.0154,  0.0377,  0.0108,
         0.0418, -0.0337,  0.0023, -0.0420, -0.0980,  0.0257,  0.0201,  0.0458,
        -0.0318,  0.3073, -0.1336,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0988, -0.0786, -0.1030, -0.1950, -0.0030,  0.1408,  0.0953,  0.1101,
         0.0361, -0.0888, -0.0317, -0.0015, -0.0668,  0.0051,  0.0424, -0.1044,
        -0.0947,  0.0529, -0.0185,  0.0403, -0.0133, -0.0356, -0.0540,  0.0386,
         0.0043, -0.0579, -0.0070,  0.0571, -0.0559,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3069,  0.0493,  0.0775,  0.0611,  0.0126,  0.0012,  0.0283, -0.0319,
        -0.0543,  0.2035, -0.0629, -0.0545, -0.0130, -0.0118,  0.0718,  0.0123,
        -0.0398, -0.0460, -0.1078, -0.0225,  0.0481, -0.2461,  0.1007, -0.2405,
         0.1128,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4170,  0.1073, -0.0560, -0.2245,  0.0357,  0.0737,  0.0173,  0.0044,
        -0.0220,  0.0999,  0.0626,  0.0380,  0.0183,  0.1912,  0.0328,  0.0250,
        -0.0035, -0.0432,  0.0193,  0.1494, -0.1146, -0.1790, -0.1680,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7635,  0.1254,  0.0044, -0.1606,  0.0027,  0.3997, -0.2832,  0.1139,
         0.0349,  0.0425,  0.1150,  0.0341,  0.0173, -0.0213, -0.0071, -0.1724,
         0.0293, -0.0225,  0.0110,  0.0044,  0.0816, -0.1306,  0.1814,  0.3549,
         0.1637,  0.0830,  0.0858,  0.0019,  0.0565, -0.0055,  0.0262, -0.0272,
        -0.1458, -0.1036, -0.2373, -0.2868, -0.1307, -0.1824, -0.2412,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5315e-01, -6.9583e-01, -3.4192e-01, -1.3714e-01,  1.8326e-02,
        -1.0520e-01,  6.9710e-03, -2.2686e-02,  2.1238e-01, -1.9819e-02,
         6.0770e-02, -1.2680e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.3395, -0.0730, -0.0342,  0.0485,  0.0205, -0.0991, -0.0407, -0.0290,
        -0.0675,  0.0018, -0.0457,  0.0096, -0.0106, -0.0214, -0.0038, -0.0183,
         0.0219, -0.0225, -0.0088,  0.0052,  0.0104,  0.0019,  0.0188,  0.0291,
         0.0491, -0.0850,  0.1995, -0.2272, -0.0136,  0.0898, -0.0698, -0.0642,
        -0.0448, -0.0235, -0.0851, -0.1426,  0.0025,  0.0023,  0.0559,  0.0079,
        -0.0029, -0.0277, -0.0622,  0.0198,  0.0691,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2133,  0.0169,  0.0005,  0.0071,  0.2745, -0.0932, -0.1109,  0.0648,
        -0.0046,  0.1885,  0.0192, -0.0383,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1963, -0.1487,  0.3132, -0.2805,  0.2837, -0.1171, -0.4662, -0.0876,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0048,  0.0587, -0.0190,  0.0498,  0.0246,  0.0007, -0.0144, -0.0282,
         0.0083, -0.0057,  0.0262,  0.1548, -0.0295, -0.0219, -0.0110,  0.0379,
        -0.0207, -0.0848, -0.0145,  0.0101,  0.0046,  0.0496,  0.0148, -0.0714,
        -0.0119,  0.0162,  0.0089, -0.0073,  0.0018,  0.0240,  0.0568,  0.0444,
         0.0269,  0.0438,  0.0016, -0.0215,  0.0456, -0.0067,  0.0133,  0.0309,
         0.0006,  0.0118,  0.0320, -0.0425, -0.0060, -0.1862, -0.1253, -0.0325,
        -0.0551, -0.0356, -0.0022,  0.0471,  0.0070,  0.0312,  0.0135,  0.0165,
        -0.0181, -0.0161,  0.0169, -0.0357, -0.1421], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1314, -0.0555, -0.1203, -0.0575,  0.0448,  0.0281, -0.0420, -0.0447,
        -0.0249, -0.0335, -0.0312, -0.0168, -0.0167, -0.0282,  0.0079, -0.0199,
         0.0142, -0.0361, -0.0176, -0.0189,  0.0473,  0.0240,  0.0137, -0.0968,
        -0.0258,  0.0009,  0.0174, -0.0185, -0.0298,  0.0420,  0.0765,  0.1016,
         0.0394,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3544, -0.0514,  0.0020, -0.0383, -0.0750,  0.0603, -0.0255,  0.0015,
         0.0440, -0.1352,  0.0313, -0.0019, -0.0807, -0.0317,  0.0244,  0.2351,
        -0.0125, -0.0030, -0.0268, -0.0508, -0.2335,  0.0950,  0.0333, -0.2472,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1626, -0.1849, -0.2214,  0.1301, -0.0394, -0.0552, -0.0953,  0.0799,
         0.0105, -0.0556,  0.1139,  0.0759, -0.1455, -0.0572,  0.0202,  0.0044,
         0.0231,  0.1218,  0.0566,  0.0696,  0.2055,  0.2838, -0.1111,  0.3614,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4225,  0.3837, -2.4513, -0.8169, -0.7394,  0.4910, -4.7609, -0.8301,
        -0.6608, -1.7283,  0.4472, -0.2335, -0.2981, -0.2101, -0.0648,  0.2874,
        -0.7064, -1.0875, -1.3905, -1.0848, -0.5607,  0.0492, -0.6023,  0.4671,
         0.6460,  1.0628, -0.6057,  0.3779, -0.5490, -0.1508,  0.1893, -1.8125,
        -2.9340,  0.4346,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0261, -0.1991, -0.1177,  0.0653, -0.0900, -0.1542, -0.0795,  0.1318,
         0.0551,  0.0074, -0.0025, -0.0236,  0.0599,  0.1615, -0.0814, -0.0955,
        -0.0133, -0.0244,  0.0142, -0.0255, -0.0663,  0.0133, -0.1385,  0.0012,
         0.2736,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0724,  0.1655, -0.0044,  0.0247,  0.0332, -0.0181, -0.0551,  0.0294,
         0.0639, -0.0319, -0.3757,  0.0230, -0.0375, -0.0661,  0.2474, -0.1174,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2326,  0.0749, -0.0025,  0.1195, -0.0116,  0.0314, -0.0277, -0.0121,
        -0.1306, -0.0376,  0.0143,  0.0296,  0.0468,  0.0564,  0.0983,  0.0500,
         0.0495, -0.0204,  0.0011, -0.0175, -0.0580, -0.0326,  0.2228,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0881, -0.5039, -0.1315, -0.0021, -0.0541, -0.0757, -0.0045, -0.0453,
        -0.1246,  0.0420,  0.0554, -0.0540, -0.0589, -0.1666,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.8937e-01,  1.6164e-03, -4.1777e-02, -9.0929e-03, -2.6801e-02,
         1.4525e-01, -2.5935e-04,  4.4033e-02, -8.2783e-03, -2.7880e-02,
        -6.4210e-02, -5.3169e-04,  2.0506e-02, -3.2411e-02,  5.5196e-02,
        -1.4784e-01, -7.8329e-02,  2.8600e-02,  1.5312e-01, -1.5319e-01,
        -1.0386e-01, -4.6412e-02,  6.2925e-02,  9.1790e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4081,  0.3428,  0.0293, -0.1796,  0.1568, -0.2236, -0.0092, -0.0710,
        -0.1151, -0.0486,  0.1020,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1417, -0.4459,  0.0604,  0.0852, -0.0761, -0.0385, -0.0649, -0.0463,
        -0.0392, -0.0295, -0.0109, -0.0376, -0.0423, -0.1013, -0.0047, -0.0168,
        -0.0336, -0.0390, -0.0095, -0.0223, -0.0866,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0034, -0.0350, -0.2092,  0.0494,  0.0143, -0.1085, -0.0993, -0.1192,
         0.0288,  0.0243, -0.0548, -0.2085, -0.0739,  0.1242,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1298,  0.0540, -0.0614, -0.1434,  0.1597,  0.0816,  0.0337, -0.1850,
        -0.0074,  0.1241, -0.0133, -0.1610,  0.0964, -0.0321, -0.0070,  0.0339,
         0.0187, -0.1212, -0.0840,  0.0253, -0.2067,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1801, -0.0167, -0.0118, -0.0034,  0.0195, -0.0317,  0.0541, -0.0275,
        -0.2570, -0.0572,  0.0109, -0.1763, -0.0861,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2360,  0.0237, -0.0240,  0.0441,  0.0312,  0.0129, -0.0084,  0.0034,
         0.1571, -0.0458,  0.0019,  0.0365,  0.0078,  0.0164,  0.0117, -0.0274,
        -0.0171,  0.0316, -0.0509,  0.0111, -0.0050,  0.0756, -0.1058, -0.0814,
        -0.0415, -0.0216, -0.0279, -0.0153, -0.0478, -0.0706, -0.0024, -0.0110,
        -0.0354,  0.0023,  0.0091,  0.0084, -0.0158, -0.0057,  0.0010,  0.0238,
        -0.0100,  0.0003,  0.0050,  0.0849, -0.0961], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0776,  0.0171, -0.0289, -0.0274, -0.0265, -0.0083, -0.0197,  0.0156,
        -0.0367, -0.0274, -0.0487, -0.0034, -0.0470, -0.0006, -0.0711, -0.0043,
         0.0343,  0.0109, -0.0108,  0.0152, -0.0664, -0.0477,  0.0149, -0.0561,
        -0.0098, -0.0552, -0.0974,  0.0493, -0.0829, -0.1004,  0.0995, -0.0308,
         0.1323, -0.0873,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0947, -0.1498, -0.0429, -0.0827, -0.0211, -0.0632, -0.0316, -0.0490,
        -0.0350, -0.0561, -0.0365,  0.0109,  0.0740, -0.1442,  0.0228, -0.0025,
        -0.1046,  0.0481,  0.0285, -0.0049, -0.1326, -0.0692, -0.0286,  0.0122,
         0.0352,  0.1836,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0727, -0.2984,  0.1928,  0.0371, -0.4219, -0.0874, -0.2574,  0.0471,
         0.0381, -0.0361,  0.0169,  0.0130, -0.0331, -0.1264, -0.1212, -0.0275,
         0.1097, -0.0748,  0.0204, -0.0568, -0.0081, -0.0432,  0.0256,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7756e-01, -6.9217e-02, -2.5477e-02,  5.6067e-02, -1.9085e-02,
         5.5882e-03, -1.2629e-02, -5.7331e-03, -2.0643e-02,  7.4537e-03,
        -2.5570e-02, -2.6969e-02,  2.6250e-02, -1.0694e-02, -1.0548e-02,
        -1.0282e-02, -3.7336e-02,  2.3042e-04,  1.0521e-03,  2.2205e-04,
         2.1192e-02,  1.3654e-02, -3.2918e-02, -9.5578e-02,  4.2404e-02,
        -6.7924e-02,  2.0656e-02, -6.3017e-04,  7.0112e-02, -4.0210e-02,
         3.9254e-02, -6.0022e-03,  1.0511e-01,  3.3645e-02,  4.0836e-02,
         9.0644e-02, -8.3072e-03,  9.2709e-02,  1.0604e-03, -2.0076e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0527, -0.0122,  0.0316, -0.0176, -0.0343,  0.0136, -0.1039,  0.0020,
        -0.0614, -0.0150,  0.0086,  0.0033, -0.0466, -0.0124, -0.0158, -0.0168,
        -0.0345,  0.0592,  0.0627,  0.0244,  0.0517, -0.0033,  0.0005, -0.0241,
         0.0020, -0.0319,  0.0267, -0.0065, -0.0295,  0.0569,  0.0721,  0.1203,
         0.0288, -0.0016,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.0729e-01,  3.4075e-02, -4.8138e-02, -2.4952e-02, -5.2249e-02,
        -5.2172e-02,  8.9353e-03, -2.3792e-02, -2.9739e-02,  4.0213e-04,
         6.6082e-03, -3.4610e-03, -2.8328e-02,  4.8216e-02, -8.8241e-02,
        -1.5426e-01, -3.1993e-03,  2.0527e-02,  5.7357e-03,  2.6253e-02,
        -4.8961e-02,  1.3372e-02, -1.5302e-01, -2.7127e-02,  5.0256e-05,
        -4.4433e-02, -7.9116e-04,  3.7025e-03,  1.1383e-03,  1.7885e-03,
         5.6112e-02,  9.4076e-03, -1.8061e-02, -1.5874e-02, -2.4475e-02,
        -4.5581e-03,  2.7302e-03, -4.5697e-03,  2.4031e-02, -5.1422e-04,
        -2.5506e-03,  2.3468e-02,  9.2528e-03, -4.0145e-02, -1.2141e-02,
         4.6078e-03, -3.1174e-02,  7.8512e-03, -8.2847e-03, -5.8287e-02,
        -9.4577e-03, -4.5634e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0305, -0.0561, -0.0222, -0.0414, -0.0026,  0.0270,  0.0300, -0.0463,
         0.0156, -0.0282,  0.0080,  0.0512, -0.0169,  0.0040, -0.0525,  0.0117,
        -0.0953, -0.0068, -0.0170,  0.0348,  0.0111,  0.0353,  0.0195, -0.0337,
        -0.0714, -0.1287, -0.0652,  0.0166,  0.0891, -0.0859, -0.0017,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0375,  0.0615, -0.0300, -0.0050,  0.0056,  0.0736,  0.0253,  0.1036,
        -0.0190,  0.0626, -0.0388, -0.0431,  0.1443, -0.1258,  0.0391,  0.0031,
         0.1319,  0.0077,  0.0253,  0.0205,  0.0557, -0.0505,  0.0350, -0.1144,
         0.0172,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0404,  0.1078, -0.0495, -0.0088, -0.0518,  0.0233,  0.0356, -0.1866,
         0.0710,  0.1563, -0.1176,  0.0726,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0352,  0.0768, -0.1786,  0.0635, -0.0221, -0.0700,  0.0526,  0.0265,
        -0.0430,  0.2253,  0.0310,  0.0344,  0.0457, -0.0313,  0.0933,  0.0182,
        -0.0911, -0.0267, -0.0610, -0.0636, -0.0245,  0.1688,  0.0044, -0.0438,
        -0.0836,  0.0071, -0.0138,  0.0302, -0.0740,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1833, -0.0151,  0.0041, -0.1727,  0.0743, -0.0943,  0.0055,  0.0450,
         0.0874,  0.0860, -0.1784,  0.0364,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0212, -0.1528, -0.1514,  0.0179, -0.1539, -0.0428,  0.0177, -0.0923,
         0.0121, -0.0363,  0.0577,  0.1890,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1545,  0.0403, -0.2147, -0.0903, -0.0126,  0.1538, -0.0061,  0.0625,
         0.0264, -0.0664, -0.0421, -0.0058,  0.0076,  0.0335, -0.0127, -0.0294,
         0.0458, -0.0620,  0.0112,  0.1727,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0493, -0.0177, -0.0226, -0.0109, -0.0041, -0.0213,  0.0520,  0.0243,
        -0.0068,  0.0541,  0.0243,  0.0092,  0.0092,  0.0594,  0.0910,  0.0317,
         0.0049,  0.0625,  0.0601, -0.0212,  0.0294,  0.0147,  0.0456,  0.0047,
        -0.0233,  0.0087, -0.0467, -0.0153,  0.0147,  0.0517,  0.0254, -0.0071,
        -0.0221, -0.0087,  0.0657,  0.0081,  0.0257,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1808, -0.1991, -0.0633, -0.0779, -0.0144,  0.0630,  0.1691,  0.0444,
         0.0806,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1685,  0.9292,  0.5947,  0.0508,  0.0839, -0.5246,  0.1275,  0.1487,
        -0.0217,  0.1440, -0.0803, -0.0313, -0.0486,  0.0505,  0.4957,  0.0987,
         0.0299, -0.3892,  0.2548, -0.1323, -0.4492, -0.0172, -0.3604,  0.0394,
        -0.0051, -0.5526,  0.4045,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0800,  0.0363,  0.0369, -0.0012,  0.0188, -0.0365,  0.0179, -0.0070,
         0.0093,  0.1580,  0.0083, -0.0152,  0.0136,  0.1000,  0.0014, -0.0392,
        -0.0037,  0.0221, -0.1143, -0.0717,  0.1610,  0.0184, -0.0321, -0.0336,
         0.0282,  0.1326,  0.0074,  0.0016, -0.1197, -0.0130, -0.0081, -0.0424,
        -0.1307, -0.1309,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.0826,  0.0169,  0.0339, -0.0377, -0.0255,  0.0069, -0.0081, -0.0315,
        -0.0178,  0.0398, -0.0489,  0.0121,  0.0202,  0.0040,  0.0185,  0.0570,
        -0.0134, -0.0162,  0.0018,  0.0022,  0.0329,  0.0134, -0.0193, -0.0202,
        -0.0324,  0.0361, -0.0144,  0.0967,  0.0162,  0.0138,  0.0346,  0.0835,
        -0.0194,  0.0250, -0.0466, -0.0074, -0.0194, -0.0283,  0.0280,  0.0386,
        -0.0103,  0.0073,  0.0442,  0.0223, -0.1320,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1769e-02, -7.3808e-03, -2.0097e-01, -4.8722e-02,  2.1779e-02,
         5.7717e-03,  1.6514e-01, -2.2835e-02,  2.7978e-01,  4.5397e-02,
        -9.5829e-02,  2.4369e-02,  5.3007e-03,  9.8720e-06, -6.1045e-02,
        -9.6267e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1328e+00, -1.7550e-01,  7.6290e-02, -6.5362e-01, -8.3025e-01,
        -2.1133e-02,  5.3531e-02,  3.7161e-01,  1.4090e-01, -4.2192e-02,
        -1.6964e-03, -5.5421e-02, -9.7975e-02, -8.4773e-03,  3.1547e-02,
         3.7751e-03, -1.0404e-01,  1.6889e-01, -1.7866e-01, -1.6916e-02,
         8.3287e-02,  1.2088e-02, -9.3534e-02, -1.3163e-01, -1.7203e+00,
         3.9221e-01,  5.2036e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1368,  1.3104, -0.2285,  0.2715, -0.2950, -0.5623,  0.1664, -0.1747,
        -0.1767, -1.2751, -0.0502, -0.2295,  0.3651, -0.0588, -0.6497,  0.3925,
         0.6686, -1.6449, -0.0032, -2.4442, -0.0051,  0.4006, -0.6576,  1.0757,
         0.0105,  0.1723, -0.3939, -0.0119,  0.0334, -0.6911, -1.2019,  0.1084,
        -0.6093,  1.2397,  0.7603, -0.4305,  0.0637, -0.2639,  0.2325, -0.0475,
         0.0674, -0.5253, -0.3418,  0.2565,  0.3707, -0.7295], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1609,  0.5391,  0.0451,  0.0107,  0.0178,  0.1813,  0.0062,  0.0828,
        -0.0111,  0.3162,  0.0207, -0.0520,  0.1060, -0.0376,  0.3341, -0.1010,
        -0.0367,  0.0986,  0.2018, -0.4503,  0.0034,  0.0281,  0.0166,  0.0195,
         0.0322, -0.0422, -0.0621, -0.2469,  1.1119,  0.3236,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-12.8892,  -2.1320, -12.3208,  -1.9038,  -6.2558,   0.0178,  -0.5084,
         -6.2257, -15.4900,  -9.3818,   0.4740,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1081,  0.0818, -0.0507, -0.0977,  0.1130,  0.0389,  0.0478, -0.0081,
        -0.0046,  0.0279, -0.0167, -0.0306, -0.1617, -0.0808,  0.1021,  0.1079,
         0.0241, -0.0188,  0.1139,  0.2230,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1237, -0.0166, -0.0626,  0.0446, -0.1585, -0.1445, -0.0417,  0.2269,
        -0.0238, -0.1107, -0.0862,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2451, -0.0624, -0.1442, -0.0245, -0.0612, -0.1137, -0.0595,  0.0768,
        -0.0933, -0.1295, -0.0080, -0.0845, -0.0271, -0.0420,  0.0226, -0.0466,
        -0.1047,  0.0101, -0.0750, -0.1438,  0.1626,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0565, -0.1015, -0.0378, -0.0859, -0.0120,  0.0454, -0.0098,  0.0455,
         0.1375, -0.0604, -0.0023,  0.0833,  0.0479, -0.0113,  0.0268, -0.0426,
         0.0089, -0.0943,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6068e-03, -1.3170e-02, -2.0761e-02,  1.0725e-04,  2.7527e-02,
         3.6096e-02,  1.8807e-02, -4.8141e-02, -7.6412e-03, -1.8492e-02,
         2.5506e-02,  6.8599e-02, -2.9880e-03, -1.2904e-02, -2.5254e-02,
         9.3979e-02, -1.5787e-02, -1.7100e-02,  2.6367e-03, -2.9036e-03,
         4.2056e-03, -7.6022e-03, -5.8574e-03,  2.5960e-02,  1.1763e-02,
        -1.1160e-03, -3.6958e-02, -1.4034e-02,  3.0084e-03, -1.4004e-02,
         7.4042e-02, -4.5935e-03, -4.9743e-02, -1.9327e-02,  2.3978e-02,
        -4.2346e-03, -7.5402e-03,  1.8235e-01, -6.5535e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9645e-02, -1.2867e-01,  1.9030e-02, -1.1467e-02,  1.1872e-02,
         4.7476e-02, -2.6389e-02, -3.2652e-02,  3.0869e-02, -1.1560e-02,
        -1.4613e-02, -4.3954e-02,  2.1029e-02,  2.9416e-02,  2.6765e-02,
         1.7734e-02, -1.1408e-02, -1.7829e-04, -4.0930e-02,  1.9208e-02,
        -8.0537e-02, -6.2701e-02, -3.3009e-02, -5.3069e-03, -4.8543e-02,
        -3.5516e-02,  3.8785e-03, -5.0339e-02,  9.7161e-03,  7.5732e-02,
        -7.3038e-02, -1.2920e-02, -2.6013e-01, -1.8727e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 3.1635,  0.1286, -0.2165, -0.1345, -0.1168, -0.8620, -0.3343, -0.2791,
         0.3793,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0398, -0.0062,  0.0690, -0.0137, -0.0716,  0.0105, -0.0083, -0.0229,
         0.0492, -0.0661, -0.0086,  0.0200, -0.0202,  0.0020,  0.1081, -0.0138,
         0.0023, -0.0212, -0.0453, -0.0234, -0.0360,  0.0023,  0.0135, -0.0137,
        -0.0272, -0.0882, -0.0525,  0.0665, -0.0080,  0.0168,  0.0458, -0.0437,
         0.0091, -0.0864,  0.1368, -0.0335, -0.0417, -0.0228,  0.1706,  0.0536,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2926,  0.0851,  0.0305,  0.0540,  0.0990, -0.0765, -0.0230,  0.3085,
         0.0580, -0.1656, -0.0369,  0.0207,  0.0011,  0.0681,  0.1713,  0.1144,
         0.0677,  0.3655, -0.0089, -0.0137, -0.0032, -0.2425, -0.2607,  0.2309,
        -0.2426,  0.0486, -0.0486,  0.0761, -0.0670, -0.1536,  0.0039, -0.0444,
         0.0327,  0.0327,  0.0205, -0.2402, -0.0177, -0.0728,  0.0326,  0.0088,
        -0.1180, -0.0699,  0.0340,  0.0649,  0.0279, -0.0482, -0.0229,  0.0064,
        -0.0478,  0.2166, -0.1734, -0.3023,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0989,  0.1235,  0.3944,  0.1393,  0.1299,  0.0227,  0.0558, -0.2269,
        -0.0471, -0.0806,  0.1537, -0.0107,  0.0142,  0.1198, -0.0127,  0.0868,
         0.0861, -0.0607, -0.0288,  0.0973, -0.1286, -0.1769,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4967, -0.1950, -0.0534, -0.0636, -0.2075,  0.0494,  0.0500,  0.0101,
         0.0883, -0.0373,  0.0560, -0.0124,  0.0509,  0.0217,  0.1719,  0.0252,
        -0.1066, -0.0684,  0.1593, -0.0768, -0.0757,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1082, -0.0801,  0.0164,  0.0939,  0.1573,  0.0870,  0.1289,  0.0456,
         0.0594,  0.0678, -0.0096,  0.0097, -0.0786, -0.0546, -0.0009,  0.0352,
        -0.0307,  0.0155, -0.0182, -0.0204, -0.0289, -0.0963,  0.0307, -0.0583,
         0.0124,  0.0023,  0.2108,  0.0515,  0.0658,  0.0989, -0.0162, -0.0128,
         0.0162, -0.0083, -0.0248, -0.0239,  0.0104, -0.0380, -0.0919,  0.0374,
        -0.0268,  0.0741,  0.1406, -0.0964,  0.0064,  0.0196,  0.0685,  0.0341,
         0.0381, -0.0019,  0.0388,  0.0127, -0.1166,  0.0948,  0.1380],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1958e-01, -1.5975e-01, -6.6168e-02,  1.3681e-01, -7.0751e-03,
         1.7725e-04,  1.3094e-01,  1.6579e-01, -6.3119e-02, -1.8205e-03,
         2.3540e-01, -1.3847e-02,  1.1662e-01,  4.8806e-03,  1.2744e-01,
        -4.4759e-02, -9.5235e-02,  2.0276e-02,  4.7525e-02,  2.6723e-02,
        -6.2342e-02, -9.1040e-02, -1.3550e-01,  1.7530e-02, -8.3647e-02,
        -1.5256e-01, -3.2643e-01, -8.9621e-02, -2.1383e-01,  4.4194e-02,
        -3.1833e-01, -7.3021e-03,  3.2788e-02,  8.5927e-03, -1.5029e-01,
        -3.5986e-02,  9.0941e-03,  7.1664e-02, -8.1934e-02,  7.0155e-02,
        -1.8475e-01,  5.8093e-02,  5.8116e-02,  3.0397e-02,  3.9282e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1595,  0.0041,  0.0682,  0.0162, -0.0369,  0.0082, -0.0175, -0.0181,
        -0.0599, -0.1198, -0.1112, -0.0284,  0.0282, -0.0324, -0.0013, -0.0110,
        -0.0427, -0.0010, -0.0809,  0.0199, -0.0142, -0.0249,  0.0368,  0.1038,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1913, -0.4278, -0.1254,  0.2267,  0.2215, -0.0427,  0.1751, -0.5881,
        -0.0325,  0.0082, -0.3552,  0.1799,  0.0071,  0.3719,  0.2113,  0.0748,
         0.0487,  0.2442,  0.1105,  0.0966, -0.0212,  0.1172,  0.1100,  0.0379,
         0.1962, -0.0120, -0.0157, -0.0014, -0.0100,  0.0081,  0.0682,  0.5112,
         0.2696,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0480, -0.0553,  0.3294, -0.2610,  0.0387,  0.1604,  0.0493, -0.3653,
        -0.1442,  0.0914,  0.0343,  0.1971,  0.2224, -0.1071, -0.3109, -0.1771,
         0.0005, -0.4220,  0.0850,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1597, -0.1935, -0.2223, -0.2004, -0.0458,  0.0149,  0.0830, -0.0108,
        -0.1506, -0.1504, -0.0822, -0.0134,  0.0039, -0.0126, -0.0640,  0.0181,
         0.0513, -0.2055,  0.0007, -0.0447,  0.0699, -0.0156, -0.1007, -0.0150,
        -0.2789,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4247,  0.0566, -0.0219, -0.0204,  0.0006,  0.0037, -0.0610,  0.0664,
        -0.0228, -0.0153, -0.0686,  0.1412, -0.0746, -0.0649,  0.0689, -0.0320,
        -0.1714,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0862, -0.0102,  0.1165, -0.0103, -0.0125, -0.0637,  0.0345, -0.1704,
         0.0443,  0.0123, -0.0389,  0.0581,  0.0352,  0.0426,  0.0349, -0.0540,
        -0.0139, -0.0573, -0.0346,  0.0155,  0.0779, -0.0124, -0.0822, -0.0037,
        -0.0863,  0.0211,  0.0956,  0.0656,  0.1509, -0.0372,  0.1782, -0.1008,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5378, -0.1138,  0.0112, -0.0609, -0.0082,  0.0445, -0.0041,  0.0508,
         0.0257,  0.0159, -0.0456, -0.0286, -0.0677,  0.0029, -0.0011, -0.0111,
        -0.0470, -0.0268,  0.0293,  0.0227, -0.0234, -0.0457,  0.0669,  0.0280,
         0.1108, -0.0122,  0.1460,  0.2473,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0908,  0.0137,  0.0962, -0.2067, -0.0219, -0.0615, -0.0085, -0.0893,
        -0.0410, -0.1059, -0.0481,  0.0104, -0.0167,  0.0587,  0.0044,  0.0141,
        -0.0372,  0.0159,  0.0187,  0.1152,  0.0071, -0.0070, -0.0147, -0.0387,
        -0.0445,  0.1396,  0.0200, -0.1190, -0.1996,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1774,  0.1119,  0.0240, -0.1056, -0.1641, -0.0666,  0.2830, -0.0050,
         0.0723, -0.1990,  0.0620,  0.0310,  0.0249,  0.0011,  0.1215, -0.0186,
        -0.0281, -0.0443,  0.0152,  0.0529, -0.0419,  0.0317, -0.0661,  0.0298,
         0.0173, -0.0232, -0.0091, -0.0152,  0.0851,  0.0646,  0.0487, -0.2564,
        -0.1686,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0165, -0.0153,  0.0121,  0.0871,  0.0042,  0.0569,  0.0904,  0.0036,
         0.0206,  0.0158, -0.0312,  0.0789, -0.0313,  0.0138,  0.0120,  0.0276,
         0.0044, -0.0038,  0.1024,  0.0254, -0.0079, -0.0627, -0.0525, -0.0105,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0334,  0.0295,  0.0338,  0.0156, -0.0191,  0.0794,  0.1512,  0.0345,
        -0.0252,  0.0107, -0.0049, -0.0121, -0.0193,  0.0484, -0.0743, -0.0036,
         0.0797,  0.0007,  0.0187, -0.0003,  0.0451, -0.1063, -0.0066,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1358,  0.0956,  0.0599, -0.0647,  0.1643,  0.0597,  0.0321, -0.0330,
         0.0457,  0.0871,  0.0563, -0.0300, -0.0080, -0.1049,  0.0107, -0.0243,
         0.0096, -0.0820,  0.0161,  0.0288,  0.0317,  0.0097, -0.0190,  0.0448,
        -0.0223,  0.0375, -0.0243, -0.0059, -0.0010, -0.0253,  0.0343,  0.0495,
         0.0171, -0.1856,  0.0768,  0.0276, -0.1101,  0.0558,  0.0368,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1249, -0.0498, -0.1084, -0.0276, -0.0145,  0.0531, -0.0020, -0.0161,
        -0.0538,  0.0312, -0.0204, -0.0736,  0.0236, -0.0567, -0.0349, -0.0355,
         0.0113, -0.0578,  0.0135, -0.0787, -0.0289, -0.0654, -0.0741, -0.0199,
         0.0427,  0.0276,  0.0146,  0.0189,  0.0085, -0.0019, -0.0550, -0.0600,
        -0.0147, -0.0718, -0.0686, -0.1108,  0.0506, -0.0313, -0.0700, -0.0596,
         0.0404,  0.0986, -0.0047,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1093, -0.0526,  0.0176, -0.0131, -0.0206, -0.0073, -0.0305, -0.0273,
        -0.0192,  0.0009, -0.0219, -0.0101, -0.0298,  0.0105, -0.0359, -0.0416,
        -0.0012, -0.0011, -0.0165, -0.0193,  0.0014, -0.0089,  0.0117, -0.0044,
         0.0027, -0.0158,  0.0283,  0.0103, -0.0062, -0.0244, -0.0073, -0.0035,
        -0.0248, -0.0429,  0.0416,  0.0027, -0.0343,  0.0258, -0.0258,  0.0097,
        -0.0415, -0.0051,  0.0150,  0.0435,  0.0365, -0.0241,  0.0078, -0.0087,
        -0.0030,  0.0174,  0.0592,  0.0202, -0.0856, -0.0273,  0.0231,  0.0480,
        -0.0720], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0632, -0.1547,  0.1605, -0.0356, -0.0705, -0.0872,  0.0075, -0.0006,
         0.0311,  0.1740, -0.0216,  0.0073, -0.0658,  0.0116,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2801, -0.1989,  0.0954,  0.0832, -0.0200,  0.0810, -0.0614, -0.0184,
        -0.0420,  0.0583, -0.0324, -0.1384, -0.0485, -0.3050,  0.0148, -0.0285,
        -0.0039, -0.0364,  0.0586, -0.0290, -0.0855, -0.2815, -0.0992, -0.0399,
         0.0009, -0.0650,  0.0221,  0.1914,  0.1479,  0.0852,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1567,  0.2887, -0.4290,  0.3177, -0.0670, -0.2680, -0.6397, -0.4305,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.2531, -0.0756,  0.0677,  0.1169,  0.0128, -0.0065,  0.0738,  0.0067,
         0.0584,  0.0129, -0.1699, -0.0112, -0.0830, -0.0557, -0.0315,  0.0084,
        -0.0281, -0.0172, -0.0247, -0.0566, -0.0849, -0.0991,  0.0264,  0.1266,
         0.0190,  0.3608, -0.0153, -0.0519, -0.0305,  0.0282, -0.0283,  0.1625,
        -0.1417,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1102, -0.0044,  0.0072, -0.2312, -0.1273,  0.0898, -0.0533, -0.1456,
         0.0359,  0.0432,  0.0320,  0.0134, -0.0008,  0.0024, -0.0272,  0.0501,
         0.0187, -0.0866,  0.0256,  0.0353, -0.0824, -0.0081,  0.0311, -0.0293,
         0.0100, -0.0550,  0.0692, -0.0289,  0.0042,  0.0165,  0.0166,  0.0090,
         0.0402, -0.0127,  0.0506,  0.0750,  0.0109, -0.0458, -0.0015,  0.0209,
        -0.0352, -0.0770,  0.0047,  0.0670, -0.1064, -0.2682, -0.0055,  0.0275,
        -0.1433,  0.0448,  0.1515, -0.0142,  0.0879,  0.0388,  0.1969, -0.0215,
         0.5462, -0.0624,  0.2476,  0.1291, -0.0202,  0.0790, -0.0797, -0.2725,
        -0.0556, -0.1029, -0.0336, -0.0307, -0.1050,  0.3584,  0.1656],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0078,  0.0452,  0.0138,  0.0373, -0.0196,  0.0079,  0.0448,  0.0081,
         0.0283,  0.0339,  0.0593, -0.0017, -0.0262,  0.0211, -0.0088,  0.0222,
        -0.0122,  0.0183, -0.0188, -0.0064, -0.0047,  0.0153,  0.1243, -0.0166,
         0.0560,  0.0455, -0.0261,  0.0552, -0.0026,  0.0716,  0.0056,  0.0315,
         0.0453, -0.0340, -0.0599,  0.0449,  0.0101,  0.0149, -0.0370,  0.0462,
        -0.1033,  0.0360,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0205, -0.0426, -0.0352, -0.0698, -0.1051,  0.0385, -0.0416,  0.0539,
        -0.0930, -0.0730, -0.0355, -0.0137, -0.0137, -0.0168,  0.0318,  0.0271,
         0.0103, -0.0336, -0.0207, -0.1284, -0.0462, -0.0118, -0.0009,  0.0121,
         0.0216,  0.0077,  0.0214,  0.0188,  0.0108, -0.1618,  0.0513, -0.0041,
         0.1271,  0.0743, -0.0245, -0.1550,  0.0382,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7285,  0.4751,  0.3539,  0.0891,  0.3049, -0.8980,  0.2825, -0.1193,
         0.1630, -0.1135,  0.2318,  0.2015, -0.2310, -0.4476,  0.0599,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0352, -0.0382,  0.0492, -0.0397,  0.0620, -0.0413, -0.0048,  0.0012,
        -0.0076,  0.0201,  0.0379,  0.0574, -0.0454, -0.0876, -0.0452,  0.0196,
        -0.1795, -0.1387, -0.0291,  0.1548,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1524,  0.2025,  0.0890, -0.0583, -0.1056, -0.2600, -0.0211,  0.0491,
        -0.2139, -0.1013, -0.0810, -0.0722, -0.0016, -0.0673,  0.0729,  0.1903,
         0.3356, -0.0841, -0.1001,  0.2639,  0.1896,  0.0672, -0.0140, -0.2986,
         0.5094, -0.7594,  0.0178,  0.0857,  0.0217,  0.0857, -0.0245, -0.0597,
         0.0951, -0.0502,  0.0169, -0.1973, -0.0328, -0.0651, -0.0382,  0.0889,
        -0.1417,  0.0100, -0.6911,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0041,  0.0643,  0.0884, -0.0631, -0.1514,  0.0968,  0.1213, -0.0584,
        -0.0124,  0.0323, -0.2740, -0.1140, -0.0141,  0.0438, -0.1621, -0.1769,
         0.0568, -0.1347, -0.0183,  0.1698,  0.2636,  0.0431,  0.0723,  0.1783,
         0.2978,  0.2616,  0.0943, -0.0811, -0.1157, -0.0135, -0.0786,  0.0880,
        -0.1061, -0.1178, -0.0241, -0.3201,  0.1222,  0.1685,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0600, -0.0446, -0.1672,  0.0082, -0.0993,  0.0431,  0.0224, -0.2116,
        -0.0451, -0.0688,  0.0174, -0.0138,  0.0416, -0.0244,  0.0121,  0.0226,
        -0.0967, -0.0130, -0.0021,  0.0379, -0.2718,  0.1909,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1606, -0.0275,  0.0542, -0.1731,  0.0023, -0.0881,  0.1872,  0.1051,
         0.1044,  0.0523, -0.0203, -0.0348,  0.0399,  0.0167, -0.0054, -0.0092,
         0.0048, -0.0437,  0.0089, -0.0068, -0.0185, -0.0925,  0.0721, -0.0646,
        -0.0455,  0.0405, -0.1294, -0.1115, -0.0427,  0.1150, -0.1779, -0.0533,
         0.0647, -0.0564, -0.0348, -0.0032,  0.0397,  0.0091,  0.0620,  0.2819,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0383, -0.1114, -0.0756,  0.0490, -0.0852,  0.0669,  0.0348,  0.0177,
        -0.1027, -0.1528,  0.0640, -0.0463, -0.0154,  0.0010, -0.0617, -0.0274,
        -0.0501, -0.0407, -0.0661,  0.0140, -0.0269, -0.0135,  0.0832, -0.1257,
        -0.0295,  0.0511, -0.0027, -0.0013, -0.0335,  0.0183,  0.0163, -0.0403,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2696, -0.0422,  0.0082,  0.1117,  0.0275,  0.0575, -0.0322, -0.0351,
         0.0022,  0.0263,  0.0412,  0.0030, -0.2755,  0.0440, -0.0344, -0.1155,
         0.0680, -0.0649, -0.0033, -0.0592, -0.0158, -0.0774,  0.1572, -0.0153,
         0.0083,  0.0999, -0.0305, -0.0014,  0.0450, -0.0504,  0.0603, -0.0106,
         0.0899,  0.1608, -0.2749,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.2633,  0.1388, -0.0003, -0.0744,  0.1838,  0.1601, -0.2610, -0.0737,
         0.0477, -0.0276,  0.0705, -0.0267, -0.0961, -0.0317,  0.0369, -0.0179,
        -0.0086, -0.0176, -0.0188, -0.0795,  0.1372,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4713, -0.4510,  0.0959,  0.3479,  0.0936, -0.1413, -0.1155,  0.0205,
        -0.0829,  0.0010, -0.0979,  0.3024, -0.1119,  0.1967,  0.0684, -0.0744,
        -0.1524, -0.2806,  0.2804,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1433, -0.2661,  0.0529,  0.1498, -0.0311,  0.0699,  0.1735,  0.2464,
         0.0610, -0.0188, -0.1047, -0.0691, -0.0104, -0.0500,  0.0244, -0.0512,
         0.0330, -0.0251, -0.1416,  0.1544,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4367, -0.5112, -0.1672,  0.0325,  0.0063, -0.0865,  0.2439, -0.0057,
         0.0313, -0.0555, -0.0234, -0.0182, -0.0172, -0.1147, -0.0801, -0.0404,
        -0.0706, -0.0289,  0.0201, -0.0191, -0.0397, -0.1421,  0.0652,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1240, -0.0138, -0.0614, -0.0311, -0.0611,  0.0270, -0.0228,  0.0009,
        -0.0555, -0.0449,  0.0437,  0.0413,  0.0017, -0.0771,  0.1443,  0.0718,
        -0.0533, -0.0333, -0.0274,  0.0805,  0.0087, -0.0869, -0.0100, -0.1918,
         0.0172,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0328,  0.1539,  0.0658,  0.0266, -0.0424,  0.0741,  0.0899,  0.0693,
        -0.1963,  0.3642,  0.1130, -0.0518, -0.0210,  0.2966,  0.0224,  0.1217,
        -0.0460,  0.1105, -0.0734, -0.0068,  0.1228,  0.0072,  0.0245,  0.1379,
         0.0152,  0.0661,  0.0707, -0.0534,  0.1042, -0.0439,  0.0551,  0.0143,
        -0.0251, -0.1568,  0.0573,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0527, -0.0402,  0.0182, -0.0570, -0.0102,  0.0526,  0.0185, -0.0386,
        -0.0169,  0.0370,  0.0332, -0.0336, -0.0625, -0.0591, -0.0678, -0.0244,
        -0.0488, -0.0198, -0.0747, -0.1359,  0.1066,  0.0163,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4633,  0.1034,  0.0684,  0.0593, -0.0112, -0.0348,  0.0438, -0.0220,
        -0.1299, -0.0470,  0.0466,  0.0380,  0.0505, -0.0374, -0.0450, -0.0230,
         0.1125,  0.0159, -0.1085, -0.1722,  0.1165,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3708, -0.0402, -0.0342, -0.0234,  0.1009, -0.1046, -0.0619, -0.0596,
         0.1784,  0.0156, -0.0120, -0.0049, -0.1085, -0.0037, -0.3256, -0.1565,
         0.1981, -0.2957, -0.3106,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6951,  2.3239, -2.3149, -0.3050,  0.0997, -0.3050, -0.5100, -0.2752,
         0.2462,  0.1285, -0.4034, -0.5954, -0.5485, -0.2801,  0.1492, -1.6163,
        -1.9461, -0.3456,  0.5744, -1.1661, -1.1297, -1.9852, -1.4445, -0.1332,
        -0.9053,  0.0266, -0.0993, -0.2685,  0.3502, -0.1188, -0.2956, -0.5310,
        -0.0358,  0.0722,  0.4244, -0.3440,  1.1081, -0.6665, -0.2877, -0.0634,
         0.0564,  0.5336, -0.2586,  0.0635,  0.2323,  0.3639, -0.3582],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-15.3975,  -2.3763,  -1.4240,  -0.3267,  -0.4689,  -1.9433,  -0.6109,
          3.1449,  -1.2317,  -0.2395,  -0.8407,  -1.9102,   1.6223,  -1.7034,
          0.3929,  -0.3929,  -0.3646,  -1.1144,  -0.7730,   1.7806,   5.5540,
         -2.6653,  -3.4204,   2.5820,  -0.4446,  -1.6323,   2.1445,   0.4284,
          0.6126,  -0.6011,  -0.2126,  -2.8871,  -9.4904,  -6.7959,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0633, -0.0720,  0.0380,  0.0033,  0.0323, -0.0064,  0.0047, -0.0022,
        -0.0348,  0.0217,  0.1163, -0.0604,  0.0016,  0.0168,  0.1230,  0.1676,
         0.1072,  0.0938, -0.0814,  0.0347, -0.0433, -0.0860,  0.0070, -0.0596,
        -0.0756, -0.0121, -0.0573, -0.0428,  0.0497, -0.0466,  0.0027, -0.0096,
         0.2415, -0.0367,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.6687,  0.0771,  0.1460,  0.2920,  0.0051,  0.0119, -0.0211, -0.2556,
        -0.4691, -0.0326, -0.4141, -0.1666, -0.0903, -0.0823, -0.1046, -0.0031,
         0.3361,  0.3428,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2240,  0.2158,  0.0890, -0.0292,  0.0513, -0.0559,  0.0692, -0.0343,
         0.0364, -0.0442,  0.0272, -0.0655,  0.0919,  0.0312,  0.0102, -0.0161,
         0.0391, -0.1997, -0.0180, -0.1257, -0.0767,  0.2394, -0.0992,  0.0871,
        -0.0652, -0.1038,  0.1032,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0780, -0.4563, -0.1751,  0.0102, -0.0239,  0.0119, -0.1672, -0.1932,
        -0.1106, -0.1202, -0.0740,  0.1156, -0.0876, -0.0034, -0.0065,  0.1045,
        -0.0268, -0.0042,  0.0094, -0.0472, -0.0440, -0.0538, -0.0252, -0.0946,
        -0.2378, -0.2629,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3059,  0.0130, -0.0069, -0.0224, -0.0965, -0.0634,  0.0149, -0.0020,
         0.0011, -0.0960, -0.0314,  0.0215, -0.0227, -0.0424, -0.0685,  0.0048,
         0.0672, -0.0340, -0.0744,  0.0301, -0.0693,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5229,  0.3435, -0.7864,  0.3623,  0.0595, -0.1630,  0.6479, -0.0258,
        -0.3675,  0.4787,  0.0488,  0.1797,  0.3522, -0.9682, -2.3637,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1186,  0.1817,  0.0311,  0.1363, -0.1768, -0.0170,  0.0322, -0.0515,
         0.0195, -0.0887,  0.0641,  0.1707, -0.0508,  0.0141, -0.0108, -0.0437,
        -0.0770,  0.0880,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3981, -0.1783,  0.2015, -0.3234, -0.2838, -1.4518,  0.3992, -0.3257,
         0.4838, -0.3368,  0.2326, -0.2239, -0.4649,  0.1684,  0.2113, -0.0091,
         0.4509, -0.2477,  0.2188,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1388, -0.0347,  0.0383,  0.0171, -0.0190,  0.0178, -0.0066, -0.0967,
         0.0163,  0.0571, -0.0037,  0.1025,  0.0628, -0.0724,  0.0044,  0.0521,
         0.0679,  0.0769, -0.0041, -0.4677, -0.2149,  0.3469,  0.0113, -0.0359,
         0.0383,  0.0426, -0.0442, -0.0960, -0.0187,  0.0136,  0.0322,  0.1222,
        -0.0157, -0.0719,  0.1061,  0.0890, -0.0113, -0.0122,  0.0222,  0.0781,
         0.0061,  0.0183, -0.1779,  0.1881], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1176, -0.0703, -0.0689, -0.0317,  0.0143,  0.0724,  0.0231, -0.0204,
        -0.0207,  0.0164, -0.0408, -0.1031, -0.0236, -0.0251,  0.0165, -0.0660,
         0.0779, -0.0608,  0.0795,  0.0398, -0.0502,  0.0055, -0.0731,  0.0180,
         0.0358, -0.0667,  0.0384, -0.0609,  0.0009, -0.0128, -0.0533,  0.0365,
         0.0149,  0.0458, -0.2042,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5996e-01, -1.7201e-01,  1.3865e-01,  5.8206e-02,  8.5818e-02,
         2.2769e-01,  2.8366e-02,  7.5255e-02,  1.6756e-01,  9.9896e-02,
         9.5398e-02,  6.0355e-02, -1.2122e-04,  1.4611e-01,  3.8100e-02,
         3.2051e-01,  1.3717e-01,  4.6024e-02,  2.2075e-01, -7.1633e-02,
         6.3805e-02,  4.9803e-03, -1.7688e-01, -2.0988e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9398e-01,  2.3302e-02, -1.5108e-02, -9.5398e-02,  8.9386e-02,
        -5.7579e-02, -3.9167e-02, -1.0227e-01, -5.1132e-02,  6.5482e-02,
        -1.5986e-01,  6.5400e-02, -1.5424e-02,  3.5133e-02, -3.0664e-02,
        -2.8847e-02, -4.8977e-02,  3.1759e-02, -6.2415e-02, -9.9013e-02,
        -6.4362e-02, -2.6735e-02, -5.9716e-02, -9.7063e-02, -6.3325e-03,
        -4.2040e-02, -2.5494e-02, -3.1265e-02, -3.2006e-02, -5.4730e-02,
         1.3701e-04, -5.8754e-03, -2.4186e-01,  7.9450e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3073e-01, -4.8050e-01, -2.9081e-01, -1.1309e-01,  9.8917e-02,
         6.5573e-02,  6.2630e-02, -1.3324e-01, -3.5453e-02,  6.8188e-02,
         1.1199e-01,  4.0852e-02, -5.0727e-02, -5.8358e-02, -3.5522e-02,
        -2.1560e-01,  4.2876e-02, -4.6095e-02,  2.0958e-02,  1.8965e-02,
         1.6842e-02,  5.2865e-02, -1.5204e-01,  2.3714e-02, -4.3686e-02,
        -4.5267e-04,  1.0502e-02,  1.1665e-01,  8.3267e-02, -3.0928e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 6.2686e-01, -9.7723e-02, -1.0813e-01, -2.3641e-01,  7.6097e-02,
        -7.0868e-05,  3.2886e-02, -6.0796e-02, -1.7975e-02, -2.7356e-02,
        -1.1926e-02, -3.1438e-01,  4.8088e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6171,  0.1179,  0.2382, -0.0183, -0.0415,  0.0478, -0.1650, -0.1079,
        -0.2759, -0.0881, -0.0723,  0.0660,  0.0151,  0.0511,  0.0029, -0.0279,
        -0.5965,  0.2148,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0020, -0.0872, -0.0100, -0.1900, -0.1152,  0.1319, -0.0517, -0.0917,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7981e-02, -3.1520e-02, -3.3756e-02, -4.1031e-02,  9.8213e-03,
         3.5556e-02, -1.6344e-02, -2.9247e-02,  5.5835e-02,  1.9158e-02,
        -5.2082e-04,  3.9555e-02, -2.5334e-02,  1.2937e-02, -2.4060e-03,
        -2.5825e-02, -2.1360e-02,  3.2785e-02, -3.6028e-02, -2.3232e-02,
        -1.2366e-01,  4.2686e-02,  7.3626e-02,  4.4528e-02,  4.8502e-02,
         5.1768e-03, -8.0989e-03,  5.6053e-02, -8.8000e-02, -4.0657e-02,
        -3.0210e-02, -1.6947e-04, -1.9103e-02, -4.7646e-02, -5.5164e-02,
        -1.9266e-02, -4.3825e-02,  1.0785e-02,  8.1267e-02, -6.1835e-02,
        -6.1137e-05, -3.4650e-02, -5.5744e-02,  4.6801e-02, -7.9337e-02,
         2.9517e-02,  1.4712e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8216,  0.1458,  0.5298, -0.0975, -0.2872,  0.0722,  0.0109, -0.0522,
        -0.0451, -0.0102, -0.0865,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4912, -0.2537,  0.2410,  0.0627, -0.1357, -0.0866, -0.0556, -0.3214,
         0.0990,  0.1181,  0.1447,  0.2842, -0.0916,  0.0349, -0.4156,  0.1679,
        -0.2192,  0.3820,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1089, -0.2925, -0.2584,  0.0086, -0.1537, -0.0051,  0.0892,  0.0372,
        -0.0812, -0.0548, -0.0303,  0.2942, -0.0011, -0.1531, -0.5710,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3596,  0.8312,  0.4244, -0.4412, -2.0664, -1.3759,  0.9926, -0.7933,
        -0.9573, -0.7338,  0.0973, -0.9855,  1.2840,  2.0025, -0.6246,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2551, -0.0877, -0.1208, -0.0333,  0.0850,  0.0384,  0.0166,  0.0260,
        -0.0989, -0.0012, -0.0019, -0.0105, -0.0353,  0.0046, -0.0415, -0.0617,
        -0.1133, -0.0533, -0.0619,  0.0767,  0.0092, -0.0627, -0.0158,  0.0582,
        -0.0191,  0.0333,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4503, -0.9999, -0.8339,  0.2663,  0.1478,  0.2734,  0.2862, -0.1686,
        -0.1097, -0.1716, -0.1627, -0.0290, -0.0170,  0.1802,  0.0889,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4759, -0.0240, -0.0172, -0.0295,  0.1103,  0.0939,  0.0535,  0.0349,
         0.0758,  0.1669, -0.0916, -0.0037,  0.0657,  0.0752,  0.3008, -0.0298,
         0.0048,  0.0258,  0.0149,  0.0127, -0.0526,  0.0654, -0.0896, -0.0288,
        -0.0797,  0.0170,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0278, -0.0064, -0.0190, -0.0458, -0.0392, -0.0202,  0.0004,  0.0112,
         0.0105, -0.0134,  0.0141,  0.0074, -0.0031, -0.0311,  0.0437, -0.0618,
        -0.1274, -0.0045,  0.0481,  0.0281, -0.0200, -0.0317,  0.0129, -0.0002,
         0.1949,  0.0536,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.0246, -0.1122, -0.1133, -0.0454, -0.1193, -0.0245, -0.0210, -0.0540,
         0.0018, -0.0465, -0.0018, -0.0230, -0.0962,  0.1073, -0.0336, -0.0942,
        -0.0295, -0.0348, -0.0025, -0.1086,  0.0314, -0.0086,  0.0483, -0.0289,
        -0.0347, -0.0507, -0.0009,  0.0608,  0.0551,  0.0414, -0.0138, -0.0299,
         0.0097,  0.0296,  0.0663, -0.1265,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4025,  0.0930, -0.0177, -0.3329, -0.0879,  0.3583,  0.0253,  0.0250,
        -0.0144,  0.1787,  0.0620,  0.0652,  0.3314,  0.2814, -0.0786, -0.0513,
        -0.1048,  0.0757, -0.0043, -0.6534,  0.2743,  0.6332,  0.3657,  0.8464,
        -0.0759, -0.0335, -0.0240, -0.1273,  0.0120,  0.0877,  0.0112, -0.0024,
         0.0723,  0.4110, -0.1584, -0.3460,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1450, -0.1004, -0.0704,  0.0849, -0.1743,  0.1099,  0.2227, -0.0321,
        -0.0505,  0.0557,  0.0467, -0.0594,  0.0430, -0.0604, -0.0655,  0.0066,
        -0.1864,  0.0596,  0.1418,  0.0752,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4682,  0.0282,  0.0038,  0.0369, -0.0984,  0.0056,  0.0493, -0.0257,
         0.0141,  0.0011,  0.0227, -0.0872,  0.0265,  0.0122, -0.0032, -0.0164,
        -0.0417, -0.0100,  0.0466,  0.0359, -0.1279,  0.0174,  0.0473, -0.0657,
        -0.0315,  0.0158,  0.0386,  0.1348, -0.0975,  0.0215,  0.0397, -0.0264,
         0.1099, -0.0366, -0.0480, -0.1585,  0.0590,  0.0393,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1313, -0.0215, -0.0123, -0.0045, -0.0097, -0.0276, -0.0140,  0.0185,
        -0.0072,  0.1007,  0.0180, -0.0450, -0.0280,  0.0097,  0.1053, -0.0017,
        -0.0362, -0.0010,  0.0492, -0.0152, -0.0395,  0.0019,  0.0760, -0.0305,
        -0.0038,  0.0124,  0.0023,  0.0074, -0.0409, -0.0052, -0.0029,  0.0254,
         0.0179,  0.0250, -0.0260,  0.0359, -0.0083,  0.0676, -0.0399, -0.0336,
        -0.0456], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0921, -0.4121, -0.0562, -0.0298, -0.0330, -0.1174, -0.1262, -0.0362,
        -0.0197,  0.0182,  0.0691,  0.0169,  0.0681,  0.0753, -0.0534, -0.1000,
         0.0010,  0.0175, -0.0217, -0.2248, -0.2067, -0.3115,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4188,  0.2516, -0.0492,  0.0978,  0.3105, -0.0928, -0.0264, -0.0707,
         0.0610, -0.2583, -0.6001,  0.0902,  0.0064,  0.0701,  0.0063,  0.0636,
        -0.0051, -0.1455, -0.0842,  0.0176,  0.0956, -0.0469,  0.0716,  0.0768,
        -0.0015, -0.1392,  0.0678,  0.0478,  0.0435, -0.0602,  0.0970, -0.0786,
        -0.3864,  0.3987,  0.6631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2546, -0.0417,  0.0931,  0.0132,  0.1126, -0.2463,  0.0391, -0.0846,
        -0.0154,  0.0352,  0.0229,  0.0594,  0.0881,  0.0090, -0.0019,  0.2538,
         0.0891,  0.0774,  0.0250, -0.0525,  0.0501,  0.0290,  0.0199,  0.0634,
        -0.0170,  0.1198, -0.0519, -0.0063,  0.1167,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0127, -0.2318, -0.3968, -1.1229,  0.3461,  0.6788, -0.9199, -1.5658,
         0.4166,  0.2007,  0.1975,  0.1969, -0.5392, -0.0432,  0.0871,  0.4710,
        -0.1280, -0.3654, -0.1720, -0.1650,  0.3716,  0.4605,  0.5680, -0.3585,
         0.1811,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0839,  0.1243, -0.0260, -0.0708, -0.0511,  0.0069, -0.1467, -0.0164,
         0.0294,  0.1371,  0.0264,  0.0717, -0.0061,  0.1812, -0.0265, -0.0295,
         0.0607, -0.0298,  0.0112,  0.1227, -0.0407,  0.1873, -0.0010,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9061,  0.1639,  0.1786, -0.0630,  0.1094,  0.7527, -0.2524, -0.0901,
         0.0875,  0.3076,  0.2610,  0.1760,  0.2462,  0.1246,  0.0245,  0.3222,
        -0.2431,  0.1825, -0.1503,  0.0355,  0.0110,  0.2279, -0.1171, -0.0059,
         0.0717,  0.0974,  0.1236, -0.1412, -0.3602, -1.1650, -0.0597, -0.0438,
        -0.6763, -0.8395, -0.9559, -0.0598,  1.3840,  0.0483, -1.3629,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1764, -0.3912, -0.0578, -0.1025, -0.1688, -0.0951,  0.1230, -0.0880,
        -0.1512, -0.0882,  0.0570,  0.1762,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 2.4793e-01, -9.5619e-02, -2.5227e-01,  1.7208e-02,  3.6892e-03,
        -1.3660e-01, -7.7659e-03,  4.2623e-02, -5.9992e-03,  1.4352e-03,
         1.2259e-02, -1.0112e-02, -5.0527e-02, -9.1347e-02, -4.0385e-02,
        -4.1592e-03,  3.7414e-02, -1.6511e-02, -6.8270e-03, -4.5996e-03,
        -3.0210e-02, -2.7814e-02, -2.4420e-02, -4.2852e-03, -3.5801e-02,
        -2.0025e-02, -2.5766e-02, -9.4606e-02,  1.6834e-02, -1.3518e-01,
        -5.0309e-02, -2.7226e-02, -3.1785e-02, -2.7712e-02, -1.1543e-02,
         1.3471e-02, -6.6557e-02,  6.3104e-03,  1.7963e-04,  7.7040e-03,
         1.8219e-02,  4.8823e-02, -2.7882e-02,  6.2136e-02, -5.0402e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1095, -0.0596, -0.2053, -0.1419, -0.2954,  0.1811, -0.1336, -0.2399,
        -0.0587,  0.2052, -0.0521,  0.1428,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1545, -0.5124,  0.1714, -0.2951, -0.0089, -0.1918, -0.2381,  0.0879,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1981, -0.0970, -0.0680,  0.0270, -0.0428, -0.0123, -0.0116, -0.0011,
        -0.0102, -0.1012, -0.0342,  0.0160, -0.0105, -0.0037, -0.0086,  0.0590,
         0.0117, -0.0215, -0.0033,  0.0126, -0.0132, -0.0031, -0.0015, -0.0674,
        -0.0225, -0.0245,  0.0014,  0.0018, -0.0014,  0.0556,  0.0859, -0.0009,
         0.0056,  0.0270,  0.0096, -0.0187,  0.0538,  0.0158, -0.0172,  0.0228,
         0.0375,  0.0065, -0.0067, -0.0245, -0.0687, -0.0449, -0.0926, -0.0503,
        -0.0649, -0.0701, -0.0106,  0.0149, -0.0012,  0.0202,  0.0278,  0.0226,
         0.0118,  0.0129,  0.0836, -0.0732, -0.0514], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1400,  0.0713,  0.0505, -0.0187, -0.0079, -0.0184, -0.0565,  0.0423,
         0.0514, -0.1968, -0.0748,  0.0654,  0.0271, -0.0306,  0.0175,  0.0024,
         0.0535, -0.0544, -0.0014,  0.0528, -0.0340,  0.0025, -0.1123,  0.0456,
        -0.0166, -0.0497,  0.0763,  0.0468, -0.0295, -0.0623, -0.1120,  0.0446,
        -0.0144,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0851,  0.0513, -0.0652,  0.0118, -0.0890, -0.0267, -0.0813, -0.0373,
         0.0081, -0.1386, -0.0435, -0.0186, -0.0167, -0.0061, -0.0457, -0.1381,
        -0.3960,  0.2133, -0.1314,  0.0134, -0.0492, -0.1397, -0.1074,  0.1998,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0734, -0.0940, -0.0698, -0.0821, -0.0633, -0.0085, -0.0050, -0.1227,
         0.0961,  0.0176, -0.0018, -0.0427, -0.0409,  0.0097, -0.0417,  0.0028,
        -0.0207,  0.0432, -0.0300, -0.1451, -0.0103, -0.0363, -0.0783, -0.1579,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9677e-02, -2.9347e-02, -6.5353e-01, -5.1762e-01,  1.6192e-02,
         4.9614e-01, -3.9930e-04, -9.5722e-02, -1.5029e-01, -2.4778e-01,
         6.9272e-02,  5.5273e-01, -5.5466e-01, -1.5843e-01, -2.0842e-01,
         1.8274e-01, -8.6037e-02, -9.4997e-01, -3.8691e-02,  6.6171e-01,
        -9.9426e-02,  5.1112e-01, -7.0044e-01, -3.3859e-01,  8.3531e-01,
         6.3435e-02, -2.8353e-01, -1.8400e-01,  1.2965e-01, -1.4401e-01,
         2.8998e-02,  4.6110e-02,  3.2238e-01, -7.4805e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1388, -0.3478, -0.0507, -0.0215, -0.1153,  0.0379,  0.0040,  0.0053,
        -0.0748, -0.1736, -0.0730,  0.0030, -0.1003, -0.1603,  0.0903,  0.2448,
        -0.0938,  0.0281, -0.0224, -0.0152,  0.0192, -0.0662, -0.1263, -0.0355,
         0.1781,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1661,  0.0668, -0.3247,  0.0067, -0.0228,  0.0840, -0.0051,  0.0517,
        -0.0316, -0.2664, -0.1904, -0.0223, -0.0179, -0.0221, -0.0107,  0.0165,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9924,  1.1105,  0.7896, -0.8981,  0.1297,  0.0408,  0.1285, -0.2119,
        -1.0238,  0.0460,  0.5587,  0.3580,  0.5131,  0.3479,  3.0838,  0.5599,
         0.0672, -0.3851,  0.1287,  0.7352,  0.1107, -0.9031,  2.2741,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1326,  0.0120,  0.0241, -0.0970, -0.0107, -0.0452,  0.0709, -0.0323,
         0.0158, -0.0055, -0.2005, -0.0316, -0.0584, -0.1478,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.2361, -0.0439,  0.0096, -0.0475,  0.0174,  0.0034, -0.0092, -0.0058,
        -0.0722, -0.0112, -0.0370,  0.0011, -0.0254,  0.0072, -0.0412, -0.0195,
         0.0319, -0.0616, -0.0007, -0.0515, -0.0300,  0.0613, -0.1766,  0.0526,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2388,  0.2515,  0.0437, -0.1301, -0.1735,  0.0422, -0.1159, -0.0518,
        -0.1527, -0.0236,  0.0124,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3266, -0.0982, -0.0816,  0.1211, -0.2082, -0.0424, -0.1795,  0.1069,
        -0.0479, -0.0213, -0.0404,  0.0484, -0.0542, -0.0773, -0.0042,  0.0422,
         0.0025, -0.1131, -0.0095,  0.0198,  0.1380,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2653, -0.1557, -0.0779, -0.0113,  0.0319,  0.1364, -0.0806, -0.0767,
         0.0217, -0.0253,  0.0602,  0.0821,  0.1621,  0.0603,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2383, -0.0670, -0.0721,  0.0196, -0.1039, -0.0332,  0.0237, -0.3074,
        -0.0498,  0.1650, -0.1579,  0.3073, -0.0236,  0.0688,  0.0314,  0.1382,
        -0.1214,  0.2163, -0.0217, -0.0124, -0.2120,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0082,  0.0113, -0.1134, -0.1386,  0.0829, -0.0189, -0.0198,  0.0139,
        -0.1174, -0.0716, -0.0462,  0.0099, -0.0006,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8139e-02, -5.0895e-02, -2.8448e-02, -3.9596e-03, -1.9766e-02,
         3.8699e-02, -2.8897e-02,  3.0362e-02,  9.6010e-02,  7.4241e-02,
        -3.4182e-02,  6.6503e-02, -2.5636e-02,  9.3869e-03, -3.7637e-02,
        -1.1885e-02,  5.9502e-03,  7.6508e-03, -7.3440e-03,  3.2099e-02,
        -5.8106e-02, -3.5465e-02, -4.1665e-02, -1.0269e-01, -1.6569e-01,
         1.2535e-02, -9.3330e-02, -3.6103e-02,  6.5790e-03, -6.3216e-02,
        -2.8695e-02,  1.3294e-01, -6.2553e-05,  7.5203e-03,  7.2463e-02,
        -5.5479e-02, -4.0040e-02,  4.7782e-03,  6.0194e-03, -1.4149e-02,
         2.5631e-02,  4.0660e-02,  1.9659e-02,  1.6951e-01,  3.1481e-03],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1059, -0.0124, -0.0632, -0.0168, -0.0135,  0.0893, -0.0620, -0.0191,
        -0.1209, -0.0277, -0.0658,  0.0017, -0.0874, -0.0836, -0.0777,  0.0012,
         0.0167, -0.0033, -0.0077, -0.0133,  0.0571, -0.0679, -0.0252, -0.0220,
         0.1329,  0.0419, -0.0101, -0.0575, -0.1094, -0.0519,  0.1652,  0.1312,
        -0.1057, -0.1729,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0517, -0.1871, -0.1204, -0.0249, -0.0629,  0.0119, -0.0559, -0.0549,
         0.2181, -0.0139, -0.0820, -0.0022,  0.1063, -0.1053,  0.0019, -0.0885,
         0.0068, -0.0204, -0.0168,  0.0293, -0.0995, -0.0138, -0.0079, -0.0706,
         0.0149, -0.0354,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5553, -0.2441,  0.2303, -0.0070, -0.1500,  0.2155,  0.2180,  0.0271,
         0.1656, -0.0382, -0.0451, -0.0525,  0.0095, -0.0688, -0.1495, -0.0400,
        -0.1491,  0.0852, -0.0644, -0.1292,  0.0167, -0.1573,  0.0074,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0435,  0.0958, -0.0029, -0.0590, -0.0006,  0.0163,  0.0067,  0.0164,
         0.0513,  0.0320,  0.0149,  0.0738, -0.0794, -0.0015,  0.1233, -0.0466,
         0.0038,  0.0665,  0.0371, -0.0349, -0.0229,  0.0641,  0.0519, -0.0091,
         0.0174, -0.0342,  0.0368,  0.0275,  0.0015, -0.0557, -0.0217,  0.0515,
        -0.0310, -0.1127, -0.0723, -0.0199, -0.0380, -0.0013, -0.0265,  0.0041,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3889e-03, -5.1127e-02, -4.5593e-02,  1.6418e-01, -1.6617e-01,
        -8.7943e-02, -3.5682e-01, -9.6322e-02,  1.8203e-02, -6.4630e-02,
         6.5781e-02, -4.6694e-02,  3.3572e-04,  3.8102e-02, -1.2439e-02,
        -1.7072e-03, -9.9281e-02,  5.3247e-02,  6.9113e-02, -3.6580e-02,
         1.2101e-02, -1.4772e-02,  2.0913e-02,  1.8894e-02, -1.5207e-01,
        -1.7779e-02,  1.9761e-02, -1.3175e-02, -3.2561e-02, -1.9775e-02,
        -1.4717e-01, -5.7688e-02, -3.8112e-02, -1.2230e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0661,  0.0166, -0.0738, -0.0532,  0.0609,  0.0118, -0.0218,  0.0104,
        -0.0695,  0.1489,  0.0038, -0.0390,  0.1377, -0.0035, -0.1370, -0.0332,
        -0.0175, -0.0381, -0.0097,  0.0096, -0.0898,  0.0639,  0.0423,  0.0344,
         0.0138, -0.1373,  0.0396, -0.0567,  0.0613,  0.0167, -0.0117, -0.0184,
         0.0385, -0.0378, -0.0033,  0.0021,  0.0022, -0.0231, -0.0083, -0.0226,
        -0.0239,  0.0275, -0.0276,  0.0014,  0.0122, -0.0655, -0.0118,  0.0098,
         0.0197,  0.0125,  0.0148,  0.0232], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1075,  0.0140, -0.0069,  0.1052, -0.0647,  0.0234, -0.0416, -0.0569,
        -0.0449, -0.0063, -0.0061,  0.0459, -0.0446, -0.0177, -0.0275, -0.0214,
        -0.0843, -0.0113, -0.0135,  0.0425, -0.0342, -0.0066, -0.0373, -0.1215,
        -0.0214,  0.0258, -0.0437, -0.0172, -0.1135,  0.0100,  0.1435,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0748,  0.0602, -0.0346, -0.0243, -0.0046, -0.0217, -0.0384, -0.0457,
        -0.0418, -0.0844, -0.0309, -0.0102, -0.1412,  0.0172, -0.0493, -0.0799,
        -0.0122,  0.0536,  0.0123, -0.0159, -0.0126,  0.0258,  0.0413,  0.0133,
         0.0430,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3706,  0.3741, -0.0817, -0.0412,  0.2835, -0.1726,  0.1780,  0.1858,
        -0.3873,  0.2924, -0.3281,  0.9175,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2282, -0.0755, -0.0028,  0.0032, -0.0104, -0.0351, -0.0801,  0.0096,
        -0.0740,  0.1615, -0.1031, -0.0394, -0.0526, -0.0827, -0.0510, -0.0183,
         0.0413, -0.0989, -0.0278, -0.0135, -0.0320,  0.0035,  0.0478,  0.3730,
        -0.0182, -0.0770, -0.1488, -0.1287, -0.0865,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1459, -0.0027, -0.2156, -0.0827, -0.0619, -0.1365, -0.1196,  0.1037,
        -0.0745, -0.0657,  0.0926, -0.1040,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0616, -0.0326, -0.1269, -0.0615,  0.1508, -0.0806, -0.2960,  0.1869,
        -0.1010,  0.0025,  0.0571,  0.1719,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0042,  0.2657, -0.0928, -0.0515, -0.0849, -0.1468,  0.0461, -0.0221,
        -0.0946,  0.0720, -0.0362, -0.0886, -0.0104,  0.2201, -0.1230, -0.0066,
        -0.0419, -0.1162, -0.1017, -0.1052,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0086e-01, -8.5589e-02, -7.1843e-02, -7.7199e-02, -1.0314e-01,
        -5.4530e-02, -2.0074e-02,  3.9645e-02,  1.9156e-02, -1.6485e-02,
         2.2855e-02, -4.4485e-02,  2.8462e-02,  5.7560e-02, -1.7073e-02,
         3.6945e-02, -1.7596e-02, -1.8600e-02,  1.2801e-02, -5.6122e-02,
         5.6774e-04, -4.2394e-02,  6.9464e-02,  3.8528e-02,  8.3507e-03,
        -7.2496e-03, -2.6580e-02, -3.3866e-02, -3.5860e-02, -6.0048e-02,
        -4.8054e-02, -7.2175e-02, -6.2986e-05, -1.7453e-02, -8.3942e-03,
         1.6747e-01, -5.6443e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2155, -0.4740, -0.0341,  0.0682,  0.2688,  0.2134, -0.0240,  0.0705,
        -0.1401,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1962, -0.0268, -0.0008, -0.1041, -0.0409,  0.0570, -0.0451, -0.0240,
        -0.0367,  0.0561,  0.0392,  0.0609,  0.0369, -0.0490, -0.0222, -0.0276,
        -0.0196,  0.0990, -0.0366, -0.0037,  0.0706,  0.0334,  0.0448,  0.0065,
        -0.0138, -0.0180,  0.0130,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5537e-01, -1.0484e-01, -3.6826e-02, -8.2031e-02, -1.1812e-01,
        -7.8459e-03, -2.4622e-02, -2.0351e-03,  1.8607e-02, -1.4342e-01,
        -6.9482e-02,  4.4168e-02,  1.5645e-04, -1.6080e-02, -7.5318e-02,
        -2.0515e-02,  1.5031e-02,  3.4323e-02,  1.3427e-02,  1.1759e-01,
         2.4538e-01, -2.4991e-02, -3.4967e-02, -2.3611e-02, -2.4429e-02,
        -6.5237e-02, -2.4720e-02, -2.1217e-02, -9.3850e-02,  1.1663e-02,
        -6.7894e-03, -5.8143e-02,  6.6947e-02, -1.1249e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0652, -0.0219, -0.0274,  0.0491,  0.1420,  0.1057,  0.0412,  0.0190,
        -0.0064,  0.1040,  0.2645,  0.0640,  0.0371,  0.0194, -0.0132, -0.0871,
         0.1133, -0.0191, -0.0077, -0.0103,  0.0257, -0.0470,  0.0360, -0.0413,
         0.1278,  0.0236,  0.0365, -0.0369,  0.0023, -0.0281, -0.0372,  0.0680,
         0.0176,  0.0248,  0.0182,  0.0369,  0.0680, -0.0086, -0.1321, -0.0246,
        -0.0049,  0.0049, -0.1357, -0.0014, -0.0532,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2417, -0.1875,  0.0652,  0.0338, -0.1259, -0.0814, -0.0375, -0.1293,
        -0.1118, -0.1159, -0.4024, -0.1358, -0.0142,  0.1145, -0.2256, -0.1349,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4992, -0.1679, -0.1173,  0.1040,  0.0625, -0.2032, -0.1093,  0.0561,
        -0.0791, -0.1736, -0.0282, -0.1168, -0.1147, -0.0402, -0.0826, -0.0134,
        -0.0435, -0.0776,  0.0111,  0.0245, -0.0387, -0.0129,  0.0260, -0.0342,
         0.0748,  0.0596, -0.1026,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3080, -0.1084, -0.2119,  0.0646, -0.0289,  0.0445,  0.0152,  0.0115,
         0.0077,  0.0723, -0.0092,  0.0263,  0.0118, -0.0047, -0.0150,  0.0178,
        -0.0042, -0.0101, -0.0118,  0.0775, -0.0507,  0.0861, -0.0506, -0.0088,
        -0.0827, -0.0128, -0.0021, -0.0100, -0.0640, -0.0032,  0.0017,  0.0115,
        -0.0325, -0.0414, -0.1109, -0.0450, -0.0629, -0.0122, -0.0661, -0.0488,
         0.0208, -0.0967, -0.0146,  0.0201,  0.0366,  0.0733], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0387, -0.0495, -0.0477, -0.0013, -0.0413, -0.0221, -0.0258, -0.0585,
        -0.0502, -0.0730, -0.0801, -0.0628, -0.0150, -0.0381, -0.0746,  0.0077,
         0.0397, -0.0617, -0.0159, -0.0617,  0.0097, -0.0498,  0.0127,  0.0050,
         0.0248,  0.0035, -0.0792,  0.0013,  0.2029, -0.0339,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0276,  0.1722,  0.0758, -0.2332,  0.0547,  0.2194, -0.0041, -0.1489,
        -0.2361,  0.3324, -0.1386,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2082, -0.1298, -0.0625, -0.2567, -0.0025, -0.0329, -0.0534, -0.1754,
        -0.0252, -0.3294,  0.1039,  0.0107, -0.0355, -0.1566, -0.1234, -0.0117,
        -0.0067,  0.0292,  0.0526, -0.2307,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0969,  0.0984, -0.0446, -0.0906, -0.0791, -0.2171, -0.0971, -0.5872,
        -0.0145, -0.0680,  0.0542,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3688, -0.2190, -0.2296, -0.0879,  0.0142, -0.1638, -0.0555, -0.1660,
        -0.0445, -0.2453, -0.1164, -0.0385, -0.1356, -0.1775, -0.0650, -0.0456,
        -0.1918, -0.1129, -0.0630, -0.1611, -0.2528,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0537, -0.0629, -0.0745, -0.0941, -0.0228, -0.1211,  0.1102,  0.1183,
         0.0411, -0.0569,  0.0358, -0.0161,  0.0115,  0.0070,  0.0292,  0.0382,
         0.0702, -0.0372,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0307, -0.0549, -0.1057,  0.0695,  0.0578, -0.0492, -0.0186,  0.0214,
        -0.0135, -0.0115,  0.0293,  0.0556, -0.0248, -0.0295, -0.0559,  0.0214,
        -0.0482, -0.0686,  0.0245, -0.0219, -0.0653, -0.0140, -0.0773, -0.0385,
        -0.0112,  0.0204, -0.0704,  0.0100, -0.0297, -0.0378,  0.0446, -0.0195,
        -0.0345,  0.0330,  0.0006,  0.0563, -0.0958,  0.0494, -0.0377,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1027, -0.2245,  0.0583,  0.0281, -0.0352, -0.0347,  0.0271,  0.1348,
         0.0769,  0.0023, -0.0934, -0.0221, -0.0599,  0.1400, -0.2528, -0.0374,
        -0.0090,  0.1001, -0.0162,  0.0230, -0.1294,  0.0158, -0.0290,  0.0023,
        -0.0575, -0.0510, -0.0334, -0.0689, -0.1284, -0.1093, -0.0114,  0.0151,
        -0.1006,  0.1770,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.4264, -0.2386,  0.0099, -0.1101, -0.0082, -0.1303,  0.1043, -0.1504,
        -0.2085,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1101, -0.1177, -0.0055,  0.0645,  0.0048,  0.0468,  0.0088, -0.0054,
         0.1187, -0.0332, -0.0146, -0.0591,  0.0266,  0.0028,  0.0083,  0.0444,
        -0.0044, -0.1997,  0.0091, -0.0121, -0.0403,  0.0419, -0.0409, -0.0582,
        -0.0178, -0.0649, -0.0570, -0.0457,  0.0288, -0.0226, -0.0464, -0.0687,
        -0.0232, -0.0687,  0.0186, -0.0630, -0.0275, -0.0032,  0.0434,  0.0289,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1120, -0.0244, -0.0644, -0.0145, -0.0458, -0.0069,  0.0113, -0.0040,
        -0.0300, -0.0301, -0.0054, -0.0267, -0.0021, -0.0084, -0.0156,  0.0918,
         0.0022, -0.0144, -0.0073, -0.0052, -0.0003, -0.0273,  0.0102,  0.0289,
         0.0633,  0.0208, -0.0391,  0.0343,  0.0368, -0.0911, -0.0430, -0.0186,
         0.0036, -0.0256, -0.0221, -0.1432, -0.0172, -0.0064,  0.0127,  0.0080,
        -0.0209,  0.0037, -0.0569,  0.0157, -0.0145,  0.0080, -0.0142,  0.0206,
        -0.0099, -0.0051,  0.0474, -0.0074,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0800, -0.0827, -0.1267,  0.0564,  0.0188,  0.0004, -0.0567,  0.0311,
         0.0008,  0.0028, -0.0102, -0.0435, -0.0771, -0.0296, -0.1213, -0.0622,
         0.0696,  0.1184, -0.0353, -0.0739, -0.1165, -0.1627,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2552, -0.5678, -0.1815,  0.0700, -0.0773,  0.0224,  0.0508, -0.0767,
         0.0351,  0.0319,  0.1423,  0.0622,  0.0796,  0.0427,  0.0373,  0.0209,
        -0.0316, -0.0318,  0.0933,  0.1115, -0.2345,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0565, -0.1866, -0.0737, -0.0484, -0.0838,  0.0057, -0.1059,  0.0408,
        -0.0430, -0.0696, -0.0670, -0.0362, -0.0622, -0.0977, -0.0048,  0.0314,
        -0.0123, -0.0383, -0.0695,  0.0004, -0.0137, -0.0235, -0.0261,  0.0471,
         0.0602,  0.0343, -0.0524,  0.0018, -0.0283, -0.0128, -0.0197,  0.0139,
        -0.0313, -0.0142, -0.0114,  0.0179,  0.0446, -0.0205, -0.0019, -0.0093,
        -0.0123, -0.0715, -0.1818, -0.1005, -0.0363, -0.0100, -0.0487, -0.0136,
         0.0199, -0.0681, -0.0453,  0.0244, -0.0186, -0.0477, -0.0153],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1504e-02,  5.8215e-03,  1.1915e-01, -1.7066e-02, -5.4223e-03,
        -2.1995e-02,  7.7382e-02,  1.3760e-01,  4.1201e-05,  4.3750e-02,
         6.3497e-02,  4.4890e-02, -7.6243e-02, -9.9188e-03, -3.4451e-02,
         3.0071e-02,  7.7280e-02, -1.8000e-02,  1.5828e-02,  2.6298e-02,
        -4.7461e-02, -7.1986e-02, -7.0937e-02, -2.1723e-02,  9.4672e-02,
        -4.7728e-02,  3.0383e-02, -3.3540e-02,  4.2470e-02, -4.6065e-02,
         4.0984e-02, -2.5335e-02, -4.5806e-02, -6.1561e-02, -3.7862e-02,
        -1.1436e-02,  2.3096e-03,  1.2228e-02,  1.3908e-02,  5.3999e-03,
        -2.5353e-02,  1.0665e-02, -3.1092e-02,  2.6885e-02, -4.6510e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1790, -0.1799, -0.1192, -0.0017,  0.1152, -0.0324, -0.0471, -0.0931,
         0.0113,  0.0150, -0.0260,  0.0777,  0.1535,  0.0678,  0.0080,  0.0796,
         0.0343,  0.0371, -0.0140, -0.0214, -0.2340,  0.2339,  0.0076, -0.0942,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1522, -0.0735,  0.0462,  0.0406, -0.0535, -0.0208, -0.0920, -0.0328,
        -0.0629, -0.0276, -0.1385, -0.0245, -0.0224,  0.0333, -0.0179,  0.0203,
        -0.0461, -0.0417, -0.0161,  0.0004, -0.0307,  0.0224, -0.0086, -0.0939,
        -0.1410, -0.0010, -0.0723,  0.0326, -0.0179, -0.0723,  0.0254,  0.1147,
        -0.0470,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0339, -0.0661,  0.1475, -0.0820, -0.0052, -0.1178, -0.0747, -0.0059,
        -0.1340, -0.1168,  0.0366, -0.0086,  0.0174, -0.0920,  0.0360,  0.0532,
        -0.0082, -0.0099,  0.1077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0566,  0.0294,  0.2231, -0.0487,  0.1404, -0.0328,  0.0365,  0.0234,
         0.0190,  0.1104,  0.1688,  0.1400,  0.0685,  0.0092,  0.0777,  0.0284,
         0.1243, -0.2515, -0.0225, -0.0011, -0.0568,  0.0203,  0.0655, -0.2972,
         0.2436,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2251,  0.0127, -0.0675,  0.0556, -0.2203, -0.0761, -0.2882,  0.3034,
         0.0388, -0.0347, -0.0702, -0.1287, -0.0571, -0.0434, -0.1765,  0.0260,
        -0.0902,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.1744, -0.1079, -0.0028, -0.0247,  0.0506, -0.0358, -0.0015, -0.0080,
        -0.0779, -0.1188,  0.0744,  0.0678,  0.0258, -0.0276, -0.0701, -0.0752,
         0.0107, -0.1209,  0.1135,  0.0325,  0.0479, -0.0445, -0.0932, -0.0329,
        -0.0640,  0.0283, -0.0418,  0.0304,  0.0913, -0.0143, -0.0931, -0.0824,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0359, -0.0780, -0.0063,  0.0092, -0.0622, -0.0055, -0.0591, -0.0319,
        -0.0035, -0.0541, -0.0161, -0.0597,  0.0574, -0.0675, -0.0710,  0.0494,
        -0.1246, -0.0631, -0.0070,  0.0696, -0.0029,  0.0378, -0.0196, -0.1195,
         0.0353,  0.0373,  0.3301, -0.0460,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0550, -0.1828,  0.1093,  0.0096, -0.1483, -0.1149, -0.0955, -0.0220,
         0.0048,  0.0362,  0.0555, -0.0411, -0.0498, -0.0462, -0.0065,  0.0242,
        -0.0477, -0.0014, -0.0315,  0.0521,  0.0002, -0.0531, -0.0440, -0.0182,
        -0.0476,  0.0083,  0.0834,  0.0742,  0.0322,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1143e-02, -8.8751e-05, -3.3499e-02, -6.1005e-02, -6.9903e-02,
        -2.7542e-02, -6.5071e-02,  2.2389e-02, -7.2578e-02, -4.1892e-02,
        -5.5013e-02, -2.5388e-02, -2.9830e-02, -6.4822e-03, -4.6529e-02,
        -1.5078e-02, -1.4358e-03, -5.2812e-03,  2.3343e-03,  4.6489e-03,
        -4.1544e-02, -1.6795e-02, -8.4776e-03, -4.6126e-02, -2.1342e-02,
         1.6006e-02, -2.8685e-02, -2.2320e-02,  1.1194e-01, -1.5351e-02,
        -3.5667e-02,  6.3974e-02, -9.0356e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0429, -0.0825,  0.0273, -0.0733, -0.1380, -0.0520, -0.0898, -0.1019,
        -0.0033, -0.0168,  0.0430, -0.0390, -0.0539, -0.0420, -0.0484, -0.0175,
        -0.0024,  0.0132, -0.0433,  0.0385, -0.0725, -0.0040,  0.1704, -0.0604,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1705,  0.0007, -0.0080,  0.0029, -0.0493, -0.1412, -0.1607, -0.0659,
        -0.0503, -0.0472, -0.0216,  0.0040, -0.0207, -0.0355, -0.0015, -0.0376,
        -0.0594, -0.0504,  0.0021, -0.0152,  0.0173, -0.0616, -0.0857,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6355e-02, -8.9232e-02, -5.1676e-02, -6.7398e-05, -2.8050e-02,
        -6.0464e-02,  3.4390e-02, -3.6449e-03, -4.1033e-02, -7.5137e-02,
        -4.6262e-02, -7.7329e-02, -6.0421e-02, -3.1256e-02, -3.3053e-02,
         1.3153e-02, -3.3430e-03, -3.0877e-02, -2.9213e-02,  4.9160e-02,
        -4.1289e-02,  4.6501e-02,  1.3364e-02,  5.5727e-02, -2.4178e-02,
        -5.2449e-02, -1.6287e-02, -7.3509e-02, -2.6498e-02, -2.6462e-02,
        -3.2174e-02, -1.3294e-02, -1.3459e-02,  2.1826e-02, -9.1886e-03,
        -3.9943e-02, -1.2775e-02,  5.6833e-02,  1.2078e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1043e-01, -1.0018e-01,  1.4423e-02,  2.4355e-02, -1.2582e-01,
         8.9192e-03,  3.9384e-02,  4.9261e-02,  5.2873e-02, -6.1456e-03,
        -1.1615e-02, -4.6244e-04,  5.9620e-02,  1.0436e-02, -6.0692e-03,
         2.5944e-03, -1.6536e-01, -3.2439e-02,  1.8112e-02,  5.0934e-02,
        -6.4539e-02, -1.6137e-02, -8.2218e-02,  1.3422e-02,  5.9905e-02,
        -4.8960e-02,  3.7251e-03, -3.8156e-03, -5.4347e-02,  1.3245e-02,
         2.0263e-04,  8.3799e-02,  1.8306e-01,  1.5361e-02, -1.9926e-02,
        -7.6614e-03,  4.5761e-02, -6.5945e-02,  7.2574e-03, -2.5481e-02,
        -4.8561e-02, -2.8624e-03,  3.5062e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0605, -0.0769,  0.0228, -0.0021,  0.1320, -0.0251, -0.0224, -0.0231,
        -0.1334,  0.0605,  0.0940, -0.0138, -0.0458, -0.0390,  0.0113, -0.1006,
        -0.0148,  0.0258, -0.0079, -0.0907,  0.0084, -0.0048,  0.0098, -0.0090,
        -0.0372, -0.0034,  0.0845,  0.0736, -0.0127, -0.0113,  0.0518, -0.0053,
        -0.0218,  0.1205,  0.0669,  0.0516, -0.0086,  0.0071, -0.0600, -0.0119,
        -0.0320, -0.0724, -0.0085, -0.1640, -0.0056,  0.0018, -0.0128, -0.0316,
        -0.0066, -0.0100, -0.0999, -0.0424, -0.1808,  0.0353,  0.0698, -0.0138,
        -0.0173], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3403, -0.3363, -0.1349, -0.0162, -0.1962, -0.2586, -0.0637, -0.0729,
         0.0067,  0.1546,  0.0331,  0.2748,  0.0865, -0.0117,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2600, -0.1200, -0.0771, -0.2185,  0.0052,  0.0753, -0.0996, -0.0718,
        -0.0482,  0.0560, -0.0351, -0.2060, -0.1147, -0.0173, -0.0217,  0.1290,
         0.0164, -0.0281,  0.0205, -0.0098, -0.0259, -0.0709, -0.0670,  0.0143,
        -0.0192, -0.0381,  0.0182,  0.0589,  0.1031,  0.0159,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3246,  0.9135,  0.8114, -0.3746,  0.2478,  0.8560,  0.8181, -0.7052,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0839, -0.0353, -0.0667,  0.0489, -0.0685, -0.1713, -0.0550, -0.0545,
         0.0064, -0.0415, -0.0841, -0.0384, -0.0912, -0.0344, -0.0401, -0.0116,
         0.0064, -0.0025,  0.0232, -0.0305,  0.0479, -0.0159, -0.0282,  0.0688,
        -0.0774, -0.0594, -0.0313,  0.0171,  0.0088, -0.0434,  0.0469, -0.0095,
         0.0906,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0836, -0.0250,  0.0495,  0.1306,  0.0032,  0.0196, -0.0072, -0.0141,
        -0.0642, -0.0135,  0.0003,  0.0117, -0.0097,  0.0064,  0.0037, -0.0012,
         0.0084,  0.0284, -0.0033, -0.0232,  0.0245, -0.0257,  0.0587,  0.0698,
         0.0226, -0.0205,  0.0261, -0.0442,  0.0413,  0.0656,  0.0157, -0.0106,
         0.0024, -0.0625,  0.0541, -0.0222,  0.0062, -0.0169,  0.0121,  0.0580,
        -0.0163,  0.0596, -0.0233, -0.0541,  0.1223,  0.0612,  0.0465, -0.0597,
         0.0363, -0.0012, -0.0420, -0.0194, -0.0344, -0.0293, -0.0335,  0.0076,
        -0.0129, -0.1861, -0.0437,  0.0207,  0.0096, -0.0357, -0.0221, -0.1862,
         0.0236,  0.0463,  0.0254,  0.0708, -0.0132,  0.1068, -0.0309],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4706e-01,  5.4150e-02, -6.8205e-02,  7.3406e-02, -5.6420e-02,
        -2.7641e-02, -3.5544e-01, -3.5050e-02, -7.2158e-02,  2.7638e-02,
         1.6713e-02,  3.0568e-02, -1.2976e-02,  1.4329e-03,  6.6847e-03,
        -3.2736e-02,  8.9152e-03,  7.9306e-02, -2.9470e-02, -5.3827e-02,
        -1.8691e-02, -4.4508e-03,  1.2852e-01, -3.1720e-02,  1.9303e-02,
         1.0612e-02, -4.9542e-02,  5.3627e-02,  6.1997e-02,  6.5686e-02,
        -2.9069e-02,  3.5853e-02, -1.2096e-02, -1.8461e-02, -2.6365e-02,
        -5.5658e-02, -1.0250e-02, -3.3273e-02, -5.9965e-03, -2.7732e-02,
         1.9988e-04, -2.5320e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0373, -0.1743, -0.0254, -0.2122,  0.0907, -0.0456,  0.0231,  0.0225,
         0.0276,  0.0655, -0.0354,  0.0478,  0.0097, -0.0249,  0.0511, -0.1516,
        -0.0058, -0.0850, -0.1581,  0.0014, -0.0427,  0.0901, -0.1357,  0.1782,
        -0.0386, -0.0082, -0.1861,  0.1034, -0.0833,  0.0531,  0.0203, -0.0363,
         0.0651,  0.0998,  0.0297,  0.2399,  0.4762,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1049, -0.0575,  0.0149, -0.0119, -0.1362,  0.1360, -0.0296, -0.0685,
        -0.0093, -0.1319, -0.0241,  0.0288, -0.0456, -0.0352, -0.0330,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5278,  0.0315, -0.2214, -0.0449, -0.1310,  0.0248,  0.0429, -0.0884,
         0.1103,  0.0322, -0.0460, -0.3115,  0.0953,  0.0263, -0.2092,  0.0723,
        -0.0605,  0.1490, -0.0541, -0.2092,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9655e-01,  6.7808e-02, -1.9144e-01,  1.2888e-01, -7.7191e-02,
        -8.1798e-02, -5.6451e-02, -1.0428e-01, -1.7614e-02,  5.6825e-02,
        -3.1746e-02, -1.3983e-02, -6.6364e-02,  1.5812e-02, -2.4405e-02,
        -4.1769e-02, -7.7752e-02, -2.2322e-04,  5.8384e-03, -1.5880e-02,
         1.7059e-03,  1.3608e-02,  8.6705e-02,  2.7519e-02,  1.3807e-02,
         2.0727e-02, -4.7955e-02,  3.2833e-02, -3.6325e-02,  3.2289e-02,
        -5.3618e-02, -9.8646e-02,  3.8925e-03,  5.9394e-03,  2.1331e-02,
        -1.2913e-02, -5.7736e-02,  2.3212e-02, -8.1368e-02,  2.1315e-02,
         1.1767e-01,  2.3993e-01, -3.0983e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1025,  0.0170, -0.1326, -0.1679, -0.0091, -0.0127,  0.0097, -0.1674,
         0.0252,  0.0296,  0.0187, -0.0694, -0.0136,  0.0589, -0.0325,  0.0175,
        -0.0061, -0.0646,  0.0003, -0.0558,  0.0501,  0.0187, -0.0156,  0.0201,
         0.0279,  0.0425, -0.0564, -0.0107, -0.0322,  0.1182,  0.0245, -0.0369,
        -0.0230, -0.0153,  0.0083,  0.0696,  0.1515, -0.1705,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1756, -0.0797, -0.0893, -0.1304,  0.0577, -0.0563,  0.0255, -0.1079,
         0.0620, -0.0972, -0.0241, -0.0150, -0.0419,  0.0626, -0.0040, -0.0498,
        -0.0460,  0.0257, -0.0274, -0.0247, -0.0964,  0.0522,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2051,  0.1325,  0.1111, -0.1171,  0.1224, -0.1098, -0.1096,  0.0246,
        -0.0848, -0.0518, -0.0350,  0.0478, -0.0158, -0.0236,  0.0059, -0.0019,
        -0.0914, -0.0999, -0.0450, -0.0512, -0.0463,  0.0259, -0.0326, -0.0570,
        -0.1331, -0.0456, -0.0091,  0.1357, -0.0398, -0.0712, -0.0414, -0.0993,
        -0.0191, -0.0904, -0.0232, -0.1349,  0.0458, -0.0377,  0.1337,  0.0496,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0296e-01,  2.4936e-02,  1.1850e-02, -3.1907e-02,  1.0198e-02,
        -5.3059e-02,  8.5196e-02,  3.9706e-02, -4.6573e-02, -8.4525e-02,
         6.5648e-02,  5.1248e-03, -1.5848e-04, -3.0858e-02, -1.2502e-01,
        -7.3680e-02,  6.7037e-02, -1.2265e-01, -7.7766e-02, -2.5646e-02,
         4.1119e-02, -3.8018e-03, -2.7661e-02,  3.5557e-02,  5.6793e-02,
        -3.4020e-02, -5.7160e-02, -4.8779e-02, -1.3992e-01, -3.8263e-02,
         1.2196e-01, -1.3887e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9287, -0.4577, -0.5220,  0.2776,  0.0105, -0.0529, -0.1207,  0.0547,
        -0.2099, -0.1197, -0.1786,  0.1213,  0.1499, -0.0628,  0.0473, -0.0985,
        -0.2145, -0.3133, -0.1218, -0.0077, -0.0164, -0.5998, -1.0861, -0.4415,
        -0.2698, -0.2534,  0.0143, -0.0251, -0.0923,  0.0467, -0.0368, -0.1543,
        -0.0494,  0.4765,  0.3781,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-8.7376e-02, -5.3333e-01, -1.9893e-01, -7.1159e-02,  6.0613e-02,
         6.4545e-02,  4.0278e-02, -8.7044e-02, -1.4257e-01, -7.1616e-02,
         2.4108e-01,  3.0862e-02, -2.0479e-02, -4.5587e-03,  4.1747e-04,
         2.0086e-02,  2.3747e-02,  4.9977e-03,  1.4785e-02, -7.3203e-02,
        -3.8043e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6864e-01, -1.2146e-01, -1.9110e-02, -1.6646e-01, -1.6247e-01,
        -4.5267e-02, -2.0708e-02, -3.1089e-02, -7.5570e-02, -8.2192e-02,
        -3.2736e-02,  1.7130e-02,  1.2304e-01, -9.0570e-02, -4.4012e-02,
        -1.1325e-04, -8.9943e-02,  1.8008e-01, -2.2127e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3349e-02, -5.8508e-01, -1.3361e-01, -6.2368e-02, -1.9523e-02,
         3.9769e-02, -1.6219e-03, -8.3690e-02,  1.2543e-01, -1.9111e-01,
        -1.2155e-01,  1.3455e-02,  2.7836e-02, -7.2569e-02,  3.1217e-02,
        -4.0732e-02,  3.7119e-02, -4.1809e-02, -7.4054e-03,  5.4558e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1587, -0.2165, -0.1044,  0.0210, -0.0040,  0.0177,  0.1484, -0.0828,
         0.0145,  0.0273, -0.0377, -0.0381, -0.0018, -0.0718, -0.0973,  0.0103,
        -0.0593,  0.0257, -0.0307, -0.0223,  0.0538, -0.0563,  0.1242,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3009, -0.0670, -0.0621,  0.1054, -0.0292, -0.0469, -0.0041, -0.0171,
        -0.0905, -0.1616, -0.0044,  0.0117,  0.0060, -0.0036,  0.0599, -0.0413,
        -0.0624,  0.0333,  0.0453,  0.0266, -0.1125, -0.0007,  0.0308, -0.0509,
         0.0905,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1723e-01, -3.6285e-01,  1.1530e-01,  1.8676e-04,  1.1967e-02,
        -3.9881e-02, -6.0535e-03, -6.1723e-02, -2.3712e-02, -1.2844e-01,
        -3.1593e-02, -5.4858e-02,  8.1132e-04, -1.2722e-01,  5.5192e-02,
        -2.3296e-02,  1.8128e-02, -2.7863e-02,  7.9589e-02,  1.9647e-02,
        -7.8395e-02, -5.4334e-02, -4.1499e-02, -4.9978e-02, -2.3621e-02,
         9.1074e-03, -4.7425e-02, -9.8649e-02, -6.8533e-02,  3.0839e-02,
        -3.1917e-02,  2.1983e-02, -4.2302e-02,  4.7387e-02, -3.2660e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1265,  0.0880,  0.0810, -0.0313, -0.0099,  0.0831, -0.0652,  0.0076,
        -0.1666, -0.0244, -0.0041,  0.1111,  0.0170, -0.0463, -0.0414, -0.0483,
        -0.0159, -0.0155, -0.0157,  0.0188,  0.0029,  0.0440,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0285,  0.0516,  0.0244, -0.1139,  0.0051, -0.0857, -0.0089,  0.0487,
        -0.0564, -0.0791, -0.2118, -0.0061,  0.0728, -0.1372, -0.1359, -0.0115,
         0.0054,  0.0243, -0.1039, -0.2091, -0.0642,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5262, -0.3130, -0.2264, -0.0721, -0.0458, -0.1458, -0.0562, -0.0309,
         0.0490, -0.1268, -0.0018,  0.2520, -0.0424, -0.0360,  0.0642, -0.0360,
        -0.2178, -0.0539, -0.2626,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1116,  0.1722, -0.0301,  0.0328,  0.0335, -0.0351,  0.0358,  0.0450,
         0.0058,  0.0337,  0.0403,  0.0224, -0.0247, -0.0050, -0.0354,  0.0557,
         0.0100, -0.0120,  0.0036, -0.0233,  0.0277,  0.0213, -0.0694, -0.0136,
        -0.1381, -0.1157,  0.0549, -0.0177,  0.0222, -0.0728,  0.0148, -0.0205,
        -0.0364, -0.0560,  0.0136,  0.0336,  0.0484, -0.0081, -0.0832, -0.0245,
        -0.0125, -0.0295, -0.0142,  0.0062, -0.0297,  0.1767, -0.1030],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1323, -0.1022, -0.0995, -0.0216,  0.0218, -0.1277,  0.0226,  0.0146,
        -0.0115, -0.0401, -0.0066,  0.0389,  0.0340,  0.0846, -0.0801, -0.0522,
        -0.2094, -0.0569,  0.0662,  0.0308, -0.0057,  0.1230,  0.0652, -0.2282,
        -0.0749,  0.1071, -0.0657,  0.0399, -0.0615, -0.0402,  0.0315,  0.1156,
         0.0936, -0.1003,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7960e-01, -5.4144e-02,  1.1206e-02,  4.5337e-02,  1.0010e-01,
        -7.7836e-02, -5.4366e-02, -2.8879e-02, -5.3718e-02, -2.7075e-02,
         4.7878e-02, -3.0755e-02, -4.5121e-02, -3.8259e-02, -4.9669e-02,
        -6.6180e-02, -5.5980e-02,  2.4515e-01, -6.9497e-02, -2.5009e-02,
        -1.4653e-02, -1.4316e-04, -1.7420e-02, -2.9389e-03, -2.3717e-02,
        -7.1791e-03, -1.1266e-02, -2.5152e-02, -3.2324e-03, -2.8650e-02,
         5.8159e-02, -3.8546e-02,  2.7536e-02, -1.2707e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.1104, -0.2369,  0.2419, -0.0621, -0.0094, -0.0609, -0.0950, -0.1548,
        -0.0279, -0.1095, -0.0824, -0.0465, -0.0275, -0.0493, -0.0853, -0.0763,
         0.0302, -0.0638,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0979,  0.0677, -0.1501, -0.0227, -0.0208,  0.0078, -0.0261, -0.0866,
        -0.0152, -0.0287,  0.1010,  0.0736,  0.0412, -0.0098, -0.0464,  0.1463,
         0.1007, -0.0414,  0.0508, -0.3251, -0.0636, -0.0301, -0.0504, -0.0765,
        -0.0074,  0.0805,  0.0801,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0902e-01, -6.3649e-01,  6.2095e-02, -1.6896e-01, -9.1821e-01,
        -9.1238e-02, -3.3119e-01, -4.2026e-01, -4.0360e-01, -1.4761e-01,
        -4.5063e-02, -4.7784e-02, -1.2696e-01, -1.2473e-01, -3.5674e-02,
        -3.4074e-02, -7.6150e-02, -1.4929e-04, -9.7436e-02, -9.3654e-02,
        -1.1387e-02, -6.3983e-02, -1.4150e-01,  7.7826e-02,  1.1513e-01,
         9.1070e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1897, -0.1009, -0.0313, -0.0059, -0.1137, -0.0036,  0.0614, -0.0512,
        -0.0510, -0.0076, -0.0643,  0.0277, -0.0718, -0.0231,  0.0661, -0.0648,
         0.0339, -0.0622, -0.1854,  0.0943, -0.0211,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0321,  0.0103, -0.1387, -0.1560, -0.0382,  0.0153,  0.1279, -0.0046,
        -0.0370,  0.0651, -0.0234, -0.0270, -0.0210, -0.0927, -0.1138,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8754e-01, -6.9116e-02, -2.8490e-02, -3.2069e-01,  5.1203e-01,
        -4.5165e-02,  7.2841e-04, -7.1795e-02,  1.2486e-01, -6.2685e-01,
        -3.3652e-02,  1.0886e-01, -3.8388e-02, -1.7581e-02,  2.1603e-01,
         1.3874e-01,  9.0563e-01,  6.0307e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1583, -0.0628, -0.0480, -0.2503, -0.0798, -0.0384,  0.0657, -0.0684,
         0.0160, -0.1079,  0.0101,  0.0127,  0.0453,  0.0400, -0.0070, -0.0092,
        -0.0131, -0.0420, -0.0478,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3249, -0.0975,  0.0173,  0.0453,  0.0704, -0.1193,  0.1039,  0.0150,
        -0.0260,  0.0496,  0.0986,  0.0403, -0.0425,  0.0175,  0.1072,  0.1438,
        -0.1907, -0.0812,  0.0692,  0.0877, -0.0528,  0.0775, -0.0477,  0.0177,
         0.0262,  0.0306,  0.0287, -0.0498,  0.0040, -0.0261, -0.0113, -0.0069,
         0.0120, -0.0042, -0.0521, -0.0181, -0.0107,  0.1458, -0.0204, -0.0117,
         0.0137,  0.0641,  0.1758,  0.0191], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5257, -0.0319, -0.0448, -0.0521,  0.0164, -0.0320,  0.0174,  0.0541,
        -0.0921,  0.0806, -0.0463,  0.0306, -0.0206,  0.0166,  0.0629, -0.0577,
        -0.0213, -0.1497,  0.0397,  0.0410, -0.0490,  0.0027,  0.0228, -0.0051,
        -0.0416, -0.0108,  0.0054, -0.0302,  0.0473,  0.2028, -0.0113, -0.1265,
        -0.0599, -0.2625,  0.0838,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0814,  0.3776, -0.0988,  0.1348,  0.0637,  0.6650,  0.2934, -0.1995,
        -0.1437, -0.0192,  0.2455, -0.0213, -0.0525,  0.1394,  0.1045,  0.2079,
         0.1362,  0.0428,  0.1827,  0.2393,  0.0977,  0.0253, -0.4948, -0.1482,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0316, -0.0147, -0.0079, -0.0557,  0.0109, -0.0354, -0.0061, -0.0417,
         0.0070,  0.0414, -0.0368, -0.0207,  0.0221, -0.0084, -0.0136, -0.0187,
        -0.0287,  0.0032, -0.0528, -0.0609, -0.0160, -0.0313, -0.0293,  0.0038,
         0.0503, -0.0395, -0.0528, -0.0208,  0.0081, -0.0245, -0.0306,  0.0049,
        -0.0620, -0.0535,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6211,  0.0305,  0.0268, -0.2637, -0.0063,  0.0981, -0.2016, -0.0785,
        -0.0564,  0.0632,  0.0417,  0.0681, -0.0952,  0.0161, -0.0548,  0.0387,
         0.1646, -0.0296,  0.0032, -0.1356,  0.0316,  0.0053,  0.2088, -0.0229,
         0.0256,  0.0757,  0.0297, -0.0190, -0.3279,  0.0047,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.1700,  0.8898, -0.6600, -0.2133,  0.0761, -0.0316,  0.1662, -0.1097,
        -0.0382,  0.0179, -0.2745, -1.1353,  0.5337,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1466, -0.3673,  0.3049,  0.0006, -0.1561, -0.1101,  0.0864, -0.0437,
        -0.1320, -0.0070, -0.0905, -0.1124,  0.1444, -0.0163, -0.0163, -0.1600,
        -0.3270,  0.0060,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4692,  0.0279, -0.3372, -0.3808, -0.3257,  0.3211,  0.0015, -0.0625,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0982,  0.1140,  0.0285,  0.0274,  0.0434,  0.0103,  0.0047,  0.0490,
        -0.0061,  0.0249, -0.0062,  0.0097, -0.0676, -0.0890, -0.0066, -0.0322,
        -0.0121,  0.0203, -0.0376, -0.0353,  0.2067,  0.0367,  0.0728, -0.0891,
         0.0417, -0.0554, -0.0663, -0.1110, -0.0699,  0.0266,  0.0402,  0.0029,
         0.0469, -0.1343,  0.0757, -0.0751, -0.0413, -0.0275,  0.0766, -0.0123,
        -0.0175,  0.0107, -0.0639,  0.0930,  0.0993, -0.0254,  0.0856],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1204,  0.0629, -0.1441, -0.1957, -0.0253,  0.1158, -0.0987,  0.0391,
        -0.1052, -0.0663, -0.0336,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7115e-01,  2.0158e-04, -1.9787e-01, -4.3643e-02, -2.2222e-01,
        -1.2493e-01, -7.5735e-02, -2.2199e-01, -5.9265e-02, -7.4226e-02,
        -4.4394e-02, -1.1580e-01, -3.6972e-02, -5.1219e-02,  4.8546e-02,
        -8.8290e-03,  1.3959e-01,  3.5193e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2222,  0.0056, -0.2595, -0.0613, -0.0045,  0.0874, -0.0016, -0.0164,
         0.0166, -0.0883, -0.0880,  0.0184, -0.1528,  0.2541, -0.4113,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3415, -0.1043,  0.1074,  0.0167, -0.0874, -0.0034, -0.2121,  0.1172,
        -0.0222,  0.1100, -0.0275,  0.0156,  0.1380,  0.0086, -0.0441,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4717, -0.1069,  0.2137, -0.0688, -0.0182,  0.0434,  0.0166, -0.0733,
        -0.1901, -0.1333, -0.0895, -0.0761, -0.0947,  0.0403, -0.0231,  0.0820,
        -0.0992,  0.0848,  0.0227,  0.0121, -0.1774, -0.0831,  0.1834, -0.1765,
        -0.0846, -0.1181,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1959e-01, -8.3865e-01, -2.2936e-01, -1.1653e-01, -1.5025e-01,
        -6.6909e-04,  4.5260e-03, -1.1767e-01, -2.3750e-01, -3.1964e-02,
        -7.0980e-02,  1.1169e-02, -2.5371e-02, -1.0334e-01, -2.4063e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0648, -0.0259,  0.0094,  0.0174, -0.0643, -0.0361,  0.0060, -0.0082,
         0.0063, -0.1494,  0.0334, -0.0034, -0.0061, -0.0070, -0.1090, -0.0134,
        -0.0144, -0.0205, -0.0236, -0.0510, -0.0330, -0.0633, -0.0256,  0.0460,
         0.0838, -0.0249,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2261,  0.0860, -0.1256, -0.0076, -0.0497, -0.0230,  0.0314,  0.0464,
         0.0618, -0.0028, -0.0635, -0.0395, -0.0195, -0.0395, -0.0283, -0.0477,
        -0.0422, -0.0427,  0.0313, -0.0556,  0.0269,  0.0038, -0.0217, -0.0531,
        -0.1267, -0.1085,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.1749, -0.1184, -0.1150, -0.0605, -0.0456, -0.0105, -0.0572, -0.0632,
         0.0329, -0.0696,  0.1235, -0.0501, -0.0767,  0.0372, -0.0354, -0.0090,
        -0.0152, -0.0079, -0.0126, -0.0650, -0.0432, -0.0169,  0.0095, -0.0461,
        -0.0220, -0.1241, -0.0617,  0.0642, -0.0811,  0.0548, -0.0041, -0.0228,
        -0.0032, -0.0179, -0.0878,  0.0731,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0727, -0.0816,  0.0589, -0.0312,  0.0162,  0.0050, -0.0523, -0.0477,
        -0.0028,  0.0265, -0.0195,  0.0409, -0.0440,  0.0183, -0.0587, -0.0657,
        -0.0131,  0.0172, -0.0838, -0.0268, -0.0331,  0.1583, -0.0681,  0.0697,
        -0.0080, -0.0516, -0.0158, -0.0255, -0.0160,  0.0337, -0.0006, -0.0456,
        -0.0449, -0.0447, -0.0257, -0.1784,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0984,  0.3274,  0.0701,  0.0168, -0.0783,  0.1598, -0.0031,  0.0479,
        -0.0443, -0.0750,  0.0203,  0.1103,  0.0731, -0.0017,  0.1898, -0.0138,
         0.1591,  0.1882, -0.1472, -0.0977,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5420,  0.0028, -0.0552,  0.1478,  0.3472, -0.0695,  0.1107,  0.0035,
        -0.1102,  0.0287,  0.0287,  0.0081,  0.0072,  0.0690, -0.0168,  0.0220,
         0.0415, -0.0406, -0.0509, -0.0206, -0.1069,  0.0149, -0.0013, -0.0551,
         0.1547,  0.0177,  0.0418, -0.0189, -0.0024, -0.1540,  0.0415, -0.0486,
        -0.1493, -0.0402,  0.0848,  0.1988,  0.0933, -0.0289,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0287, -0.0002,  0.0202,  0.0567, -0.0100, -0.0465, -0.0045,  0.1037,
        -0.0161, -0.0493, -0.0190, -0.0885, -0.0038, -0.0110, -0.0891, -0.0531,
         0.0063, -0.0139,  0.0132,  0.0325,  0.0064,  0.0529,  0.0248, -0.0591,
         0.0473,  0.0042, -0.0110, -0.0090, -0.1169, -0.0618, -0.0232,  0.0551,
         0.0273,  0.0068, -0.0183, -0.0024, -0.0452, -0.0536, -0.0170, -0.0316,
         0.0316], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0045, -0.1619,  0.1376,  0.0423, -0.0288,  0.1282, -0.1623, -0.0268,
         0.0437,  0.0388,  0.0232,  0.0162, -0.0220,  0.0032, -0.1496, -0.0817,
        -0.0380, -0.0499, -0.0403, -0.0725, -0.0141, -0.0361,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0083, -0.0152, -0.0136, -0.0312,  0.0191, -0.0535, -0.0177, -0.0411,
        -0.0439,  0.0060, -0.0332, -0.0567,  0.0196, -0.0249,  0.0578, -0.0286,
        -0.0474, -0.0257,  0.0124, -0.0846, -0.0449, -0.0718,  0.0376, -0.0031,
        -0.0231,  0.0098, -0.0072, -0.0387, -0.0970, -0.1111,  0.0053, -0.0433,
        -0.0427,  0.0225, -0.1320,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0141, -0.1243, -0.0663, -0.0612, -0.1677,  0.0867, -0.0409,  0.0936,
         0.0300,  0.0174, -0.0692,  0.0599, -0.0027,  0.0350,  0.1153, -0.1299,
         0.0563, -0.0015, -0.0436,  0.0904, -0.0496,  0.0546, -0.0363, -0.0088,
        -0.0678, -0.1562,  0.0077,  0.0080,  0.0717,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4368,  0.0333, -0.0505,  0.0389,  0.0077, -0.0970,  0.0387, -0.0460,
        -0.0072, -0.0674,  0.0315, -0.1016,  0.0419, -0.0311, -0.0536, -0.0096,
        -0.1470, -0.0293, -0.0846, -0.0195, -0.0087, -0.2463,  0.1039, -0.0559,
        -0.0009,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0588, -0.1249,  0.0628, -0.0688, -0.1853, -0.0127, -0.0499, -0.0626,
        -0.0175,  0.1591, -0.0314, -0.1396, -0.0622, -0.1441, -0.0428, -0.0984,
         0.0061, -0.0243, -0.0181, -0.0279, -0.0878, -0.2130,  0.1156,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2556e-02, -1.3829e-01, -1.0904e-01, -1.2074e-01, -1.0884e-01,
         1.9870e-01,  2.1074e-01, -6.8962e-02, -9.8555e-02,  1.5222e-03,
         2.9340e-02,  7.0581e-04, -1.2352e-02, -8.3204e-02,  1.2696e-01,
         6.5971e-02,  6.6258e-02,  1.8600e-04, -5.9371e-02, -3.8314e-02,
        -4.2942e-02, -1.0874e-01, -1.0007e-01, -3.4344e-02, -1.5766e-01,
        -1.3522e-01, -9.8319e-02,  4.5283e-02, -3.3861e-02, -5.4070e-02,
        -3.9668e-02, -2.0922e-02,  5.8144e-03, -6.8347e-02, -6.0165e-02,
        -1.5193e-01, -5.2620e-02, -4.2712e-02, -2.3768e-02,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0048, -0.1827,  0.0037, -0.0556, -0.1311, -0.0226, -0.0397, -0.0823,
        -0.1872, -0.1106, -0.0173,  0.0674,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.1099, -0.0955, -0.2730, -0.0199, -0.0082, -0.0426, -0.0069, -0.0452,
        -0.0155, -0.0282, -0.0643, -0.0435, -0.0077,  0.0351, -0.0051, -0.0260,
        -0.0121, -0.0292,  0.0148, -0.0481,  0.0053, -0.0323, -0.0069, -0.0358,
         0.0579,  0.0450, -0.0204,  0.0044, -0.1050,  0.0269,  0.0052,  0.0089,
        -0.0100, -0.0097, -0.0153, -0.0069, -0.0220,  0.0326, -0.0320, -0.0137,
        -0.0117,  0.0237, -0.0243,  0.0119,  0.0489,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4547, -0.1898,  0.0616, -0.0606,  0.1872,  0.0256, -0.0838, -0.0766,
        -0.1175, -0.0365,  0.0082, -0.1275,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1017, -0.6217, -0.1436,  0.0065,  0.2641, -0.0203,  0.0607, -0.2856,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2661e-01, -5.4847e-02, -3.6246e-02,  1.6956e-02,  3.4857e-02,
        -7.7761e-03, -7.5578e-02,  6.1590e-03,  1.2673e-02, -1.8882e-01,
        -1.9783e-01,  8.9191e-02,  1.9466e-02, -1.9726e-03, -2.2321e-02,
         3.2771e-02, -1.5736e-01, -2.5046e-02,  3.8385e-02,  6.6310e-03,
        -9.0164e-03, -1.4281e-02,  9.3528e-03,  4.0411e-02, -4.3078e-02,
        -3.4155e-02, -1.6450e-02, -9.3754e-03, -5.5155e-03,  6.0754e-02,
        -7.4971e-02, -4.3850e-02, -2.9831e-02, -3.7693e-02, -1.3994e-02,
         9.9480e-03,  2.1530e-01, -5.4011e-02, -1.2520e-04, -5.5499e-02,
        -2.5138e-02,  1.8013e-02,  9.2757e-02, -2.0889e-02,  2.3674e-01,
         3.0502e-01, -7.3572e-02,  1.5330e-01, -6.1165e-02, -9.4120e-02,
        -1.1096e-01, -4.3946e-02, -4.8438e-02,  1.3239e-01,  2.2198e-02,
         2.0778e-02,  1.7439e-02, -1.5661e-02, -2.4987e-02,  1.0370e-02,
         6.4653e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0150, -0.1275, -0.0114, -0.0142, -0.1568, -0.1015, -0.0044,  0.0280,
         0.1426,  0.0901,  0.0713,  0.0300,  0.0261,  0.0379, -0.1702, -0.0347,
         0.0195,  0.0032,  0.0025,  0.0613, -0.0202,  0.0745, -0.0329,  0.0638,
         0.0306, -0.0320, -0.0087, -0.0201, -0.0292, -0.0808,  0.0312,  0.1850,
         0.1847,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2001,  0.0506, -0.1562,  0.0269, -0.0526,  0.0974, -0.0065,  0.0174,
         0.0321, -0.1156, -0.0130, -0.0252, -0.0189,  0.0284, -0.1042,  0.0283,
        -0.1372, -0.0949, -0.2311,  0.0091,  0.0895, -0.2075,  0.0863, -0.0278,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1893, -0.1687, -0.1019, -0.0482, -0.0065, -0.1949, -0.0393, -0.0407,
         0.0958,  0.0629, -0.0422,  0.0345, -0.1626, -0.0099, -0.0513, -0.0301,
         0.0483,  0.0585, -0.0029,  0.0185, -0.0179,  0.1072,  0.1564,  0.3030,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1841,  0.0074, -0.0689, -0.0677, -0.0161,  0.0264, -0.0530, -0.0203,
         0.0554, -0.0032, -0.0547,  0.0508, -0.1818, -0.0105,  0.0087, -0.0334,
         0.0033,  0.0308, -0.0032,  0.0555, -0.0552, -0.0229, -0.0020, -0.1734,
        -0.0931,  0.1787,  0.0164, -0.0118,  0.1944,  0.0225, -0.0531, -0.0198,
         0.0924,  0.0882,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0781, -0.0259, -0.0456,  0.0049,  0.0094,  0.1374,  0.0280,  0.0128,
         0.0210, -0.1438, -0.1510, -0.1099, -0.0714, -0.1739,  0.1145,  0.0105,
         0.0997, -0.0019, -0.0287, -0.0044, -0.0597, -0.0377, -0.0278, -0.0300,
         0.0035,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1553, -0.1326,  0.0200, -0.0827, -0.0023,  0.0910, -0.0508,  0.0227,
        -0.0423, -0.1402, -0.0625, -0.0524, -0.0024, -0.0345, -0.1358,  0.0453,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8947e-01, -1.4817e-01, -1.2691e-02, -4.6463e-02, -3.1068e-02,
        -6.1789e-02, -9.8154e-03, -4.1882e-03,  6.4771e-02, -2.4217e-02,
         2.7976e-02, -6.4662e-02, -8.9100e-03, -3.2363e-02, -5.4201e-02,
         1.4989e-02, -1.2316e-01, -2.7780e-02,  1.2521e-02,  1.7354e-04,
        -3.6563e-02, -4.4063e-02, -8.0261e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3741, -0.7164, -0.0879, -0.1491, -0.1605,  0.0265,  0.0457, -0.0608,
        -0.0392, -0.0261,  0.3028, -0.0225,  0.2211,  0.1957,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.3819, -0.1129,  0.0426, -0.0858,  0.0148,  0.0102, -0.1236, -0.1328,
        -0.0030,  0.0528, -0.0891,  0.0057,  0.1598, -0.0294,  0.0024, -0.0161,
        -0.0101, -0.0603, -0.0051, -0.0075,  0.0053, -0.0534, -0.0786,  0.0178,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3007,  1.0165,  0.0715, -0.1074,  0.1895,  0.2626, -0.3683, -0.0872,
         0.0665,  0.0356,  0.5843,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6519e-02, -2.1331e-01, -3.6580e-03,  5.8555e-03, -2.0809e-01,
        -1.0145e-01, -2.2762e-01,  1.0551e-01,  5.0599e-03,  2.1895e-04,
        -2.9387e-02, -3.2982e-02, -4.9901e-02, -1.3544e-01, -6.4771e-02,
        -5.0765e-02,  5.0757e-04,  2.7351e-02,  8.5673e-03,  8.1163e-02,
        -1.4189e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0406,  0.1371, -0.2902, -0.1809,  0.1608,  0.1002, -0.2324, -0.0558,
        -0.0336, -0.0431,  0.2222, -0.0097,  0.2018, -0.2212,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0771,  0.1661,  0.0044,  0.2168,  0.0311,  0.2630,  0.0460,  0.0897,
        -0.0368,  0.0138, -0.0370, -0.4500, -0.1854, -0.0109,  0.0588,  0.0802,
         0.0763, -0.0173, -0.1836, -0.0155, -0.3400,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0081, -0.0117, -0.0030,  0.0038,  0.0551, -0.0235,  0.0050, -0.1485,
        -0.0121, -0.1032, -0.1307, -0.0415, -0.0567,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0347, -0.0428, -0.0227,  0.0142, -0.0232, -0.0192,  0.0261, -0.0465,
        -0.0719,  0.0408, -0.0404, -0.0061,  0.0376, -0.0489, -0.0391, -0.0277,
        -0.0197, -0.0287,  0.0178, -0.0156, -0.0247,  0.0565, -0.0939, -0.0643,
        -0.2324,  0.0489,  0.0186, -0.0197, -0.0357,  0.0097, -0.0016,  0.0418,
         0.0331,  0.0504, -0.0874,  0.1642, -0.0291,  0.0160, -0.0076, -0.0016,
         0.0008,  0.0025,  0.0078,  0.0890, -0.0891], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0377, -0.0625, -0.0654, -0.0662, -0.0530, -0.0378, -0.0267, -0.0615,
        -0.0224, -0.0650, -0.0342,  0.0213, -0.0587,  0.0045,  0.0005,  0.0052,
        -0.2124,  0.0208, -0.0568,  0.0127,  0.0409, -0.0819,  0.0327,  0.0418,
         0.0221,  0.0727, -0.1058, -0.0447, -0.0962, -0.0849,  0.0167,  0.0982,
        -0.0192,  0.0176,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0317, -0.7189, -0.0321, -0.2504, -0.1692, -0.0293, -0.1236, -0.2621,
         0.0155, -0.1541, -0.0171, -0.0886, -0.0914, -0.0881, -0.1283, -0.0167,
         0.0296, -0.0837, -0.0822, -0.0695, -0.3561,  0.0270, -0.0723,  0.1477,
        -0.0622,  0.1533,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0056, -0.2964,  0.5316, -0.0998,  0.4716,  0.0343,  0.2353, -0.0856,
         0.1622, -0.0433, -0.0915,  0.0108, -0.0287, -0.1327, -0.2864,  0.0630,
        -0.0887,  0.2142, -0.0093, -0.0737, -0.0657, -0.1466,  0.0264,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0353, -0.0336,  0.1355, -0.0136, -0.0209,  0.1046,  0.0031, -0.0239,
        -0.0098,  0.0304,  0.0351, -0.0331,  0.0518, -0.0729, -0.2063,  0.1307,
         0.0959, -0.0078, -0.0034,  0.0115, -0.0005, -0.0013,  0.0045,  0.0215,
         0.0156,  0.0330, -0.0067, -0.0920, -0.0380,  0.0203,  0.1101,  0.0006,
         0.0610, -0.0577,  0.0240,  0.0384, -0.0018, -0.0131, -0.0317, -0.0532,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1110, -0.0689, -0.1124,  0.0075, -0.0046,  0.0145,  0.0717, -0.0548,
        -0.1706, -0.0653,  0.0234, -0.0588, -0.0033, -0.0836, -0.0245, -0.0651,
         0.0020,  0.0507,  0.0721,  0.0024, -0.0636,  0.0282,  0.0443,  0.1520,
        -0.0797, -0.0364,  0.0145, -0.1009,  0.0055,  0.2689, -0.1297, -0.0970,
        -0.0377,  0.0005,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.2800,  0.0598, -0.2770,  0.0014, -0.0824, -0.0105,  0.0731, -0.0231,
         0.0956,  0.0552,  0.0215, -0.0244,  0.4166,  0.0150, -0.1140,  0.1269,
         0.0616,  0.0246, -0.0193, -0.0374, -0.0309, -0.0083,  0.0971,  0.1062,
        -0.0585, -0.0493, -0.1670, -0.0830, -0.1125,  0.0402,  0.0273, -0.0106,
         0.0763,  0.1178,  0.0028, -0.0285,  0.0210, -0.0059,  0.0057, -0.0039,
         0.0061, -0.0345, -0.0161, -0.0084, -0.0046,  0.0137,  0.0052,  0.0293,
         0.0053,  0.0055,  0.0748, -0.0686], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1026, -0.1244,  0.0027,  0.0173, -0.1427, -0.0869, -0.1565,  0.0038,
         0.0028,  0.0574, -0.0379,  0.0685, -0.1418, -0.0613, -0.0600,  0.0004,
        -0.1104, -0.0152, -0.0141,  0.1098, -0.0141,  0.0163,  0.0036, -0.1196,
         0.0336,  0.0199,  0.0299, -0.0385, -0.0779,  0.0178,  0.0785,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1706, -0.0613,  0.0002,  0.0289, -0.0049, -0.0299, -0.0182,  0.0006,
         0.0138,  0.0375, -0.0190, -0.0159, -0.0757, -0.0020, -0.0321, -0.0181,
        -0.0803, -0.0150, -0.0195, -0.0495, -0.0168, -0.0210, -0.0198, -0.0129,
         0.0499,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3975, -0.0285,  0.0828, -0.0712, -0.1794, -0.0508, -0.2005, -0.1035,
         0.0906, -0.0885,  0.2122, -0.3097,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0311e-01, -1.9946e-01, -8.8050e-02, -2.7464e-02, -4.3159e-02,
         2.5166e-02, -2.0644e-02,  1.4291e-02, -1.7383e-02, -1.7975e-04,
        -2.4252e-02,  1.4091e-02, -4.4060e-03, -1.1479e-01, -1.9672e-02,
        -8.2837e-02, -4.0783e-02, -1.5705e-02,  2.3571e-02, -3.2752e-02,
        -6.3677e-03, -4.8925e-02,  4.4362e-02,  2.4626e-01,  9.7357e-02,
         9.9631e-02, -6.2506e-02,  2.0279e-02,  1.8310e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1696, -0.0151, -0.0503,  0.1080, -0.0378, -0.0414, -0.1087,  0.1202,
         0.3229, -0.7123, -0.1878,  0.0294,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2925e-01, -2.7545e-01,  1.2994e-04, -4.5709e-02, -1.5794e-01,
        -6.1722e-02, -1.4341e-01, -7.1840e-02, -2.8881e-02, -1.2139e-02,
        -5.8889e-02,  2.7312e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5774,  0.0794, -0.1204,  0.0639, -0.1347, -0.0419,  0.0070,  0.0864,
        -0.1409,  0.1148, -0.0256, -0.1146,  0.0836,  0.1291, -0.0527, -0.0568,
         0.0225,  0.0499, -0.4296, -0.1257,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4034e-02, -1.3427e-02, -7.1454e-02, -1.2945e-01, -1.2394e-01,
        -8.9750e-02, -4.6162e-02,  9.4819e-04, -3.0398e-02, -9.6570e-03,
         4.0993e-02, -1.9624e-02, -2.9883e-02,  9.5136e-02,  8.3147e-02,
        -4.8985e-03, -1.7318e-02,  5.2670e-02,  3.9961e-02, -3.3513e-02,
        -3.1389e-02,  3.3805e-02,  1.7776e-02, -3.3446e-02, -1.3394e-02,
         5.9275e-02,  3.1673e-02, -2.2875e-02, -5.1488e-02, -5.6576e-05,
         4.9061e-02,  2.9862e-03, -6.0308e-02, -1.7708e-01, -2.4762e-02,
         8.6569e-02, -1.7650e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4451, -1.4342, -0.8404, -0.1991, -0.3496,  0.1135,  0.7645,  0.0747,
        -0.0953,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4137,  0.0332,  0.0815, -0.2080, -0.0034, -0.1275, -0.1051, -0.1113,
        -0.0072,  0.0275,  0.0488,  0.2291, -0.0042, -0.0397,  0.0369,  0.0140,
        -0.0440,  0.1102, -0.0621, -0.0033, -0.0365,  0.0911,  0.0142, -0.0513,
        -0.0168, -0.1223, -0.0872,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2436, -0.0785, -0.0399, -0.0322, -0.0303, -0.0048, -0.0208, -0.0279,
        -0.0141, -0.0712, -0.0642,  0.0044, -0.0889,  0.0833,  0.0191, -0.0165,
        -0.0355, -0.0300, -0.0412,  0.0532,  0.0660, -0.0882, -0.0226,  0.0763,
         0.0157, -0.0024, -0.0322, -0.0159, -0.2944, -0.0307,  0.0021, -0.0550,
         0.0704, -0.0339,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0779, -0.0619, -0.0090, -0.1068, -0.0176,  0.0372,  0.0009, -0.0395,
         0.0363,  0.0415, -0.0293, -0.1568, -0.0776, -0.0600,  0.0568, -0.0324,
        -0.1750, -0.0627,  0.0035,  0.0715,  0.0566,  0.0320, -0.0668,  0.0102,
        -0.0353,  0.0231, -0.0360, -0.0421, -0.1101, -0.0592,  0.0504,  0.0190,
        -0.0914,  0.1539,  0.1212, -0.0147,  0.0038, -0.0354,  0.0135,  0.0320,
        -0.0467, -0.0983,  0.0981,  0.1137, -0.0367,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3378, -0.0288,  0.0085,  0.0577, -0.2055, -0.1193, -0.2946, -0.1355,
        -0.0464,  0.0360,  0.0593, -0.0046, -0.0612, -0.0012,  0.0567, -0.2504,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2515, -0.0334, -0.0673, -0.0963,  0.0539,  0.0816,  0.0051,  0.1108,
        -0.0469, -0.0659, -0.0416, -0.0912, -0.1007, -0.0284, -0.0316,  0.0541,
        -0.0559,  0.0719,  0.0704,  0.0476, -0.0681, -0.0173, -0.0389, -0.0004,
         0.0687, -0.0664,  0.0816,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0529,  0.5717, -0.0430, -0.0594, -0.0215,  0.0143, -0.0059,  0.0106,
        -0.0022, -0.0385, -0.0130, -0.0261, -0.0270, -0.0480, -0.0097, -0.0276,
         0.0041,  0.0579, -0.0814,  0.0167,  0.0089, -0.0267, -0.0273, -0.0641,
         0.0797, -0.0273, -0.0209, -0.0380, -0.0694,  0.0370,  0.0170,  0.0308,
         0.0254,  0.0019,  0.0984,  0.1327, -0.0180, -0.0813, -0.0548, -0.0604,
        -0.0840, -0.0426, -0.0525,  0.0255, -0.3028,  0.0373], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0990, -0.0717,  0.0173, -0.0657, -0.0277,  0.0426, -0.0682, -0.0551,
         0.0200, -0.1197, -0.0243, -0.0294,  0.0062, -0.0663, -0.0376, -0.0664,
        -0.0742, -0.1481, -0.1685, -0.3661, -0.0369,  0.0239,  0.0400, -0.0202,
        -0.0423,  0.0422, -0.0531, -0.3051,  0.0152,  0.1654,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5632,  0.5475, -0.0889,  0.0076, -0.0823, -0.4001, -0.1876, -0.2553,
         0.1932, -0.3568, -0.1194,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2152, -0.2129, -0.1038, -0.0266, -0.0424, -0.0087, -0.0055, -0.0802,
        -0.0415, -0.3036, -0.0218, -0.0590, -0.0374, -0.0228, -0.0081,  0.0213,
        -0.0008, -0.0156,  0.0746, -0.2509,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0844, -0.2492,  0.0299, -0.1898, -0.0478, -0.1094, -0.1721, -0.0307,
        -0.2451, -0.0034, -0.1047,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1650, -0.0479, -0.0732, -0.1574, -0.1636,  0.0994, -0.0099, -0.0785,
        -0.0684,  0.0933, -0.0683, -0.0973, -0.0235, -0.0337, -0.0049, -0.0149,
        -0.0479, -0.0570,  0.0136, -0.0497, -0.0265,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0860, -0.0912, -0.0519, -0.1698, -0.0424, -0.5552, -0.0754,  0.1091,
         0.0283,  0.4493, -0.0589, -0.0166,  0.0732,  0.0321, -0.0089,  0.0680,
         0.0136,  0.0262,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9858e-02, -1.4788e-01, -2.0658e-01, -8.1644e-02,  3.3434e-02,
        -1.3707e-04, -3.1094e-02,  4.5395e-03, -1.4039e-02, -2.4243e-02,
         4.9440e-02,  1.0262e-01, -3.3404e-03, -2.0530e-02, -6.0047e-02,
        -6.0376e-02, -1.0826e-02, -7.4322e-02, -6.2900e-03, -1.4469e-02,
        -3.8216e-02, -5.0168e-02, -1.0764e-01,  3.0374e-02, -4.5235e-02,
        -4.8077e-02, -3.9838e-02, -3.0110e-02, -2.7426e-02, -3.6753e-02,
         7.9397e-03, -1.4515e-02, -5.5776e-02,  9.7986e-03, -5.6059e-02,
         2.2510e-02, -5.2384e-02, -1.3246e-02, -1.5871e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1296, -0.2444,  0.0197,  0.0908, -0.0196, -0.0620,  0.0422, -0.0683,
        -0.0110,  0.0172, -0.0683, -0.0377, -0.0454,  0.0290, -0.0303,  0.0151,
         0.0156,  0.0365,  0.0275, -0.0287, -0.0143, -0.0621,  0.0237, -0.0302,
        -0.0010,  0.0008, -0.0453,  0.0157, -0.0459, -0.1205,  0.0166,  0.0216,
        -0.0090, -0.1055,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.8082, -0.5687, -0.1845, -0.1048, -0.3489, -0.4673, -0.2628, -0.1294,
        -0.2562,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2218e-01, -4.2277e-02,  1.7298e-02,  5.9768e-03, -2.7695e-02,
         9.4243e-02, -9.5397e-03, -5.7677e-02, -1.6323e-02, -2.6166e-02,
        -5.0815e-02, -1.6398e-02,  1.5796e-02, -3.8354e-03,  1.6712e-01,
        -2.1093e-02, -6.7921e-02, -4.9000e-01, -1.0378e-02, -5.8099e-02,
         4.1825e-02,  3.4442e-03, -6.1192e-02,  3.0662e-02, -6.2176e-02,
         2.0808e-02,  1.3583e-03, -1.8551e-02,  1.0530e-02,  4.0606e-03,
         1.6752e-02,  2.7487e-04, -2.5388e-02, -4.9257e-02,  4.0545e-02,
        -5.6502e-02, -8.8777e-04, -1.5949e-01,  7.4192e-02, -1.4306e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0627, -0.0660, -0.1180,  0.0005, -0.0595, -0.0003, -0.0009, -0.0571,
         0.0088, -0.0248, -0.0030,  0.0143, -0.0053, -0.0018,  0.0026,  0.0811,
        -0.0399,  0.0654, -0.0127, -0.0057, -0.0213,  0.0143,  0.0357, -0.0154,
         0.0364, -0.0078, -0.0973, -0.0204,  0.0733, -0.1292, -0.0802, -0.0887,
        -0.0056,  0.0146, -0.0298,  0.0074, -0.0093, -0.0361,  0.0057,  0.0407,
         0.0559,  0.0126,  0.0025, -0.0025, -0.0091,  0.0070, -0.0149,  0.0241,
        -0.0275,  0.0462,  0.0510,  0.1086,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1194, -0.1595, -0.1811, -0.1240,  0.0932, -0.0695, -0.1047, -0.1239,
        -0.0272,  0.0813, -0.1277, -0.0509, -0.0557, -0.1482, -0.0070, -0.1162,
        -0.0381, -0.1560, -0.1514, -0.1420, -0.1177,  0.1115,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2216, -0.5015, -0.0342, -0.0386,  0.0382, -0.0764, -0.0699, -0.0429,
         0.1607, -0.1050, -0.1639, -0.1107, -0.0129, -0.0924, -0.0797, -0.0967,
        -0.1414,  0.1057,  0.0438,  0.0677,  0.3384,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3362,  0.0050,  0.0136, -0.1355,  0.0631, -0.0057, -0.0539, -0.0269,
        -0.0219,  0.0291,  0.0005,  0.0321,  0.0080,  0.0389,  0.0153,  0.0130,
         0.0061,  0.0357, -0.0726, -0.0277, -0.0054, -0.0482,  0.0193, -0.0040,
        -0.0366,  0.0389,  0.0161, -0.0535,  0.0007,  0.0210, -0.0419, -0.0061,
         0.0096, -0.0313, -0.0328, -0.0754,  0.0363, -0.0108, -0.0387, -0.0951,
        -0.3452, -0.0303, -0.0642, -0.0410, -0.0173, -0.0010, -0.0777, -0.0750,
         0.0277, -0.0161, -0.0502, -0.0502,  0.0407,  0.1033,  0.0266],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0090e-01,  4.4991e-03,  1.0207e-01, -8.7697e-02, -1.1434e-02,
        -2.1478e-02,  1.0018e-01,  9.2536e-02,  1.4625e-02, -3.7111e-05,
         2.7117e-02,  2.8635e-02, -2.4404e-02, -2.2883e-02,  1.4535e-02,
         2.6705e-02,  9.1276e-02, -5.4090e-02,  1.2295e-02, -5.4994e-02,
        -7.3540e-02, -6.3350e-02, -6.1383e-02, -1.2762e-02,  4.7540e-02,
        -1.1004e-02, -5.2474e-02, -1.6146e-01,  4.7362e-03, -3.2216e-02,
        -7.3141e-02, -2.6010e-02, -2.9354e-02, -1.8221e-01, -8.6261e-02,
        -2.4810e-02,  2.5386e-02, -3.8941e-02,  2.1889e-02,  4.6584e-03,
        -3.0003e-02,  5.6443e-03,  3.7038e-02, -5.1512e-02, -3.6816e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0534, -0.1568, -0.1332, -0.0807, -0.0337,  0.0064, -0.0272,  0.0052,
        -0.0457,  0.0158,  0.0633,  0.0069,  0.0359, -0.0096, -0.0424, -0.0206,
        -0.0847, -0.1474,  0.0350, -0.0485, -0.0975,  0.0988,  0.1857, -0.0239,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0390,  0.0793,  0.0215,  0.1771,  0.1828,  0.0773,  0.0751, -0.0084,
         0.0424,  0.0051, -0.0713,  0.0620,  0.0503,  0.0225,  0.0502,  0.1278,
         0.0519,  0.0371,  0.0396,  0.0945, -0.0469, -0.0299,  0.0655,  0.1425,
         0.2140,  0.0014,  0.0931, -0.0358,  0.0513,  0.1055,  0.0470,  0.0234,
        -0.0357,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0881, -0.2539, -0.0944, -0.2397, -0.0216, -0.1679, -0.0591, -0.0041,
        -0.3048, -0.0522,  0.0178, -0.0248, -0.1095, -0.0954, -0.0652, -0.1903,
        -0.0838,  0.0267,  0.1806,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4043, -0.0019, -0.0400, -0.0436, -0.0698,  0.0644, -0.0050,  0.0010,
        -0.1008,  0.1009,  0.0776, -0.1048,  0.0524, -0.0710,  0.0396,  0.0218,
         0.0045, -0.0274,  0.0161,  0.0182, -0.0707, -0.1579, -0.0108, -0.0900,
         0.2088,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2682, -0.0557, -0.2337,  0.0903,  0.1103, -0.0617, -0.7152,  0.1333,
         0.0626,  0.0171, -0.0608, -0.0284, -0.0157, -0.0589, -0.2003,  0.2224,
         0.0451,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.1646,  0.1265, -0.0797, -0.0150, -0.0402, -0.0279, -0.0514,  0.0240,
        -0.3635,  0.0049,  0.0019,  0.0349, -0.0056, -0.0787, -0.0258, -0.1042,
        -0.0131, -0.1003,  0.0584,  0.0220,  0.0420, -0.0125, -0.1095,  0.0149,
         0.0271, -0.0460, -0.0182,  0.1050,  0.0178,  0.0281,  0.0059, -0.3270,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5406,  0.1880, -0.0431, -0.0450,  0.0137,  0.0557, -0.0156,  0.0285,
         0.0142, -0.0177, -0.0701,  0.3122, -0.2138, -0.2876, -0.3562,  0.2572,
         0.0668, -0.0779, -0.0235, -0.0486, -0.0473, -0.0959,  0.0332, -0.0487,
         0.1668,  0.0035,  0.1601, -0.1906,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7043e-01,  4.8515e-01, -6.3491e-02,  3.4634e-02, -4.3789e-03,
         1.5753e-02,  1.1067e-01,  4.9760e-02,  7.0000e-02,  8.6321e-02,
         8.9118e-03,  8.3081e-02,  4.3956e-02, -6.5558e-02,  1.2735e-01,
         3.7698e-04,  3.6754e-02,  3.0246e-02,  6.8990e-02,  6.7821e-02,
         5.8551e-02,  8.8226e-02,  1.5884e-02,  1.6433e-01,  4.4311e-02,
        -4.1141e-02, -6.5337e-02,  2.8514e-01, -1.2400e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2141, -0.0467,  0.0298, -0.0160, -0.0306, -0.0902,  0.0480, -0.0797,
         0.0048, -0.0839, -0.0422, -0.0290,  0.0287, -0.1226,  0.0143, -0.0161,
        -0.0557, -0.1162,  0.0067,  0.0641, -0.0277, -0.0465,  0.0081,  0.0219,
        -0.0877, -0.0339, -0.0036, -0.0335,  0.0952,  0.0137, -0.0041, -0.0862,
         0.1307,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1445,  0.0568,  0.0005, -0.0268, -0.0786, -0.0315,  0.0211, -0.0314,
         0.0616, -0.0846, -0.0930, -0.0966, -0.0321, -0.0529, -0.0706, -0.0300,
        -0.0287, -0.0687, -0.0904, -0.0917, -0.0488,  0.0140,  0.0718,  0.0451,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2297, -0.1201, -0.0575, -0.0045, -0.0035, -0.0948, -0.0937, -0.0147,
         0.0123, -0.0044, -0.0222, -0.0209, -0.0124, -0.0499, -0.0087,  0.0213,
        -0.0219, -0.0172, -0.0162, -0.2097, -0.0168,  0.0011, -0.1052,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1090, -0.0807,  0.0536, -0.0222, -0.0118, -0.0494, -0.0104,  0.0362,
         0.0164,  0.0216, -0.0967, -0.0301, -0.0704, -0.0920, -0.1149,  0.0817,
        -0.0217, -0.0668, -0.0656,  0.0387,  0.0161,  0.0367,  0.0071, -0.0200,
        -0.0599, -0.0378,  0.0211, -0.0500, -0.1930, -0.0575, -0.0764, -0.0665,
        -0.0652, -0.0626,  0.0066, -0.0427, -0.1155, -0.1908, -0.1104,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1466, -0.2644,  0.0219, -0.0610, -0.1298,  0.0290, -0.0506,  0.0163,
        -0.1528,  0.2915,  0.0345, -0.1922,  0.1622,  0.0871,  0.0121, -0.0783,
        -0.0772, -0.0900,  0.1600, -0.0992, -0.1429,  0.0512, -0.0044,  0.1795,
         0.4659,  0.0987,  0.0109,  0.0152, -0.0041, -0.2184,  0.0092,  0.0568,
        -0.3744, -0.0357, -0.2843,  0.1172,  0.0501, -0.0538, -0.0748, -0.0049,
        -0.0315, -0.3722,  0.3024,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0303, -0.0235,  0.0200,  0.0158,  0.0358, -0.0506, -0.0559, -0.0407,
        -0.0561,  0.0238, -0.0239, -0.0059, -0.0166, -0.0220, -0.0262,  0.0118,
        -0.0113, -0.0232,  0.0065, -0.0164,  0.0173,  0.0130, -0.0318, -0.0123,
         0.0095,  0.0041, -0.0062, -0.0039,  0.0408, -0.0280, -0.0112,  0.0019,
        -0.0423, -0.0032, -0.0581,  0.0353,  0.0360,  0.0353, -0.0400, -0.0015,
        -0.0236, -0.0341,  0.0194,  0.0211,  0.0053,  0.0170, -0.0127, -0.0212,
        -0.0034, -0.0519, -0.0484,  0.0204, -0.0999, -0.0026,  0.0013,  0.0429,
         0.0155], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2824, -0.5829, -0.4591, -0.0618, -0.1141, -0.0848,  0.0433, -0.1133,
         0.3804,  0.0043, -0.1119, -0.1084, -0.0307, -0.1550,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3151, -0.0751, -0.1537, -0.0490, -0.0197,  0.0238, -0.0685, -0.0020,
        -0.0166, -0.0266, -0.0409, -0.2225, -0.0964,  0.0033, -0.0296, -0.1144,
        -0.0197, -0.0809, -0.0491, -0.0311, -0.1684, -0.0286, -0.0410, -0.0343,
         0.0024, -0.0061,  0.0250, -0.0828,  0.0747, -0.0905,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0400,  0.0242, -0.0128, -0.1413, -0.0189,  0.3557,  0.2750, -0.2107,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.1042, -0.0458, -0.0014, -0.0280, -0.1042, -0.2792, -0.0693, -0.0121,
         0.0162, -0.0507, -0.0516,  0.0976, -0.0808, -0.0619,  0.0304, -0.0233,
         0.1089, -0.0179,  0.0318, -0.0532,  0.0651, -0.0109, -0.0214,  0.0208,
        -0.0165, -0.0254, -0.0223,  0.1017, -0.1026, -0.1196, -0.0149,  0.2325,
        -0.0093,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0844,  0.0186, -0.0447,  0.1408, -0.0035, -0.0059,  0.0187, -0.0189,
         0.1668, -0.0005, -0.0335,  0.0303,  0.0193,  0.0222,  0.0427, -0.0023,
         0.0224,  0.0455, -0.0003, -0.0026,  0.0833,  0.1428, -0.1351, -0.0035,
         0.0068, -0.0335, -0.0028, -0.0434, -0.0631,  0.0369,  0.0015, -0.0007,
        -0.0353, -0.0897, -0.0059, -0.0300, -0.0035, -0.0046,  0.0428,  0.0932,
         0.0528, -0.0939, -0.0619,  0.0811,  0.1475,  0.0241,  0.1685, -0.0734,
         0.1482,  0.1509, -0.1432, -0.0051,  0.0137,  0.0179, -0.0325, -0.0216,
         0.0630, -0.0046, -0.0298,  0.0228, -0.0331, -0.0496, -0.1793, -0.0329,
         0.0102,  0.0009,  0.0246,  0.0425,  0.0452,  0.1398, -0.0836],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0110, -0.0500, -0.0361,  0.0403, -0.0640, -0.1986, -0.5075, -0.0240,
        -0.0131,  0.0016, -0.0272,  0.0191, -0.0691,  0.0100, -0.0031, -0.0495,
        -0.0351,  0.0523, -0.0028, -0.0828,  0.0440, -0.0211,  0.0206, -0.0185,
         0.0099,  0.0376, -0.0456, -0.0330,  0.0573,  0.0109,  0.0216,  0.0419,
        -0.0765, -0.0400,  0.2561, -0.0513, -0.0637, -0.0415, -0.0093, -0.0041,
        -0.0183, -0.2140,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1262, -0.1870, -0.0137, -0.0448,  0.0772,  0.0060,  0.0032,  0.0274,
        -0.0709, -0.0401, -0.1014, -0.0312, -0.0547,  0.0070,  0.0326,  0.0198,
         0.0018, -0.0203, -0.0880, -0.0233, -0.0278, -0.0172,  0.0175, -0.0143,
        -0.0287, -0.0635, -0.0028,  0.0010, -0.0994,  0.0457,  0.0773, -0.0230,
         0.1428,  0.0078,  0.0430,  0.0104,  0.0476,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0434,  0.2980,  0.1186,  0.0176, -0.0500, -0.1503,  0.0770, -0.1046,
        -0.0640,  0.0383,  0.0464,  0.0893,  0.0269, -0.0289,  0.0328,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0114,  0.1215,  0.0974,  0.0657,  0.0231, -0.0264,  0.0461, -0.0010,
         0.0550,  0.0412, -0.0749, -0.0293,  0.1958,  0.0531,  0.0857,  0.0874,
         0.0855,  0.0741,  0.0808,  0.0105,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2157e-02, -1.7289e-01, -9.2411e-02, -2.1384e-02, -1.5610e-01,
        -1.0822e-01, -6.8446e-02,  5.1247e-02,  2.2483e-02, -3.6019e-02,
        -3.5832e-02,  5.6897e-03, -6.8241e-02, -2.5812e-02, -1.1467e-02,
        -4.7165e-03,  4.0563e-02, -3.8010e-02,  2.1666e-02, -3.2760e-02,
         2.9909e-03,  1.4078e-04, -3.4774e-02, -5.4875e-02, -2.2728e-02,
        -3.7436e-02, -7.0712e-02, -5.0816e-03,  7.0584e-03, -2.9849e-02,
        -4.2463e-02, -1.3463e-01, -9.5815e-03, -4.3424e-02,  7.6349e-03,
        -3.4105e-02, -1.7217e-02, -8.0343e-03, -9.1794e-02, -1.1633e-01,
        -2.6717e-03,  3.1815e-01,  3.0146e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1535, -0.0900,  0.0443, -0.1851, -0.0730,  0.0018,  0.0152, -0.0542,
        -0.0057,  0.0361, -0.0775, -0.0560, -0.0354, -0.0160, -0.1020,  0.0224,
         0.0239, -0.0129,  0.0397,  0.1568, -0.0286, -0.0342, -0.0518, -0.0167,
         0.0206,  0.0705,  0.0530,  0.0149, -0.0959,  0.0778, -0.0563, -0.0509,
        -0.0046, -0.0282, -0.0439,  0.0141,  0.0417, -0.0251,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0426, -0.0878, -0.1744, -0.1925, -0.0837, -0.0546, -0.1039,  0.1116,
         0.0566,  0.0558, -0.0733, -0.0410, -0.0262,  0.0162,  0.0441, -0.0288,
        -0.1491,  0.0168, -0.0121, -0.0191, -0.0269, -0.0657,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0240, -0.0779,  0.0559,  0.0187,  0.0344,  0.1299, -0.0327,  0.0226,
        -0.0771, -0.0354,  0.0115,  0.0141,  0.0101, -0.0261, -0.0066, -0.0266,
        -0.1279, -0.0317, -0.0879, -0.0521,  0.0927,  0.0050, -0.0078, -0.0468,
        -0.0311,  0.0335, -0.0565,  0.0212,  0.0161, -0.0586,  0.0594, -0.0133,
        -0.0436,  0.0125, -0.0342, -0.0448, -0.0200, -0.0720, -0.0509,  0.0364,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1632,  0.0324, -0.0314, -0.0779, -0.0351, -0.0418, -0.0950, -0.0659,
        -0.0306, -0.2933, -0.0782, -0.0920,  0.0299, -0.0275, -0.0743, -0.1870,
        -0.0678,  0.2018, -0.1126, -0.0349, -0.0987,  0.0380, -0.0240, -0.1934,
        -0.0447, -0.0349, -0.0401, -0.0361, -0.0453, -0.0801, -0.0305, -0.2510,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0010, -0.2970,  0.1163,  0.0052, -0.1383,  0.1470, -0.0979, -0.1132,
        -0.2098,  0.0054,  0.0101,  0.0255,  0.0367, -0.0207, -0.0783,  0.1513,
        -0.0450, -0.0020, -0.0086,  0.0953, -0.0099,  0.0108,  0.0163, -0.0592,
        -0.0119,  0.0604, -0.0683, -0.0241, -0.0023, -0.0163, -0.0157, -0.0006,
         0.0465,  0.0200,  0.0248,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.1152, -0.1216, -0.0104,  0.0286,  0.0021, -0.0100, -0.0250, -0.2688,
        -0.1011, -0.2054,  0.0426, -0.0441, -0.0468, -0.0028, -0.0246,  0.0224,
        -0.0010, -0.0537, -0.0569, -0.0127, -0.0863,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1903, -0.3668,  0.0897, -0.1849, -0.0626, -0.1599,  0.2694, -0.0950,
        -0.2802, -0.0259, -0.0130,  0.0147,  0.0036,  0.0085, -0.0700,  0.0059,
        -0.0473,  0.0944,  0.0704,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0693, -0.9338, -0.1155, -0.1235, -0.0402, -0.0431,  0.0338, -0.0209,
        -0.0338, -0.2179, -0.1214, -0.0421,  0.0603, -0.1375,  0.0226, -0.0530,
         0.0441,  0.0017, -0.1285,  0.5329,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0184, -0.2256,  0.0040, -0.0403,  0.0117,  0.0755,  0.1110, -0.0171,
        -0.0544, -0.0184, -0.0927,  0.0087, -0.0243, -0.0714, -0.0720, -0.0415,
        -0.1350, -0.0245, -0.0129, -0.0741,  0.0284, -0.1888,  0.2036,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4189e-01, -6.3652e-02, -2.9037e-02,  3.8970e-05, -5.7033e-02,
        -2.7628e-03, -3.6497e-02, -1.2904e-02, -1.5547e-01,  1.5260e-02,
         4.5141e-02, -7.6494e-04,  2.4778e-01, -3.8453e-02,  7.5597e-02,
        -2.2881e-02, -8.7068e-02,  4.7791e-02,  4.6607e-02,  5.7395e-02,
         5.1123e-02,  5.5968e-03,  1.0454e-01, -4.2156e-01,  6.4775e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2308,  0.4323,  0.0812, -0.0999, -0.0041,  0.0135, -0.0650, -0.0253,
        -0.1406,  0.0569,  0.0537,  0.0089,  0.0163,  0.1870, -0.1021,  0.0318,
         0.0551,  0.0059,  0.0093, -0.0091,  0.0061,  0.0306,  0.0062,  0.0133,
         0.0072,  0.0186,  0.0204,  0.0145,  0.0256,  0.0189,  0.0559,  0.0050,
         0.0425, -0.0755,  0.0082,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2120,  0.0064, -0.0606, -0.0564, -0.0136, -0.0167, -0.0659,  0.0627,
        -0.1651, -0.0051, -0.0548, -0.0332,  0.0325, -0.0139, -0.0701, -0.0506,
         0.0180, -0.0290,  0.1249,  0.0116, -0.0527, -0.0803,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1677,  0.1237,  0.0574, -0.0014,  0.0461, -0.0399,  0.0128, -0.0188,
        -0.0210, -0.0074,  0.0061,  0.0034, -0.0537, -0.0059, -0.2363, -0.0582,
        -0.1384, -0.0409, -0.0906, -0.2138, -0.0206,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0572, -0.1917, -0.0557,  0.0318,  0.0089,  0.0340, -0.0474, -0.1012,
        -0.0025, -0.0016, -0.0614,  0.0412, -0.0894, -0.0747, -0.0474,  0.0086,
        -0.0073, -0.0253, -0.0416,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0765, -0.2220, -0.0221, -0.0944, -0.0567, -0.1020,  0.0876,  0.0109,
         0.0134,  0.0203, -0.0202, -0.0347,  0.0052, -0.0318,  0.0397,  0.0234,
        -0.0017, -0.0305,  0.0264, -0.0313, -0.0010, -0.0650, -0.0494, -0.0257,
         0.0532, -0.0295,  0.0152, -0.0719, -0.0122, -0.1490, -0.1276, -0.0278,
        -0.0061, -0.0765, -0.0048,  0.0055,  0.0284,  0.0121, -0.0090, -0.0025,
         0.0209, -0.0526,  0.0064,  0.0183, -0.0146, -0.0701, -0.0441],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5444, -0.1479, -0.1016,  0.0117,  0.0461, -0.0045,  0.0277, -0.0103,
        -0.0634,  0.0234,  0.0104,  0.0365, -0.0262,  0.0279,  0.2613, -0.0736,
         0.0536, -0.0248, -0.0203,  0.0361,  0.0267, -0.1271, -0.0107, -0.0948,
        -0.0516,  0.0790,  0.0715,  0.0772, -0.0642,  0.0028,  0.0468,  0.0046,
        -0.1554, -0.0938,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2030, -0.0813,  0.0035,  0.0041,  0.0215, -0.0578,  0.0576, -0.0214,
        -0.0008, -0.0219,  0.1146,  0.0700, -0.0141, -0.2955,  0.0273,  0.0176,
         0.0173,  0.1122,  0.0924, -0.0909, -0.0737,  0.0081,  0.0063,  0.0951,
        -0.0448, -0.0232, -0.0648, -0.0047,  0.0441, -0.0484,  0.0326,  0.0281,
        -0.0293, -0.0332,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.2332, -0.4041, -0.2294, -0.0990, -0.0738, -0.1514, -0.0327, -0.0833,
        -0.0702, -0.1411, -0.2060, -0.1412, -0.0528, -0.1423, -0.0033, -0.0798,
        -0.0287,  0.0367,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6455,  0.0230, -0.0260, -0.0660, -0.0894,  0.0341,  0.0496,  0.0012,
        -0.0215, -0.1946,  0.1263, -0.1083, -0.0865, -0.0865, -0.0358, -0.0571,
        -0.1888,  0.0297, -0.0698, -0.2823,  0.0397,  0.1697, -0.0556, -0.1534,
        -0.0078,  0.1237,  0.0061,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6421,  0.3036,  0.1752,  0.0057,  0.1383, -0.1221,  0.3483,  0.3824,
         0.1660,  0.1485, -0.0096,  0.0364,  0.0038,  0.2204,  0.0094,  0.0151,
         0.1385, -0.0114,  0.1444, -0.1690,  0.0190,  0.0244,  0.0243, -0.0256,
        -0.2580,  0.1072,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5648, -0.1622, -0.0519, -0.0570, -0.3795, -0.1715,  0.0086,  0.0391,
        -0.0294, -0.0389, -0.1101,  0.0164, -0.0954,  0.0128, -0.0541,  0.0106,
        -0.0345, -0.0278, -0.0305,  0.0929,  0.1108,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0202, -0.1343, -0.1089, -0.4341,  0.0959, -0.0157,  0.1340, -0.0303,
        -0.0029,  0.0761, -0.0664, -0.0450, -0.2018,  0.0377, -0.2046,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1979,  0.1127, -0.1430, -0.0158,  0.3586, -0.0400,  0.0452,  0.0354,
        -0.0772,  0.0210, -0.0530,  0.1304, -0.1243, -0.0895,  0.0690, -0.0249,
         0.2335,  0.1987,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0752, -0.0827, -0.0587, -0.1636, -0.0715, -0.1676,  0.0435, -0.0699,
        -0.0038, -0.0624, -0.0351,  0.0008,  0.0251, -0.0315,  0.0013, -0.0453,
         0.0389,  0.0087, -0.0109,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0719, -0.0795, -0.0447, -0.0210,  0.0477,  0.0189,  0.0328, -0.0365,
        -0.0542,  0.0099,  0.0328,  0.0257, -0.0206,  0.0142,  0.0023, -0.0283,
        -0.1006, -0.0680,  0.0732, -0.0520, -0.1094, -0.0051, -0.0126, -0.0003,
        -0.0127, -0.0375, -0.0018,  0.0033, -0.0174, -0.0182,  0.0021,  0.0399,
        -0.0675, -0.0270, -0.0413, -0.0210, -0.1207,  0.0130, -0.0074, -0.0158,
        -0.0085,  0.0071,  0.0520, -0.1205], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3369e-01, -9.9944e-02, -8.3187e-02, -3.8106e-02,  6.1646e-03,
        -3.3987e-02, -7.3017e-02,  2.5243e-02, -9.9947e-02,  4.8971e-02,
        -4.7667e-02,  2.9818e-03,  1.1905e-02,  1.6218e-04,  3.3776e-02,
        -4.7926e-02,  6.6060e-02, -1.0238e-01,  5.4526e-03, -2.2378e-02,
        -8.3012e-02,  5.8686e-04,  5.8343e-03,  3.8306e-03,  3.0105e-02,
        -4.1312e-02,  8.3184e-03, -5.1348e-02,  2.0049e-02,  1.6234e-01,
        -1.1343e-02, -1.4181e-01, -2.4739e-02, -5.9746e-01,  7.4704e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0826, -0.2595, -0.0245, -0.0430,  0.0151,  0.0279, -0.0109, -0.0963,
        -0.1835, -0.0196, -0.0310, -0.0178, -0.1731, -0.0318, -0.0121, -0.0903,
         0.0239, -0.0440,  0.1535,  0.0103, -0.0248, -0.0110, -0.0317,  0.0329,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2085,  0.0054, -0.0127, -0.0168,  0.0134, -0.0634,  0.0084, -0.1195,
        -0.1061,  0.0300, -0.1121, -0.0378,  0.1184, -0.0623, -0.0677,  0.0334,
         0.0559,  0.0033, -0.0607, -0.0198, -0.0283, -0.0342, -0.0189, -0.0241,
         0.0145, -0.0605, -0.0243, -0.0268,  0.0236, -0.0792, -0.0373,  0.0287,
        -0.1800, -0.0047,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0143, -0.3637, -0.1451,  0.1192, -0.1467,  0.0527, -0.0884, -0.0173,
         0.0187, -0.0230, -0.0671,  0.0122,  0.0079, -0.0996, -0.0444, -0.1056,
        -0.0409, -0.0283,  0.0035,  0.1698, -0.0164,  0.0943, -0.1206, -0.0604,
         0.0110,  0.0047,  0.0152,  0.1001,  0.1601, -0.0357,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.9245, -0.5517, -0.3242, -0.2207, -0.0231, -0.0514, -0.2373,  0.0162,
         0.0288, -0.1207, -0.0334, -0.1096,  0.1265,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2972,  0.3175,  0.0898, -0.2832,  0.2629,  0.1306, -0.3647,  0.0234,
        -0.0743,  0.1042, -0.0391, -0.2166,  0.0748, -0.1204, -0.0447, -0.1802,
        -0.1887,  0.1566,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5749, -0.2025,  0.0315, -0.0134, -0.0753,  0.2895, -0.0746, -0.3039,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0308, -0.0177, -0.0262, -0.0237,  0.0058, -0.0315, -0.0098, -0.0211,
        -0.0368, -0.0137,  0.0047, -0.0175, -0.0366,  0.0417, -0.0197, -0.0161,
         0.0007, -0.0041,  0.0006,  0.0033,  0.0547, -0.0632,  0.0001, -0.0015,
         0.0013, -0.0425, -0.0278, -0.1052, -0.1133, -0.0591,  0.0091, -0.0172,
         0.0264, -0.0771, -0.0399, -0.0173, -0.0868, -0.0178, -0.0190, -0.0128,
        -0.0185, -0.0231, -0.0266,  0.0843, -0.0354,  0.1007,  0.0538],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0078, -0.1093, -0.2577, -0.4775,  0.0298, -0.2232, -0.2658, -0.2174,
        -0.6800,  0.0748, -0.1735,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3977, -0.1496,  0.0052, -0.2203, -0.0982, -0.0590, -0.0792, -0.2844,
         0.1207, -0.0643, -0.0679, -0.0630, -0.1162, -0.1011,  0.1424, -0.1336,
        -0.0295,  0.0876,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3155, -0.3892, -0.2414, -0.3785,  0.1752,  0.0451,  0.1940, -0.0099,
        -0.1043, -0.1053,  0.0247,  0.0871,  0.1488,  0.2447, -0.1601,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0207, -0.2286,  0.0298, -0.0682, -0.2271, -0.0655, -0.0388, -0.1064,
        -0.1396, -0.1025, -0.1590,  0.0998,  0.0116,  0.0861, -0.0492,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3624, -0.0391, -0.1560, -0.1621,  0.0122, -0.1915, -0.0418, -0.1266,
        -0.0559,  0.0152,  0.0238, -0.0614, -0.1169, -0.0040,  0.0106, -0.0092,
        -0.0822,  0.0888,  0.0010, -0.0692, -0.0903, -0.0926, -0.0563,  0.0196,
        -0.1576,  0.1177,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0317, -0.2820, -0.3401,  0.0510, -0.1180,  0.0082,  0.0904, -0.1410,
        -0.1717,  0.0119,  0.0243,  0.1413, -0.1994,  0.1418, -0.0410,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7867e-01, -6.9903e-02,  1.8809e-02, -1.8078e-02, -2.2958e-02,
         5.2957e-02, -9.4883e-02,  7.9873e-03, -2.8944e-02, -3.0702e-01,
        -2.0057e-02,  3.4106e-02, -2.3343e-02, -3.7834e-02, -6.0674e-02,
         3.2931e-02, -2.3582e-02, -2.2556e-02,  2.6888e-02,  8.3484e-05,
        -2.5932e-02,  2.9572e-02,  4.6637e-03,  4.3061e-02,  2.9082e-01,
        -4.6649e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0541, -0.0056,  0.0347,  0.0531, -0.0128, -0.0561,  0.1041, -0.0600,
        -0.0198, -0.0768, -0.2122, -0.0200,  0.0153, -0.0151, -0.0344, -0.0319,
         0.0666, -0.0425, -0.0244, -0.0425,  0.0262, -0.0463,  0.0315,  0.0183,
         0.2023,  0.0199,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.0357, -0.0486, -0.1560, -0.0283, -0.0470,  0.0905, -0.0350, -0.0687,
        -0.0218, -0.0452, -0.0306, -0.0263, -0.0495, -0.0413, -0.0330, -0.1694,
        -0.0624,  0.0529, -0.0485, -0.0655, -0.0374,  0.0584,  0.0236, -0.0108,
        -0.0317, -0.0530, -0.0462, -0.0218, -0.1561, -0.0467, -0.0509, -0.0436,
         0.0113, -0.0104,  0.0903, -0.0814,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1728, -0.2563, -0.2523, -0.3109, -0.1073, -0.0400,  0.0474, -0.0981,
        -0.0029, -0.0839, -0.0435, -0.0045,  0.0159, -0.0636, -0.0184,  0.0095,
         0.0217, -0.0432,  0.1502, -0.1275, -0.0021,  0.0264,  0.0335,  0.0395,
         0.0328, -0.0464,  0.0030, -0.0490, -0.0480, -0.0494,  0.0958, -0.0789,
        -0.0352,  0.0783, -0.1317, -0.1784,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0301, -0.1895, -0.1021, -0.1134,  0.0467, -0.1234, -0.0402, -0.0304,
         0.0176, -0.1078, -0.0881, -0.0082,  0.0384, -0.0956, -0.0811,  0.0220,
         0.0019,  0.0152, -0.0440,  0.0334,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8021, -0.0325,  0.0341, -0.3376, -0.2179, -0.1089,  0.0640, -0.0336,
         0.0753,  0.1216,  0.0049,  0.0172,  0.1082, -0.0431,  0.0081, -0.1680,
        -0.1939, -0.0299,  0.0166,  0.0287,  0.1262, -0.0093,  0.0024, -0.0639,
        -0.1336,  0.0651, -0.0893,  0.1208,  0.1753, -0.3000, -0.1527,  0.2815,
         0.1876,  0.0672,  0.0639,  0.3762,  0.5320,  0.2353,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0215e-02, -8.7130e-02,  7.8618e-03,  3.1234e-02, -4.8227e-03,
        -4.8266e-03, -3.1099e-02,  5.7060e-02,  9.9147e-03,  2.6713e-02,
        -1.0642e-01, -1.2206e-01,  2.6908e-03,  2.8427e-02,  1.8733e-02,
        -3.3124e-02, -9.5267e-04, -1.1891e-01,  3.1129e-02, -1.6872e-03,
         1.3674e-03,  8.9662e-03,  2.2714e-02, -8.7157e-03,  4.6842e-02,
        -4.1089e-02, -8.0956e-03,  3.3957e-03,  2.5579e-02, -4.3059e-05,
        -1.6622e-02,  2.0066e-02,  1.0163e-02,  5.3312e-03, -6.6392e-02,
         5.9723e-02,  5.2013e-02, -1.2459e-01,  1.4233e-02, -6.0439e-02,
         9.9915e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4798, -0.6270, -0.0147,  0.0280, -0.0602, -0.0281,  0.0677, -0.0039,
        -0.0131,  0.0093,  0.0041, -0.0803, -0.0052,  0.0727, -0.0722, -0.0503,
         0.0352,  0.0047, -0.0023, -0.0209, -0.0488, -0.1692,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8410e-02,  2.1615e-02,  6.9257e-02, -2.0937e-02,  3.4793e-03,
        -2.6128e-02,  6.1443e-02, -9.4466e-02, -3.0884e-01, -3.6271e-02,
         8.4990e-02, -4.0353e-02,  8.0454e-02, -7.8104e-02,  6.7502e-02,
        -2.4981e-04, -1.6123e-02, -5.3831e-02, -5.5066e-02, -3.0462e-02,
        -2.0538e-01, -7.2633e-02,  2.5226e-02,  2.0542e-02, -1.3100e-02,
        -4.6622e-02, -5.8215e-03, -3.9032e-02, -1.5167e-01, -6.8284e-02,
        -2.6180e-02, -1.8704e-02,  3.0736e-03, -6.0685e-02, -2.9405e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1601, -0.0253, -0.0607, -0.1049, -0.0547,  0.0658,  0.0144, -0.0035,
         0.1698,  0.0313, -0.0471, -0.0354, -0.0420,  0.0218,  0.0639, -0.0928,
        -0.0452,  0.0255, -0.0533,  0.0372, -0.1029, -0.0591, -0.0368, -0.0534,
         0.0157, -0.0555,  0.0074,  0.0888,  0.0152,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2059, -0.0504, -0.1159, -0.1011, -0.0421, -0.0963, -0.0025, -0.0726,
        -0.0803, -0.0773, -0.0595, -0.1038, -0.0050, -0.1354, -0.1112, -0.0682,
        -0.0209, -0.0380, -0.1726, -0.0254, -0.1162,  0.0200,  0.0947,  0.1973,
        -0.1958,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0376, -0.1000, -0.1441, -0.4183, -0.1376, -0.0398, -0.0831, -0.0404,
        -0.0797,  0.0150,  0.0478,  0.0315,  0.0107,  0.1478, -0.0657, -0.0882,
        -0.0271, -0.0225,  0.0101, -0.0542, -0.0564, -0.1444, -0.0420,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1193,  0.0377, -0.0081, -0.0762, -0.1290, -0.0628,  0.4806, -0.0594,
        -0.0215, -0.0185, -0.0257, -0.1427, -0.0239, -0.0389, -0.0056,  0.0632,
         0.0393,  0.0624, -0.0016, -0.0132, -0.0822,  0.2683, -0.0374,  0.1459,
        -0.0406,  0.0827, -0.0697,  0.0192, -0.0457, -0.0350, -0.0630,  0.0219,
        -0.0317, -0.0500, -0.0813, -0.0506, -0.0099,  0.4412,  0.0891,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4040, -0.3295, -0.1761, -0.3613, -0.3592, -0.1357,  0.0892, -0.1631,
        -0.2904,  0.0263, -0.5028, -0.0701,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.1389, -0.1298, -0.2549,  0.0121, -0.0214, -0.1249, -0.0479, -0.0749,
        -0.0041, -0.0585, -0.0081, -0.0682, -0.0043, -0.0418, -0.0137,  0.0179,
        -0.0258, -0.0697, -0.0124, -0.0412, -0.0117, -0.0418, -0.0557, -0.0334,
        -0.0246, -0.0307, -0.0156, -0.0263,  0.0926,  0.0140,  0.0913,  0.0507,
        -0.0208, -0.0301, -0.0506, -0.0900, -0.0445, -0.0144, -0.0536, -0.0336,
        -0.0352, -0.0253, -0.0335,  0.0326,  0.0610,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3234, -0.2394, -0.0378, -0.1168,  0.5732, -0.2278, -0.8906, -0.0873,
        -0.1386, -0.0318,  0.3500, -0.0862,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1983, -0.2398, -0.4691, -0.0089,  0.4615, -0.2476, -0.0778,  0.2036,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1565, -0.1436, -0.0501,  0.1379,  0.0182,  0.0032, -0.0417, -0.0012,
        -0.0566, -0.1880, -0.0621,  0.0962, -0.0165, -0.0313, -0.0295,  0.0323,
         0.0344, -0.0569,  0.0309, -0.0178, -0.0310,  0.0387, -0.0110, -0.0079,
        -0.0244, -0.0006, -0.0136, -0.0359, -0.0032, -0.0111,  0.0185, -0.0435,
        -0.0039, -0.0410, -0.0077, -0.0292, -0.0464, -0.0294, -0.0451, -0.0538,
        -0.0210, -0.0036,  0.0586, -0.0067, -0.1473,  0.1640, -0.1768, -0.1256,
        -0.2157, -0.1741, -0.0488,  0.0424, -0.0247,  0.1390, -0.0259, -0.0449,
        -0.0248, -0.0118,  0.0908, -0.0386,  0.0150], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2550, -0.0953,  0.0710, -0.1135, -0.0676, -0.0147, -0.0906,  0.0050,
         0.0747, -0.0487, -0.1030,  0.0064, -0.0157, -0.0362, -0.1955, -0.0378,
        -0.0185, -0.0364, -0.0575, -0.0241, -0.0696, -0.0539,  0.0574, -0.0345,
        -0.0478, -0.0100, -0.0283, -0.0293, -0.0377, -0.1113, -0.0118, -0.0221,
        -0.0258,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0162, -0.0673, -0.1233, -0.0271,  0.0303,  0.0223, -0.0258,  0.0428,
         0.0060, -0.0165, -0.0679, -0.0542, -0.0618, -0.0519, -0.0502, -0.0514,
        -0.0697, -0.1408, -0.3210,  0.0133,  0.1174, -0.1314, -0.0159,  0.0367,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2084, -0.6544, -0.0680, -0.0925, -0.0172, -0.0232, -0.0733, -0.1136,
        -0.0178, -0.1219, -0.0351, -0.1450, -0.2133, -0.1060, -0.0488, -0.0673,
         0.0233,  0.0684, -0.0660, -0.0376, -0.0978, -0.0540, -0.1360, -0.2219,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5388, -0.0371, -0.0684, -0.0184,  0.0115,  0.0797, -0.0458, -0.0430,
         0.0294, -0.0309,  0.0224, -0.0581,  0.0828,  0.0012, -0.0765, -0.0888,
        -0.0566, -0.0244,  0.0431, -0.0976, -0.0509, -0.0371, -0.0732, -0.1844,
        -0.0449,  0.0400, -0.0083, -0.0496,  0.1032,  0.0335, -0.0186,  0.0160,
         0.0835,  0.0963,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2026, -0.2676, -0.0247,  0.0272, -0.1324,  0.0488, -0.0407, -0.0850,
         0.2560, -0.1464,  0.1584,  0.1158,  0.1132, -0.2909,  0.0900,  0.0931,
        -0.0945, -0.0629, -0.0054,  0.0138, -0.0026, -0.1678, -0.0127,  0.0765,
         0.1413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0110, -0.0066, -0.0130,  0.0501, -0.0248,  0.0275,  0.0635,  0.0200,
         0.0509,  0.3794,  0.1707,  0.0263,  0.0063, -0.0339, -0.1491, -0.0426,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6284, -0.0212, -0.1006, -0.1639, -0.0502, -0.0969, -0.1419, -0.0262,
         0.0829, -0.0554, -0.0474, -0.1062, -0.1126, -0.0552, -0.0429,  0.0227,
        -0.1028, -0.0568, -0.0787, -0.0741, -0.0130, -0.2810, -0.0965,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1449, -0.4598,  0.0833,  0.0958, -0.1733, -0.1484,  0.0406,  0.0215,
        -0.0812,  0.1568, -0.0436, -0.0301,  0.0244, -0.2960,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.0119, -0.0679, -0.0130, -0.1079, -0.0304,  0.0337, -0.0094,  0.0139,
         0.0215,  0.0069, -0.1357, -0.0400, -0.1296,  0.0132, -0.0248, -0.0232,
        -0.1002, -0.1656, -0.0226,  0.0150, -0.0296, -0.0375, -0.0068,  0.0203,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1229,  0.2480,  0.1758, -0.0338, -0.1290, -0.0505, -0.1022, -0.0192,
         0.0771, -0.1586, -0.0391,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1074, -0.6100,  0.2690,  0.2868, -0.0999,  0.1165, -0.4448,  0.1989,
         0.0662, -0.0166,  0.0798,  0.0357, -0.0410, -0.1362, -0.0950,  0.0114,
        -0.0481,  0.0630, -0.0889,  0.2961, -0.0528,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0475, -0.2022, -0.2579, -0.0523,  0.1059, -0.1127, -0.1958, -0.2059,
        -0.1121, -0.0675, -0.0102,  0.0169,  0.2230, -0.0693,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0145, -0.1077,  0.0848,  0.0078, -0.0061,  0.0139,  0.0268,  0.0964,
         0.0981, -0.1003, -0.0353,  0.1278,  0.2087, -0.0760,  0.0034, -0.0565,
         0.0456,  0.1045,  0.1952,  0.0220,  0.1249,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0014, -0.0822, -0.0727, -0.0277, -0.0368, -0.0562, -0.0318, -0.1245,
        -0.0545, -0.1286, -0.0015, -0.0429, -0.0268,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2253, -0.0910, -0.0105, -0.0096,  0.0130,  0.0183, -0.0340, -0.0491,
        -0.0251,  0.0345, -0.0336,  0.0319, -0.0296,  0.0102,  0.0035, -0.0034,
        -0.0357,  0.0552,  0.0023,  0.0260, -0.0888,  0.0391,  0.0125,  0.0872,
        -0.1185,  0.0034, -0.0848, -0.0427, -0.0602, -0.0121, -0.0587,  0.1739,
        -0.0169,  0.0641, -0.0356,  0.0226, -0.0383, -0.0059,  0.0004, -0.0212,
        -0.0523,  0.0191, -0.0050,  0.0075,  0.1265], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4723, -0.0969, -0.0999, -0.1070, -0.1300,  0.0373, -0.0468,  0.0206,
        -0.0585, -0.0015, -0.0695,  0.0408,  0.0565, -0.0395, -0.0571,  0.2025,
        -0.0630,  0.0247, -0.0056, -0.0834,  0.0335, -0.2351, -0.0682, -0.1673,
         0.0371, -0.0072,  0.0744, -0.1035, -0.3214, -0.1192,  0.2678,  0.3665,
        -0.3614, -0.3459,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0374, -0.2388, -0.0670, -0.1514, -0.1360, -0.0107, -0.1806, -0.1702,
         0.2109, -0.1117, -0.0461, -0.0734,  0.0554, -0.1296, -0.1045,  0.0524,
        -0.0128, -0.1318,  0.0163,  0.0152, -0.0560, -0.0943, -0.0185,  0.1894,
        -0.0243, -0.0597,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0966, -0.1875,  0.0793, -0.0604,  0.0575, -0.0338,  0.0468, -0.0247,
         0.0254, -0.1609,  0.0133, -0.0113,  0.0339,  0.0260, -0.0098, -0.0546,
        -0.1821,  0.0706, -0.0459, -0.0157,  0.0122, -0.1257,  0.0534,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0158, -0.0685, -0.0346, -0.0941, -0.0028, -0.0610,  0.0409, -0.0308,
        -0.0650, -0.0017, -0.0257,  0.1055, -0.0072, -0.0209, -0.1649,  0.0916,
         0.0212, -0.0261, -0.0303, -0.0372, -0.0152,  0.0057, -0.0037, -0.0265,
         0.0156,  0.0551, -0.0397, -0.1347,  0.0459,  0.0045, -0.0569, -0.0460,
         0.0790, -0.0099,  0.0009,  0.0258, -0.0267,  0.0030, -0.0648, -0.0278,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0999, -0.0316,  0.0038,  0.0643, -0.0656, -0.0864,  0.0083, -0.0937,
         0.0086, -0.1508, -0.0181, -0.0279, -0.0348, -0.0797, -0.0310, -0.0632,
        -0.0590, -0.0062,  0.1608,  0.0280, -0.0408, -0.0271,  0.0199,  0.0436,
        -0.0761, -0.0172, -0.0358, -0.1650, -0.0316, -0.3410, -0.3602,  0.0502,
         0.0162,  0.0294,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.1647, -0.1111, -0.0593,  0.0060,  0.1015, -0.0007,  0.0435,  0.0006,
        -0.0337,  0.0439, -0.0473, -0.0100,  0.0047, -0.0481, -0.1217, -0.1265,
        -0.0935,  0.0259,  0.0154, -0.0187, -0.0777,  0.0288,  0.0412,  0.1017,
        -0.0100, -0.0745,  0.1504,  0.0473,  0.0063,  0.0583,  0.0074, -0.0408,
        -0.0549, -0.0091, -0.0260,  0.0240,  0.0267, -0.0261, -0.0085,  0.0005,
        -0.0306, -0.0150, -0.0318, -0.0203, -0.0224,  0.0056,  0.0054,  0.0098,
        -0.0089,  0.0157, -0.0072,  0.0180], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0251, -0.0566, -0.0489, -0.0509, -0.1699, -0.1552,  0.0400, -0.0351,
        -0.0494, -0.0825,  0.0137, -0.0154, -0.1797, -0.0949, -0.1008, -0.0199,
        -0.0578,  0.0782, -0.0495, -0.0366, -0.0266, -0.0159, -0.0417, -0.1388,
         0.0235, -0.0169,  0.0413,  0.0154, -0.0303, -0.0233, -0.0675,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0153, -0.0760, -0.0457, -0.0078, -0.0337, -0.0183, -0.0802, -0.0119,
        -0.0462,  0.0172, -0.0313, -0.0446, -0.0434, -0.0612, -0.0839, -0.0136,
         0.0346,  0.0476, -0.0161, -0.0678,  0.0317, -0.0097, -0.0815,  0.0992,
         0.1274,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7183,  0.0354,  0.0018, -0.0957, -0.1283, -0.2335, -0.1328,  0.0610,
         0.0217, -0.1623, -0.0663,  0.1697,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0234, -0.1995, -0.1119, -0.1132, -0.0445, -0.0377, -0.0864,  0.0021,
        -0.0099,  0.0278, -0.0961, -0.0335, -0.0466, -0.0877, -0.0341, -0.0806,
        -0.0141, -0.0563,  0.0188, -0.0103, -0.0283, -0.0043,  0.1248, -0.0837,
         0.1063, -0.0843,  0.0276, -0.0090, -0.0476,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4668,  0.0207, -0.0992, -0.0203, -0.0340, -0.1745, -0.3091,  0.2951,
         0.4631,  0.0607, -0.0709, -0.2652,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6720e-01, -1.9919e-01, -9.6692e-02, -6.3889e-02,  8.4003e-02,
        -1.3744e-01, -1.4692e-01, -4.6954e-02, -6.8223e-02, -8.2329e-06,
         5.4093e-02,  1.5708e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0869, -0.0060, -0.2223,  0.0025, -0.0135,  0.0432, -0.0030, -0.0132,
        -0.1505, -0.0043, -0.0198, -0.0754, -0.0187,  0.0777,  0.0008, -0.0247,
         0.0016, -0.0562, -0.0527,  0.0428,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6522,  0.1945,  0.0485,  0.1274, -0.0601, -0.0471, -0.1143, -0.0372,
        -0.0895, -0.0460,  0.0088, -0.0363, -0.0708, -0.0985,  0.0421,  0.0203,
         0.0395, -0.0152,  0.0261, -0.0447, -0.0348, -0.0192, -0.0677,  0.0556,
         0.2127, -0.0572,  0.2499, -0.0590, -0.0101, -0.0327,  0.0455,  0.0129,
        -0.0413, -0.1608, -0.0085, -0.2025,  0.1393,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4594, -0.8353,  0.0177, -0.2594, -0.0970, -0.0198,  0.1472,  0.0347,
        -0.2008,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0734, -0.3131,  0.5676, -0.1415, -0.0260, -0.2658, -0.1077, -0.0472,
        -0.0721,  0.0602, -0.0582,  0.0379, -0.1134, -0.1040,  0.0731,  0.0977,
        -0.0581, -0.0291, -0.0614,  0.0463,  0.0349, -0.0355,  0.0280, -0.0903,
         0.0804, -0.7021,  0.3441,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7524e-02, -1.6916e-01, -7.3314e-02,  2.7289e-02, -5.5192e-02,
        -8.0660e-02, -4.2057e-02, -1.7227e-02,  1.2083e-03,  1.2158e-01,
         4.9154e-02, -4.3606e-02, -2.7905e-02, -8.3778e-02, -1.4778e-02,
        -5.3830e-02, -6.8451e-02, -1.9685e-02, -4.5985e-03,  2.7024e-03,
        -2.1360e-02, -1.4482e-02, -5.3488e-02,  5.5175e-02, -9.6727e-03,
         1.1567e-01,  5.7705e-03, -5.8871e-02, -6.1832e-01,  2.9801e-02,
        -5.5260e-04, -4.4934e-03,  9.9532e-02,  9.6730e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-5.4162e-02,  5.5057e-02,  8.7773e-02, -4.7761e-02, -1.0480e-01,
        -1.4835e-01,  7.4740e-02, -9.6198e-03,  1.1166e-02, -4.3459e-02,
        -1.7235e-01, -9.5900e-02, -4.5400e-02,  2.1901e-02,  4.6910e-02,
         2.4551e-03, -1.1586e-01,  4.9927e-02,  1.0252e-01,  1.2105e-02,
         4.9079e-02, -4.8067e-02, -1.9468e-02,  3.4122e-02,  1.9501e-02,
        -1.5090e-02, -5.1468e-02,  5.8245e-02,  3.4746e-02,  1.0326e-01,
         4.4647e-03, -8.9460e-02, -2.2213e-02,  1.0303e-01,  2.7561e-03,
        -6.0972e-02, -5.2756e-02,  4.6979e-03, -7.9120e-05,  2.3768e-02,
        -3.6605e-02, -8.7876e-02,  8.6445e-02,  1.1861e-02, -1.1202e-01,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2115, -0.2634, -0.1470, -0.0469, -0.2215, -0.0376, -0.3000,  0.0105,
        -0.0957, -0.1004,  0.0668, -0.0052, -0.2230,  0.0761,  0.0976,  0.0979,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3221,  0.0147,  0.1978,  0.1190,  0.0905, -0.0082,  0.0571,  0.0511,
        -0.0514, -0.0770, -0.0551, -0.0562, -0.1958, -0.0146, -0.0643,  0.0218,
        -0.0106,  0.0021,  0.0744,  0.0017, -0.0874, -0.0433, -0.0056, -0.0164,
         0.1501,  0.1117,  0.1387,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0551,  0.1178, -0.2303, -0.0855, -0.1000, -0.0566, -0.0130, -0.0049,
        -0.0346, -0.0185,  0.0123, -0.0126, -0.0296,  0.0405, -0.0222, -0.0087,
        -0.0205,  0.0142,  0.1480,  0.0040,  0.0391, -0.0109, -0.0418, -0.0715,
         0.0677,  0.0016, -0.0224, -0.0506,  0.0030, -0.0354,  0.0396, -0.0209,
        -0.0663,  0.0254, -0.0356,  0.0342, -0.0178, -0.0683, -0.0380,  0.0061,
         0.0086, -0.0169, -0.0207, -0.0120, -0.0215,  0.1923], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5212, -0.1022, -0.0193,  0.0193, -0.1059, -0.0152, -0.0808,  0.0179,
        -0.0049,  0.0276, -0.0557,  0.0297, -0.0054, -0.0162, -0.1625,  0.0649,
        -0.0938, -0.0935, -0.1290, -0.5128, -0.0574, -0.1352, -0.0461, -0.0826,
         0.0535, -0.0291, -0.0743, -0.1248, -0.0661,  0.1012,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8113, -0.5252, -0.0059,  0.2136,  0.1053, -0.0661, -0.0143,  0.0366,
        -0.1097, -0.0164,  0.1984,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2638, -0.2952,  0.0317, -0.0337, -0.1185,  0.0261, -0.0277, -0.1883,
        -0.0130, -0.0218, -0.0320, -0.0042, -0.1538,  0.0250,  0.0154, -0.0137,
         0.0582,  0.0087, -0.0814,  0.0196,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0195, -0.4278, -0.1631, -0.4121, -0.1951, -0.1331, -0.3467, -0.0552,
        -0.1217,  0.0778,  0.0226,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2877, -0.3794, -0.0305, -0.1076,  0.0236,  0.0461, -0.0623, -0.0482,
        -0.1834, -0.0195, -0.1601,  0.0095, -0.0959, -0.0279, -0.0620, -0.1240,
        -0.1042, -0.0172,  0.0395, -0.1247, -0.0764,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1744,  0.1331,  0.1147, -0.0389, -0.0252,  0.3379, -0.0439, -0.0865,
        -0.0569, -0.0480, -0.0707,  0.0263, -0.0605,  0.0109, -0.1193, -0.1032,
        -0.0602,  0.2008,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1598, -0.0845, -0.2644,  0.0449, -0.0085,  0.0705,  0.0464, -0.0127,
        -0.0040, -0.0165,  0.0474, -0.0032, -0.0103, -0.0515, -0.1241,  0.0643,
        -0.0331, -0.1562,  0.0224, -0.0736, -0.0457, -0.0689, -0.1762, -0.0197,
        -0.0104, -0.0262, -0.0158, -0.0531, -0.0315, -0.1023, -0.0022, -0.0415,
        -0.0514, -0.0488, -0.0714, -0.0418, -0.0776,  0.0868, -0.1498,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1020, -0.6041,  0.0413, -0.0604, -0.0192,  0.0397, -0.0389,  0.0864,
         0.0943,  0.0704,  0.0037, -0.0137,  0.0281, -0.0054, -0.1069, -0.0114,
        -0.0226,  0.0830,  0.0154, -0.0622, -0.0341, -0.0286, -0.0127, -0.0344,
         0.0145, -0.0183, -0.0643,  0.0351, -0.0811, -0.2062,  0.0043, -0.0293,
        -0.1759, -0.1170,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.3840, -0.3450, -0.0595,  0.0682,  0.0897, -0.1328,  0.4727, -0.0853,
        -0.1945,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0504, -0.1464, -0.0863,  0.0624, -0.0653,  0.0840, -0.0412,  0.0098,
        -0.0104, -0.0515, -0.0151, -0.0012,  0.0368, -0.0111,  0.0455,  0.0361,
        -0.0275, -0.3879,  0.0165,  0.0348, -0.0064, -0.0183,  0.0245, -0.0078,
        -0.0176, -0.0030,  0.0203, -0.0225, -0.1113, -0.0067, -0.0384,  0.0122,
         0.0066, -0.0059,  0.0073,  0.0169, -0.0312, -0.0132,  0.0535, -0.1124,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0168,  0.0643,  0.2141, -0.0910,  0.0025, -0.0910,  0.0824, -0.0922,
        -0.0251,  0.0958, -0.0097, -0.0373, -0.0024, -0.0133, -0.0475, -0.0764,
         0.0597, -0.0569,  0.0682,  0.0048,  0.1144,  0.0937, -0.0417,  0.0536,
        -0.0170,  0.0460,  0.0110,  0.0203,  0.1746, -0.0380,  0.0626,  0.0987,
         0.0266, -0.0284,  0.0100,  0.0731,  0.0621,  0.0653, -0.0472, -0.0005,
        -0.0242,  0.0572,  0.0474,  0.0279,  0.0138,  0.0227,  0.0607, -0.0014,
         0.0063, -0.0511,  0.0147, -0.0184,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1162, -0.1337, -0.1216,  0.0372,  0.0699, -0.0479, -0.0064, -0.0139,
        -0.0333,  0.0093,  0.0424,  0.0281, -0.1217, -0.0734, -0.1186, -0.1025,
        -0.0080,  0.1101, -0.0548, -0.2004,  0.0177, -0.0006,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9843e-01, -8.6514e-01,  4.3353e-02, -3.8253e-02, -6.5542e-02,
         3.8500e-03,  6.4766e-02, -5.9760e-02, -1.3054e-01,  3.4397e-02,
        -2.0844e-01, -3.4517e-04, -1.8476e-02, -9.7573e-03, -2.5197e-02,
        -3.7960e-02,  8.6386e-02, -8.7563e-04,  3.7795e-02,  1.0488e-02,
        -2.6757e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4127e-02, -2.6201e-01, -2.2215e-02, -1.0088e-01,  9.2778e-02,
        -3.5394e-03,  3.0470e-02, -3.3960e-02, -6.8219e-02, -1.7455e-02,
        -3.8292e-02, -2.7823e-03, -3.2846e-03,  7.8510e-02, -9.9805e-03,
        -5.0898e-02, -5.6891e-02,  8.2493e-03, -5.9337e-02, -3.6368e-02,
        -2.1070e-03, -9.2043e-03, -1.6917e-02, -6.9538e-03, -1.2386e-04,
        -4.7112e-04, -5.3944e-02, -1.5760e-02, -2.7713e-02,  1.7538e-02,
        -1.0235e-01, -5.3816e-02, -1.2016e-03, -3.3160e-02, -2.3011e-02,
         2.1007e-02, -1.7233e-02, -3.1959e-02, -3.0750e-02, -2.9386e-02,
        -1.7562e-01, -7.0757e-02, -1.5959e-01, -9.3955e-03, -3.1767e-02,
        -3.0734e-02, -8.9974e-02, -7.8894e-02,  3.3229e-05, -3.3194e-02,
        -2.3610e-02,  1.1727e-02,  1.9478e-02, -4.9961e-03,  3.8778e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1448, -0.0628, -0.2285, -0.2053,  0.1276, -0.0296,  0.0441, -0.0448,
         0.0217,  0.0424, -0.0088,  0.0275, -0.0032, -0.0321, -0.0124, -0.0413,
         0.0246, -0.0599,  0.0223, -0.0144,  0.0284, -0.0639, -0.1429, -0.0076,
         0.0164,  0.0234, -0.0753, -0.1052, -0.0330, -0.0208,  0.2030, -0.0128,
         0.0071, -0.1113, -0.0770, -0.0071, -0.0416, -0.0273, -0.0094, -0.0482,
        -0.0105, -0.0418, -0.1555,  0.0571,  0.0836,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1663, -0.1292, -0.2287, -0.2397, -0.0698, -0.0347, -0.1035, -0.1506,
        -0.0630,  0.0544,  0.0053, -0.0440, -0.0290, -0.0092,  0.0056, -0.0616,
         0.1191, -0.2867,  0.1091, -0.0005, -0.0120,  0.0268,  0.0724, -0.1500,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1053,  0.0351,  0.0250, -0.0509, -0.1500, -0.0043, -0.1193, -0.1028,
        -0.0901,  0.0058,  0.0940, -0.0331, -0.0802, -0.0516, -0.0661,  0.0236,
        -0.0114, -0.0283,  0.0387, -0.0547, -0.0563,  0.0641, -0.0069, -0.0916,
        -0.0985,  0.0296, -0.0838,  0.0046,  0.0043, -0.1041,  0.0990, -0.0700,
         0.0821,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6039, -0.2117, -0.1078, -0.1793, -0.1004,  0.0515,  0.0122, -0.0151,
        -0.1612,  0.0354,  0.0033, -0.0471, -0.0722,  0.1079,  0.1506, -0.1217,
        -0.1106, -0.0055, -0.0373,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1065, -0.1481,  0.1508,  0.0821, -0.0978,  0.0378, -0.1191,  0.0592,
        -0.1608, -0.0679,  0.0264, -0.0640,  0.0171, -0.0437,  0.0611, -0.0199,
        -0.0391,  0.0980, -0.0265,  0.0535, -0.0458, -0.2395, -0.2607, -0.0548,
        -0.1052,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3109, -0.1224,  0.0055,  0.0618, -0.1111, -0.1378, -0.3039, -0.0398,
        -0.0519, -0.0771, -0.0188, -0.0513, -0.0271, -0.0457, -0.0413, -0.0209,
        -0.0586,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.6461e-01, -1.9332e-02,  1.2415e-01,  4.2292e-02,  2.8366e-02,
         6.8117e-02, -7.8369e-05, -1.1201e-01,  7.9577e-04,  1.0944e-01,
        -5.2743e-02, -5.1046e-02, -5.3233e-02,  1.4941e-01, -1.3920e-02,
         1.4555e-02,  4.1872e-02,  7.6487e-02, -1.9090e-01,  4.8048e-03,
         2.1051e-02,  6.0258e-02,  1.0772e-01, -2.2364e-02,  8.7201e-02,
         6.3292e-03,  9.5955e-02,  8.0154e-02,  7.1115e-03, -1.1102e-03,
        -5.8853e-02,  1.2946e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4427, -0.1322, -0.0684, -0.0266,  0.0423,  0.0072, -0.0356, -0.1054,
        -0.0037, -0.0072,  0.0235,  0.0193, -0.1921,  0.0302,  0.1321, -0.0193,
        -0.1085, -0.1096, -0.1135,  0.0196, -0.0035,  0.0218, -0.1185, -0.0624,
         0.0120, -0.0685, -0.1084,  0.0531,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2854, -0.4059,  0.2632,  0.0869, -0.0011, -0.0559, -0.1484, -0.2695,
        -0.0347, -0.0274, -0.0370,  0.0031, -0.0024,  0.0424,  0.0267, -0.1013,
        -0.0181, -0.1301,  0.0752, -0.0968, -0.0582,  0.0011,  0.1572, -0.0584,
        -0.0427,  0.2247,  0.2616, -0.2224, -0.1182,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1845, -0.0605, -0.0409, -0.0268, -0.1813, -0.1288,  0.0256, -0.1988,
        -0.0150, -0.1117, -0.0523, -0.0228,  0.0125, -0.0586,  0.0966, -0.0044,
        -0.0627, -0.1038, -0.0509,  0.0891,  0.2254, -0.0962, -0.0463, -0.0596,
        -0.0330, -0.0154, -0.0531, -0.0294,  0.1521, -0.0099,  0.0041, -0.1341,
        -0.0820,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1023,  0.0268, -0.0510, -0.0589,  0.0601, -0.0559,  0.1139, -0.0331,
         0.0171, -0.0418,  0.0367, -0.0571,  0.0515, -0.0470, -0.0196, -0.0069,
        -0.0317, -0.0459, -0.1108, -0.0245, -0.0543, -0.0419, -0.0399, -0.0821,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8476e-02, -1.5128e-01, -1.3862e-01, -3.9629e-02, -7.2623e-03,
        -1.3775e-01, -8.8979e-02, -6.0289e-02, -2.1673e-02, -7.0695e-02,
         2.2625e-02,  1.2229e-04, -4.7685e-02, -1.7650e-02, -2.2539e-02,
        -1.7647e-02, -2.7159e-03, -3.8333e-02, -1.4351e-02, -3.0471e-01,
        -1.1720e-01, -1.0500e-02, -2.2386e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0236,  0.1903,  0.0031,  0.0438,  0.0723,  0.1041, -0.0221,  0.0179,
        -0.0256,  0.0239,  0.0640,  0.0296,  0.0869,  0.0603,  0.0770, -0.0473,
         0.0147,  0.0512,  0.0408,  0.1081,  0.0226, -0.0380, -0.0166, -0.0282,
         0.0374,  0.0164, -0.0343,  0.0368,  0.0371,  0.0679,  0.0985,  0.0516,
         0.0245,  0.0011,  0.0530,  0.0711,  0.0135, -0.0500,  0.0350,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0597, -0.6757,  0.0315,  0.0455, -0.0860,  0.0164,  0.0095,  0.0419,
        -0.1001,  0.0769, -0.1191, -0.0317, -0.0129, -0.0665, -0.0230, -0.0934,
         0.1541, -0.0242,  0.0153, -0.0174, -0.0100, -0.0400, -0.0028, -0.0657,
        -0.0051, -0.0517, -0.0225,  0.0158, -0.0510, -0.0163, -0.0668,  0.0186,
         0.0054, -0.0265, -0.0184,  0.0041, -0.0280, -0.0758, -0.0798, -0.0102,
         0.0409,  0.0742, -0.5858,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1129, -0.2519, -0.0111,  0.0317, -0.1126, -0.0089, -0.0442,  0.0047,
        -0.1451,  0.0061, -0.0454,  0.0011, -0.0432,  0.0062,  0.0031, -0.0680,
        -0.0502, -0.1267, -0.0061, -0.0053,  0.0019, -0.0426, -0.0207, -0.0238,
        -0.0241,  0.0004,  0.0830,  0.0306,  0.0590, -0.0121,  0.0031, -0.0809,
        -0.1218, -0.0505, -0.0286, -0.0745, -0.2301,  0.0774, -0.0549,  0.0130,
        -0.0441, -0.0993, -0.0160, -0.0045,  0.0087, -0.0167,  0.0011, -0.0012,
         0.0145, -0.0971, -0.0235, -0.0092,  0.0059, -0.1020, -0.0648,  0.0710,
        -0.0055], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1992, -0.2789, -0.2651, -0.1177, -0.0300, -0.1371, -0.2440, -0.1673,
        -0.0079,  0.1555, -0.0399, -0.1040, -0.1682, -0.0989,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2708,  0.2377,  0.0372, -0.1522,  0.0153, -0.0219,  0.0689,  0.0376,
         0.0147,  0.0257,  0.0424,  0.2751,  0.1681, -0.0194,  0.0335, -0.1215,
         0.0029,  0.1369, -0.1758,  0.0325,  0.2609,  0.0698,  0.0443, -0.0346,
         0.0445, -0.0060, -0.0174, -0.0029, -0.0433,  0.0573,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1421, -0.4151, -0.5108,  0.0204, -0.4681, -0.5532, -0.2672, -0.0963,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.1581, -0.0450,  0.0328, -0.0185, -0.0143, -0.2374,  0.0578,  0.0608,
         0.0056, -0.1241,  0.0035, -0.0592, -0.0929, -0.0696,  0.0049, -0.0302,
        -0.0033,  0.0052, -0.0064, -0.0278,  0.0027, -0.0773, -0.0327, -0.0401,
        -0.0751, -0.0869, -0.0578,  0.0106, -0.0397, -0.1623,  0.0405, -0.0468,
        -0.0139,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2860e-01, -1.8686e-02, -1.4641e-01, -6.5293e-02, -5.6796e-03,
        -2.2912e-02, -1.9807e-03, -2.5058e-02,  5.6350e-02, -1.9482e-02,
        -9.6656e-03,  3.6654e-03, -3.9795e-03, -1.7694e-02,  2.3635e-02,
         2.1701e-02, -7.7102e-03, -5.3894e-02, -1.1014e-02,  5.8931e-03,
        -3.6524e-02,  1.4708e-04,  2.0099e-02,  1.1256e-02,  3.9470e-02,
         2.6955e-03, -2.4380e-02, -1.1671e-02, -1.3112e-02, -8.8146e-03,
        -1.5865e-02, -4.1603e-03, -1.6600e-02, -2.1826e-01, -1.0864e-02,
         1.2805e-02, -3.9306e-02,  4.4352e-02,  2.3382e-02, -2.8604e-02,
        -4.2127e-03, -2.3198e-02, -5.8246e-02, -8.7581e-02,  1.7398e-01,
        -5.4148e-02, -1.0704e-01, -4.1302e-02, -1.6384e-01, -2.1207e-01,
        -1.8726e-01, -6.8016e-02, -4.5434e-02, -3.4500e-02, -1.0517e-01,
         1.4425e-03, -3.0745e-02, -2.6504e-03, -8.4225e-02, -5.6481e-02,
        -8.6697e-02,  4.4337e-02, -1.8194e-01, -6.0014e-02,  1.2057e-02,
         1.2780e-02, -2.5181e-02,  3.0703e-03,  3.2710e-02,  4.0406e-02,
         4.3394e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7935, -0.0369,  0.0139, -0.0467, -0.0607, -0.3297, -0.3334,  0.1579,
         0.0258,  0.0070, -0.0157,  0.0136, -0.0352, -0.0025,  0.0261, -0.0551,
         0.0016,  0.0323,  0.0240, -0.0844, -0.0678, -0.0061,  0.1055, -0.1045,
        -0.0386, -0.0510,  0.0496, -0.0233,  0.0127, -0.0305,  0.0341,  0.0370,
        -0.0558, -0.0118,  0.0515, -0.1062,  0.0254,  0.0443, -0.0128,  0.0707,
         0.1447, -0.0024,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1388, -0.2313, -0.0713,  0.0069, -0.0144, -0.0273, -0.0003, -0.0180,
        -0.0798, -0.0562, -0.0357, -0.0495, -0.0817, -0.0324,  0.0114,  0.0316,
        -0.0244, -0.0276, -0.2027, -0.0488, -0.0408, -0.0137, -0.0346, -0.1175,
        -0.0190, -0.0504,  0.0123, -0.0372, -0.1131, -0.0429, -0.0421, -0.0290,
        -0.0387, -0.0404,  0.0660,  0.0021,  0.0931,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4832,  0.3761, -0.2841,  0.1644, -0.0008,  0.1339,  0.2087,  0.0405,
         0.2416,  0.1694,  0.1486,  0.3221,  0.2144,  0.4653,  0.3118,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2600, -0.1259,  0.3196,  0.3366,  0.1491,  0.2192, -0.0207,  0.0716,
         0.2178,  0.2467, -0.0667, -0.0334,  0.3560,  0.2568,  0.2254, -0.2432,
         0.0463, -0.4154,  0.4896, -0.3756,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6877, -0.4314, -0.1462,  0.2878, -0.1710, -0.0696, -0.0456,  0.0986,
         0.0072, -0.0380,  0.0304, -0.0436, -0.0893, -0.0120, -0.0814, -0.0156,
         0.0536, -0.0438,  0.0669, -0.0387, -0.0320,  0.0594, -0.0852,  0.0677,
        -0.1623,  0.0481, -0.1450, -0.0247, -0.0307,  0.0353, -0.0507, -0.0838,
        -0.0038, -0.0568,  0.0653, -0.0530, -0.0705, -0.0103, -0.0401, -0.0478,
        -0.0314,  0.1387, -0.0153,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1811,  0.0153, -0.3430, -0.1224, -0.0197, -0.0286, -0.0231, -0.2542,
        -0.0300,  0.0038, -0.0240, -0.0378, -0.1092, -0.0202, -0.0703, -0.0230,
         0.0090, -0.0625,  0.0363,  0.1396, -0.0240, -0.0727, -0.0584, -0.0509,
         0.0830, -0.0843, -0.2325, -0.0180, -0.0043, -0.0030,  0.0022, -0.0428,
         0.0196, -0.0122, -0.0250,  0.0247, -0.0694, -0.2498,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2365, -0.2249, -0.0606, -0.3328,  0.0109, -0.1480,  0.0237,  0.1037,
        -0.0546, -0.2629, -0.0116, -0.0243, -0.0293, -0.0147,  0.0275, -0.0606,
        -0.0393, -0.0446, -0.0527,  0.0052, -0.0295, -0.0162,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0166e-01, -2.2725e-01,  1.7452e-02, -8.3497e-02, -6.2811e-02,
        -9.3630e-02,  2.9299e-03,  9.2640e-02, -2.4032e-01, -6.3640e-02,
        -4.8568e-02, -7.1036e-02, -7.6818e-02, -6.4842e-02, -3.2558e-02,
        -8.0686e-02, -1.9408e-01, -1.3944e-01, -8.0092e-02, -3.9166e-02,
         7.0889e-02,  4.5235e-04,  3.2427e-02,  2.9055e-02, -2.5663e-02,
         3.2367e-02,  2.2076e-03, -9.1908e-02,  4.9629e-02,  4.6697e-02,
         8.5618e-02, -7.0167e-02,  1.5640e-03, -4.9088e-02, -3.1144e-02,
        -3.0141e-01, -6.5438e-02, -9.0689e-02,  7.8104e-02,  4.1913e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1029, -0.1744,  0.0851, -0.0493, -0.0033, -0.2718, -0.1080, -0.0036,
        -0.0640, -0.0792,  0.0019, -0.0290, -0.0054, -0.0675, -0.0642, -0.0455,
        -0.0416,  0.0615, -0.1792, -0.0391, -0.0267, -0.0365,  0.0416, -0.0078,
        -0.0747, -0.0989, -0.0003, -0.0608, -0.1083, -0.0829,  0.0462, -0.0601,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3711e-01, -5.8930e-01,  1.1293e-02, -1.4918e-02, -4.4754e-02,
         4.7232e-02, -6.7669e-02, -1.1124e-01, -1.1618e-01, -1.5773e-02,
        -6.8928e-02, -4.4125e-02, -9.1286e-02, -2.9366e-02, -1.5232e-02,
        -1.0676e-01, -2.4524e-02, -4.9021e-02,  5.3200e-03, -7.4958e-02,
        -1.2931e-02,  2.7919e-04, -2.7496e-02, -5.4245e-02,  2.4755e-02,
         9.0829e-02, -5.8149e-02, -2.5149e-02,  4.9142e-02,  3.0689e-02,
         2.6959e-02, -2.7950e-02,  3.1024e-02, -1.4075e-01, -4.2051e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.0339,  0.0088, -0.1018,  0.1295, -0.0765,  0.0090, -0.0611,  0.1287,
         0.0087,  0.3543, -0.0339, -0.0079, -0.0511,  0.0892,  0.0997, -0.0293,
         0.0266,  0.0325,  0.0378, -0.2360,  0.1913,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1340, -0.3758, -0.1181, -0.1563, -0.1446, -0.0817,  0.0295, -0.0460,
        -0.1411, -0.0345, -0.0083, -0.0106,  0.0669, -0.0033, -0.0196, -0.0540,
         0.0026, -0.1156,  0.1358,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8664, -0.4239,  0.0399,  0.3641, -0.0363,  0.0603, -0.2174, -0.0063,
         0.2263, -0.4050, -0.1893,  0.0175, -0.0907, -0.1789,  0.0159, -0.0452,
         0.1076,  0.0143, -0.2402, -0.0097,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4456, -0.4577,  0.0017, -0.0740, -0.0591, -0.0622,  0.5328, -0.0868,
        -0.0524,  0.0116, -0.0324,  0.0255,  0.0058, -0.0400, -0.1079,  0.0055,
        -0.2129, -0.0678, -0.0375, -0.1121, -0.0622, -0.0118, -0.1891,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1362, -0.1072, -0.0076, -0.2037,  0.0024, -0.0172,  0.0271,  0.0939,
        -0.0208, -0.0558, -0.0197, -0.0502, -0.0027, -0.1219,  0.3019,  0.0283,
        -0.0640,  0.0022, -0.0510, -0.0025, -0.0525,  0.0338, -0.1343,  0.0144,
        -0.0040,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1339,  0.6923, -0.1884,  0.1102, -0.0109,  0.0990,  0.0203, -0.0253,
        -0.0058,  0.0377,  0.1475, -0.0666,  0.0422,  0.1467, -0.0526,  0.0188,
         0.0568,  0.0486,  0.0233, -0.1007,  0.0504,  0.0483,  0.0748,  0.0177,
         0.0467,  0.0210,  0.0443,  0.0958,  0.1358, -0.0713,  0.0825, -0.0116,
         0.0570, -0.0729,  0.2581,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0303,  0.1704,  0.0010, -0.1851, -0.0272,  0.0091, -0.0188,  0.0555,
        -0.1137, -0.0977,  0.0113, -0.2068, -0.0703, -0.0267, -0.2021, -0.1313,
        -0.1457, -0.1335, -0.0619, -0.0006, -0.0275,  0.0725,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1744,  0.0824, -0.0040,  0.0080, -0.0270, -0.0641, -0.0643,  0.0071,
         0.0262, -0.1902, -0.1562, -0.1327, -0.1307, -0.0439, -0.0423, -0.0900,
        -0.0432, -0.0618, -0.0479,  0.0489, -0.2263,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5969, -0.2868,  0.0070, -0.1371, -0.0832,  0.0049, -0.1385, -0.2392,
         0.0352,  0.0571,  0.0712,  0.1337, -0.2419, -0.1201, -0.0200, -0.0943,
        -0.0729, -0.0766, -0.0702,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1081e-02,  3.0877e-01,  9.1092e-03, -8.9740e-02, -7.3561e-02,
        -1.1222e-01,  1.5600e-01,  3.6462e-02,  3.8898e-03, -1.7291e-02,
        -7.0578e-02,  6.8094e-03, -3.6025e-05, -5.9818e-02, -1.4673e-02,
        -1.9977e-02,  2.3895e-03, -1.4478e-01, -2.9638e-02,  1.0441e-02,
        -3.6299e-02, -3.3417e-02,  5.8953e-03, -4.1430e-02,  2.9583e-02,
        -3.5027e-02,  2.9612e-02, -1.0625e-01,  1.8808e-02, -2.7143e-01,
         1.1269e-02, -4.6374e-02, -5.0319e-02,  1.1242e-02, -3.5878e-02,
         3.5041e-02, -6.2084e-03,  4.5157e-03, -5.3203e-02, -1.0886e-02,
         3.2170e-02,  3.8639e-02, -4.3895e-03,  7.1947e-04, -7.1425e-02,
         1.3135e-01,  1.8999e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0707, -0.1752, -0.0472, -0.0031,  0.0395, -0.0304, -0.0045, -0.0175,
        -0.0397, -0.0105,  0.0146,  0.0041, -0.0058,  0.0256,  0.0979,  0.0071,
        -0.1358, -0.0137, -0.0337,  0.0387, -0.0118, -0.0235, -0.0437, -0.1635,
        -0.0331,  0.0827,  0.0928, -0.0035, -0.0103, -0.0099,  0.0605,  0.0537,
         0.1286, -0.1507,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0968, -0.3542,  0.0055, -0.0305, -0.0849, -0.1150, -0.0437,  0.0467,
        -0.0478, -0.0530,  0.0336, -0.0028, -0.0628, -0.1205, -0.0154,  0.0271,
        -0.0032, -0.0450, -0.0437, -0.0449, -0.0423, -0.0659, -0.0465, -0.0527,
        -0.0359, -0.0468, -0.0364,  0.0147,  0.0626, -0.0885,  0.0464, -0.0080,
        -0.0403,  0.0589,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.3514, -0.2246,  0.0027, -0.3586, -0.0607, -0.1440,  0.0205,  0.1297,
        -0.5989, -0.1325, -0.1461, -0.0381, -0.0656, -0.2542, -0.1284,  0.0109,
         0.3454,  0.3744,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0466,  0.0722,  0.0054, -0.0360, -0.0516, -0.0348, -0.1004, -0.0562,
        -0.1331, -0.2004,  0.0687, -0.0716,  0.0091, -0.2661,  0.0668,  0.1654,
         0.0476,  0.0829,  0.0856,  0.0850,  0.2084, -0.0900, -0.1677,  0.0262,
        -0.0220, -0.1746,  0.1882,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1101, -0.6141,  0.1151, -0.1332, -0.0513, -0.0959, -0.1914, -0.0215,
        -0.2582, -0.1816, -0.0885, -0.1079, -0.1448, -0.1866, -0.0371,  0.0634,
        -0.1556, -0.0938, -0.0645, -0.3114, -0.0613,  0.0084, -0.0069, -0.0242,
         0.0456, -0.2628,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3528, -0.1349, -0.0563, -0.0710, -0.2709, -0.0019, -0.1591,  0.0020,
        -0.0316, -0.0841, -0.1511,  0.0180, -0.0506, -0.0625, -0.0440, -0.0819,
        -0.0528, -0.0776, -0.0829,  0.1483,  0.2363,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1006, -0.2829, -0.0642, -0.3158, -0.0419, -0.0412,  0.1520, -0.0168,
         0.0117, -0.1014, -0.0393, -0.0792,  0.0616, -0.1519, -0.0768,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5137, -0.0297, -0.0397, -0.2595,  0.0968, -0.1395,  0.0480, -0.0322,
        -0.1118, -0.0428, -0.1665, -0.2335, -0.1676, -0.1076, -0.2187, -0.1133,
         0.0628, -0.2486,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3266,  0.2283,  0.1969,  0.5167,  0.1936,  0.1717, -0.0528,  0.1042,
         0.1294,  0.1123,  0.0599,  0.0572, -0.1476,  0.0286,  0.0454,  0.0796,
         0.0594,  0.0237,  0.0294,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1791, -0.1907, -0.1243, -0.0325, -0.1396, -0.0800,  0.0481, -0.0259,
        -0.0419,  0.0046, -0.0002,  0.0691, -0.0178,  0.0124,  0.0558, -0.0915,
        -0.0839, -0.1140,  0.0201,  0.0373, -0.1304,  0.1185, -0.0607, -0.0131,
         0.0381, -0.0933, -0.0617, -0.0267, -0.0341, -0.0226, -0.0313, -0.0050,
        -0.0546, -0.0619, -0.0620, -0.0008, -0.1134,  0.0171, -0.0493,  0.0364,
        -0.0129,  0.0164,  0.1534,  0.1291], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0666, -0.0887, -0.0438, -0.1438, -0.1550, -0.0551, -0.0331,  0.0052,
        -0.1100,  0.0792, -0.0722, -0.0064, -0.0718, -0.0504,  0.1088, -0.0447,
         0.2478, -0.2030, -0.0634,  0.0262, -0.0652, -0.0394,  0.0083, -0.0064,
        -0.0313, -0.0173, -0.0399, -0.0118,  0.0107, -0.0822, -0.0599,  0.0021,
        -0.0069, -0.1420, -0.0774,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5127, -0.6396,  0.0084, -0.0792, -0.0241,  0.0145, -0.1648, -0.1996,
        -0.0915, -0.0472,  0.0382, -0.0908, -0.0180, -0.0391,  0.0106, -0.2023,
        -0.0414,  0.0129,  0.1742, -0.0424, -0.1063, -0.0171, -0.2023, -0.0033,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0218, -0.1230, -0.0659, -0.0670, -0.0764, -0.0697,  0.1438, -0.0413,
        -0.0793,  0.0588, -0.0263, -0.0062, -0.0513, -0.0806, -0.0853,  0.0171,
        -0.0337, -0.0692, -0.0957,  0.0361, -0.1127, -0.0092, -0.0029, -0.0102,
         0.0146, -0.0655, -0.0502, -0.0064, -0.0463, -0.0020, -0.0449, -0.0124,
         0.0720, -0.1134,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1213, -0.1665, -0.1934, -0.0128,  0.0544, -0.1023, -0.1384, -0.0492,
         0.0093,  0.0279, -0.0134,  0.0857, -0.0854, -0.0873, -0.0488, -0.2156,
         0.0070,  0.0769, -0.0194, -0.1320, -0.0440,  0.0233, -0.0084, -0.0772,
        -0.0606, -0.0309, -0.0236,  0.0395,  0.0809,  0.1119,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.0574, -0.4567, -0.3013, -0.2422,  0.0550, -0.1523, -0.1454, -0.1271,
        -0.0158, -0.3166, -0.0686, -0.0446, -0.0979,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3962, -0.1105,  0.0250,  0.3343, -0.1813, -0.1890,  0.0152, -0.0474,
        -0.0622, -0.1351, -0.0598, -0.1532, -0.1095, -0.0153, -0.0284, -0.4538,
        -0.1878,  0.0953,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5245, -0.0555, -0.1284, -0.1222, -0.1387, -0.1138, -0.1067, -0.2255,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0920,  0.0202, -0.0229, -0.0690, -0.0208,  0.0563,  0.0109,  0.0303,
        -0.0088, -0.0182, -0.0337, -0.0273, -0.0209, -0.0374,  0.0078, -0.0302,
         0.0062,  0.0244,  0.0104, -0.0130,  0.0079, -0.0024, -0.0742,  0.0108,
        -0.0186, -0.0148, -0.0426, -0.0069, -0.2170, -0.0138, -0.0071, -0.0304,
         0.0224, -0.0940,  0.0421, -0.0322, -0.0144, -0.0188, -0.0243, -0.0070,
        -0.0060, -0.0225, -0.0121,  0.0468,  0.1076,  0.0254,  0.0657],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5398, -0.3708, -0.4389, -0.3468,  0.3146, -0.0748, -0.1126, -0.1086,
        -0.7490, -0.0237, -0.4836,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1949, -0.1419, -0.2445, -0.2011, -0.1018, -0.0508, -0.0256, -0.0977,
        -0.0938, -0.0834, -0.0263, -0.1932, -0.1074, -0.1009,  0.1131, -0.1454,
        -0.0853, -0.1048,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3312, -0.3221, -0.4667,  0.0566, -0.0179,  0.2006, -0.1707,  0.0424,
        -0.2289, -0.2275, -0.0773,  0.1153,  0.1566,  0.1020,  0.1131,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1576, -0.2026,  0.0402, -0.0770, -0.0975, -0.0500, -0.1655, -0.1978,
        -0.0864, -0.1240, -0.2049, -0.0112, -0.0087, -0.0059,  0.0154,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0394, -0.1463, -0.0758, -0.1011, -0.0648,  0.0033,  0.0557, -0.0597,
        -0.0950, -0.0475, -0.0428, -0.1100, -0.2339, -0.0399,  0.0151, -0.0995,
        -0.1507, -0.1013, -0.0539, -0.0243, -0.0451,  0.0041, -0.0748,  0.0349,
         0.0078,  0.0407,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5075, -1.0989, -0.1508, -0.0572, -0.1719, -0.0422, -0.0023, -0.1741,
        -0.4013, -0.0491,  0.0094,  0.0785, -0.0282, -0.2751, -0.1408,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0340, -0.1774, -0.0504, -0.0620,  0.0050, -0.0746, -0.0719,  0.0342,
        -0.0576, -0.1101,  0.0328, -0.0152, -0.0250, -0.0109, -0.0277,  0.0014,
        -0.0132,  0.0203, -0.0319,  0.0670, -0.0647,  0.1020,  0.1079,  0.0651,
         0.0817, -0.0290,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0796, -0.2264, -0.0315, -0.1076, -0.0213, -0.0293,  0.0310, -0.1385,
        -0.0021, -0.0645, -0.2495, -0.1128, -0.1005,  0.3617, -0.0946, -0.0606,
         0.5807, -0.0852, -0.0650, -0.0865, -0.0866, -0.0446, -0.0053, -0.0822,
         0.0008, -0.1486,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.1689, -0.1855, -0.1713, -0.0822, -0.1564, -0.0157, -0.0175, -0.1074,
         0.0271, -0.0387,  0.0304, -0.0301, -0.2557, -0.0027, -0.0400, -0.0345,
         0.0512,  0.0678, -0.0870, -0.0178, -0.0125, -0.0388, -0.0145,  0.0033,
        -0.0988, -0.0195, -0.0550,  0.0269, -0.0319, -0.0287, -0.0867, -0.0218,
        -0.0150,  0.0355, -0.0653,  0.0135,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0122, -0.1265, -0.1262, -0.0031, -0.0152, -0.0106, -0.0467, -0.1127,
         0.0359,  0.0222, -0.0014,  0.0396, -0.0878,  0.0670, -0.0261, -0.0365,
        -0.0195,  0.0028, -0.0041, -0.0339,  0.0383,  0.0103,  0.0326, -0.0875,
        -0.0073, -0.0895,  0.0071, -0.0214, -0.0851,  0.0558,  0.0284, -0.0484,
        -0.0460,  0.0671,  0.0502, -0.1770,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9650,  0.3643,  0.0220, -0.1888,  0.1595,  0.0939,  0.0475,  0.2293,
        -0.0670,  0.0034,  0.0256,  0.0254, -0.0683,  0.1660,  0.1015, -0.0162,
        -0.0860, -0.0118,  0.2102, -0.1617,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0165e+00, -2.4221e-01, -2.5045e-02,  8.3152e-02,  8.9759e-02,
        -1.8564e-01, -2.6888e-02, -8.5746e-04, -2.6158e-02,  2.5855e-02,
        -2.4491e-02, -1.0299e-01, -9.5645e-02,  1.7977e-01, -9.1403e-02,
        -4.8354e-02,  3.2909e-02, -5.9278e-02, -9.5034e-02, -5.6359e-02,
        -3.5598e-01,  5.2453e-02, -4.3192e-02,  9.4000e-02,  1.1076e-01,
        -6.7500e-02,  2.9859e-02,  4.0400e-01, -1.1192e-01, -3.1392e-01,
         1.2581e-01, -1.6170e-01,  1.0541e-01, -1.2222e-01,  3.9534e-02,
         5.8884e-02, -1.0887e-01, -3.7674e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2221, -0.1266, -0.0778, -0.2061, -0.0461, -0.0056, -0.0178,  0.0334,
        -0.0231, -0.1364, -0.1824, -0.2082,  0.0055,  0.0142,  0.1054, -0.0458,
         0.1269,  0.0585, -0.0294,  0.0083, -0.0256,  0.0289,  0.0066, -0.0126,
        -0.0326,  0.0036,  0.0036, -0.0205, -0.1440, -0.1444, -0.0958,  0.0005,
         0.0943,  0.0149, -0.0246,  0.0540,  0.0478,  0.1328, -0.0521, -0.0319,
         0.0810], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2442,  1.0898, -0.1208,  0.3468, -0.1299,  0.1144,  0.2444,  0.1472,
         0.0520,  0.0377,  0.0762,  0.0518,  0.0101,  0.0681, -0.1443,  0.1629,
         0.0363,  0.1561,  0.0066, -0.0334,  0.4411, -0.0251,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0740, -0.0018, -0.0200,  0.0236, -0.0381, -0.0790,  0.0226,  0.0166,
        -0.1669, -0.1541, -0.0413, -0.1229,  0.0179, -0.0334,  0.0393, -0.0331,
        -0.0519, -0.0663,  0.0079, -0.0897, -0.3263, -0.0834, -0.1047,  0.0072,
        -0.0383, -0.0651,  0.0087, -0.0702,  0.0410, -0.0335, -0.0454, -0.0464,
        -0.0526,  0.0955, -0.0944,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2463, -0.1297, -0.1253, -0.0343, -0.2569,  0.0420, -0.0384,  0.0634,
         0.0478, -0.0300, -0.1445, -0.1254, -0.1187,  0.0097, -0.0167, -0.0733,
         0.0433, -0.0737, -0.1047, -0.0186, -0.0833, -0.0115,  0.0544, -0.0306,
        -0.0534,  0.0235, -0.0092,  0.1871,  0.0606,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0300, -0.0557,  0.0284, -0.0795, -0.0375, -0.0073, -0.0308, -0.0514,
        -0.0545,  0.0100,  0.0428, -0.0455,  0.0126,  0.0920, -0.0130,  0.0495,
         0.1352,  0.0560, -0.0996, -0.0108, -0.1189, -0.2014, -0.0289,  0.0662,
        -0.1456,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0131, -0.2781, -0.1234, -0.1475, -0.2301, -0.0680, -0.0895, -0.0258,
        -0.1100, -0.2151, -0.0713,  0.0327, -0.1709, -0.0480, -0.0823, -0.1246,
         0.0204, -0.0836, -0.0259,  0.0364, -0.0104,  0.0328, -0.2372,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4386,  0.1277, -0.0503, -0.1687,  0.0900,  0.0856, -0.0393, -0.0297,
         0.0064, -0.0754, -0.0013,  0.0250, -0.0353, -0.1913, -0.0066, -0.0600,
         0.0014, -0.0755,  0.0585, -0.0113, -0.0112,  0.0799, -0.0964,  0.0117,
        -0.0643,  0.0131, -0.1446, -0.0497, -0.0068, -0.0193, -0.1371, -0.0862,
        -0.0797, -0.1110, -0.0757, -0.0129, -0.1036,  0.0848, -0.0102,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1013, -0.0835, -0.7376, -0.3296, -0.4898, -0.2135, -0.1193, -0.2299,
        -0.2702, -0.2038, -0.1616,  0.0201,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.1993, -0.1819, -0.6671, -0.0763,  0.0029, -0.0479, -0.0191, -0.0356,
        -0.0229, -0.0163,  0.0033, -0.0068,  0.0452, -0.0643, -0.0966,  0.0172,
         0.0434,  0.0046,  0.0389,  0.0298, -0.0365, -0.0814, -0.0033,  0.0450,
        -0.0019,  0.0090, -0.0404,  0.0651,  0.0647,  0.0264, -0.1290, -0.1209,
        -0.0728, -0.0972, -0.0458, -0.0898,  0.0087,  0.0084, -0.0165, -0.0346,
        -0.0079,  0.0143, -0.0044,  0.1279,  0.2682,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4087, -0.5681, -0.0191, -0.1785, -0.0570, -0.0064, -0.1147, -0.0920,
        -0.2079, -0.3855,  0.0344,  0.0514,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0904, -0.9143, -0.7289, -0.0310,  0.3718, -0.0772,  0.3609,  0.1227,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0617, -0.2147,  0.0344,  0.1260, -0.0115,  0.0137,  0.0133,  0.0124,
         0.0067, -0.1320, -0.0152,  0.0752, -0.0051, -0.0084, -0.0075,  0.0386,
        -0.0295, -0.0517,  0.1292,  0.0155,  0.0034,  0.0010, -0.0015,  0.0445,
        -0.0021,  0.0423,  0.0261, -0.0020,  0.0088,  0.0158,  0.0622, -0.0145,
        -0.0108, -0.0014, -0.0004,  0.0018, -0.0025, -0.0359,  0.0096, -0.0058,
        -0.0073, -0.0004,  0.0171,  0.0156, -0.0597, -0.0231, -0.1674, -0.0010,
        -0.0716, -0.0542, -0.0266,  0.0084, -0.0240,  0.1142,  0.0454,  0.0158,
        -0.0411, -0.0073, -0.0380,  0.0020,  0.1310], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3348, -0.0108,  0.0229, -0.0443, -0.0687, -0.0306, -0.1024, -0.1011,
        -0.0760, -0.1170, -0.1164, -0.0289,  0.0149,  0.0017, -0.2601, -0.0253,
         0.0385, -0.0399, -0.0235, -0.0428, -0.0649, -0.0512, -0.0315, -0.0450,
        -0.0351, -0.0567, -0.0480,  0.0028,  0.0124, -0.1245, -0.0774, -0.0248,
         0.0824,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0334, -0.1264, -0.0979, -0.0626,  0.0034, -0.0481, -0.0832, -0.0349,
        -0.0426, -0.1355,  0.0185, -0.0445, -0.0466, -0.0568, -0.0643,  0.1547,
         0.0600, -0.1250, -0.5515,  0.0109, -0.0991,  0.0121,  0.0744,  0.0430,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1912, -0.3385, -0.1302, -0.1427, -0.0511, -0.1821, -0.0442, -0.1298,
         0.2448, -0.0697, -0.0421, -0.1071, -0.2718, -0.0525, -0.0086, -0.0558,
        -0.0223, -0.0063, -0.1127, -0.0602, -0.0255, -0.0668, -0.0540, -0.0594,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2840, -0.0628, -0.1512, -0.1251, -0.0646,  0.1416, -0.0794, -0.0785,
         0.0111, -0.0053, -0.0293,  0.0370, -0.0010,  0.0151, -0.0960, -0.1005,
        -0.0524,  0.0341, -0.0090, -0.0912, -0.0670, -0.0121,  0.0449, -0.1152,
         0.0340, -0.0196, -0.0619, -0.0268,  0.1913, -0.0943, -0.1153,  0.0833,
         0.0579,  0.1527,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4862,  0.1324,  0.1468, -0.0627,  0.0487, -0.3179,  0.0167, -0.0703,
         0.0032,  0.0450, -0.0364,  0.0717,  0.0281,  0.6101,  0.0992,  0.1795,
         0.0549, -0.0078,  0.0141,  0.0204, -0.0037,  0.0777, -0.1086, -0.0652,
        -0.0453,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4962, -0.1668,  0.0940,  0.0938,  0.0193, -0.0747,  0.1811,  0.0667,
         0.1246,  0.3062,  0.1256,  0.0082,  0.0953,  0.0845, -0.1492, -0.0984,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5954, -0.0435, -0.0582, -0.1518, -0.0976, -0.0683, -0.1855, -0.1162,
         0.1825,  0.0176, -0.0753,  0.0412, -0.0301, -0.0517, -0.1213,  0.0151,
        -0.0610, -0.0478, -0.1057, -0.1053, -0.0147, -0.2506, -0.1165,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0181,  0.7384,  0.2852,  0.1617,  0.4058,  0.2461,  0.0070,  0.0899,
         0.0397, -0.0384, -0.2935,  0.0923, -0.0973,  0.2478,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.3631,  0.1093,  0.0740, -0.0269, -0.0036, -0.1583,  0.1192, -0.0838,
        -0.1324, -0.0779,  0.0084,  0.1200, -0.0621,  0.1022,  0.0091, -0.0697,
         0.1363, -0.1119, -0.0743, -0.0495, -0.0371, -0.3141,  0.1544, -0.2777,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0770,  0.1322, -0.1158,  0.0959, -0.0719, -0.1244, -0.1221,  0.0456,
         0.1847,  0.1627, -0.2228,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0867, -0.2209,  0.0143, -0.0278, -0.0427, -0.1305, -0.2140, -0.0255,
        -0.0885, -0.0150,  0.0739,  0.0690,  0.0091, -0.0069, -0.0021, -0.0179,
        -0.1166, -0.1040,  0.2078,  0.0464, -0.1926,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4599, -0.0203,  0.0577,  0.1299,  0.0341, -0.0807, -0.3517, -0.2799,
         0.0256, -0.1936, -0.0038, -0.1771, -0.4787, -0.1094,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0332e-01,  9.9691e-02, -6.0138e-04,  1.0926e-01, -1.0936e-01,
        -4.0360e-02, -2.4083e-02,  1.4227e-01, -7.3611e-02, -9.2303e-02,
        -6.3723e-02, -1.3880e-04, -1.5659e-01,  8.2552e-03,  1.9088e-02,
        -5.7093e-02, -6.4186e-02,  2.1181e-02, -6.4405e-02, -1.8907e-04,
         1.2461e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3115, -0.0813, -0.1152, -0.1908,  0.3198, -0.1529, -0.0717, -0.3190,
        -0.1328, -0.1360, -0.1007,  0.0140, -0.0444,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1370, -0.1751,  0.0056, -0.0063, -0.0385, -0.0253, -0.0475, -0.1477,
         0.0263,  0.0240,  0.0294, -0.0596,  0.0670, -0.0222, -0.0679, -0.0391,
        -0.0645,  0.0382,  0.0191, -0.1207, -0.0964, -0.0422, -0.0107,  0.0259,
        -0.2086, -0.0495, -0.0817, -0.0801, -0.0362, -0.0500, -0.0787, -0.1265,
        -0.0230,  0.0012,  0.1356, -0.0452, -0.0546, -0.0041,  0.0111, -0.0354,
        -0.0197,  0.0129,  0.0219, -0.0616,  0.0700], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1467,  0.1329, -0.1533, -0.0174, -0.0520,  0.0539,  0.0896, -0.0596,
         0.0293, -0.0917, -0.0941,  0.0605, -0.0475, -0.1436,  0.0220,  0.0652,
        -0.1619, -0.0333, -0.0057, -0.0520, -0.0249, -0.5240, -0.0166, -0.1539,
         0.0545,  0.0383, -0.1490,  0.0193,  0.0419,  0.1069, -0.0284,  0.0094,
         0.1406,  0.1546,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5944, -0.6061, -0.1729, -0.0288, -0.0426,  0.0579, -0.0572,  0.0277,
        -0.1621, -0.0467, -0.0137, -0.0109,  0.1545, -0.0306,  0.0134, -0.0352,
        -0.0384, -0.1531, -0.0196, -0.0144, -0.1206,  0.2457, -0.0204,  0.0389,
         0.1813,  0.1663,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5084, -0.4326, -0.0019, -0.0721,  0.1413, -0.1346, -0.2540, -0.1293,
        -0.1474,  0.0288, -0.0791, -0.0850, -0.0148, -0.0466, -0.1120, -0.0695,
        -0.1515,  0.0277, -0.0590, -0.0349, -0.0341, -0.0493,  0.2949,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2838e-01, -1.2772e-01,  6.4647e-02, -6.3019e-02, -4.1153e-02,
        -2.3497e-04,  6.1951e-02, -6.5032e-02, -9.0520e-02, -3.9335e-02,
        -8.4954e-03,  2.5592e-02, -5.0912e-03, -4.9150e-02, -4.6012e-01,
        -9.0855e-02,  1.0662e-01, -8.5406e-02, -5.0657e-02, -1.0237e-01,
         5.2287e-03, -4.0408e-03, -1.1750e-03,  6.3766e-02, -6.4209e-03,
         4.0016e-02, -3.1025e-02, -6.0834e-02,  3.4540e-02,  1.7084e-02,
        -4.2468e-07,  2.2696e-03,  6.0237e-02,  1.6805e-02,  4.3411e-02,
        -2.3575e-02, -1.9293e-02, -6.1707e-02, -2.7223e-02, -1.1306e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0452, -0.0784, -0.0162, -0.0338, -0.1306, -0.0565,  0.0666, -0.1015,
        -0.0336, -0.0575,  0.0100, -0.0138, -0.0747, -0.0787,  0.0081, -0.0680,
        -0.0256, -0.0691,  0.1452,  0.0056, -0.0347, -0.0589,  0.0089, -0.0368,
        -0.0553, -0.0285,  0.0034, -0.1187, -0.0118,  0.0925, -0.0649, -0.1343,
        -0.0442,  0.0399,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.1911, -0.1273, -0.0971, -0.0815, -0.0755, -0.1727, -0.0558,  0.0095,
         0.0072,  0.1138, -0.0091, -0.0198,  0.2499, -0.0078, -0.2443, -0.0631,
        -0.0051,  0.0341, -0.0650,  0.0399, -0.0166,  0.0359, -0.0327,  0.0496,
        -0.0731, -0.2469, -0.1503, -0.0724, -0.0400, -0.0471, -0.0344, -0.0015,
         0.0175, -0.0483, -0.0309,  0.0090, -0.0184, -0.0453, -0.0328, -0.0157,
        -0.0308, -0.0396, -0.0289,  0.0030, -0.0213,  0.0016, -0.0201, -0.0181,
        -0.0302, -0.0353,  0.0040,  0.0445], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0704, -0.2275,  0.0728,  0.0316, -0.1143, -0.0032, -0.0206, -0.0938,
         0.1032, -0.0161, -0.0164,  0.0095, -0.0279, -0.1070, -0.0964, -0.0705,
        -0.1807,  0.0191, -0.0039,  0.0781, -0.0311,  0.0018, -0.0731, -0.1542,
         0.0190,  0.0421,  0.0004, -0.0521, -0.0779,  0.0235, -0.0307,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1721,  0.2574, -0.0143, -0.0399, -0.0385, -0.0413,  0.0870,  0.1124,
         0.0730,  0.0312, -0.0109,  0.3064, -0.2885,  0.0585,  0.0458,  0.2091,
        -0.4697, -0.0406,  0.1384, -0.0627, -0.1203,  0.1187,  0.1368, -0.0106,
        -0.1790,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0065, -0.4524,  0.2553,  0.0011, -0.0785, -0.2722, -0.3066, -0.0443,
         0.2702, -0.2258, -0.0118, -0.2684,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1296, -0.4046, -0.0624, -0.0642, -0.0674, -0.2602,  0.1197,  0.0452,
        -0.0023, -0.0449, -0.1210,  0.1532,  0.0049, -0.1300,  0.0138, -0.1171,
        -0.0352, -0.0098,  0.1130, -0.0251,  0.0972, -0.1199,  0.1141, -0.2648,
         0.0646,  0.1476, -0.0078,  0.3605, -0.0673,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1276, -0.2888, -0.0084,  0.1721,  0.0734, -0.1645, -0.2831, -0.0450,
         0.4036, -0.4474, -0.0525, -0.1652,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1539, -0.3811,  0.1925, -0.1998, -0.0484, -0.0444, -0.2738, -0.0283,
        -0.1122, -0.0484, -0.1483,  0.1762,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9449e-01, -2.0386e-01, -3.2500e-01, -1.5646e-01, -1.4108e-01,
         3.0915e-01,  4.3093e-02, -1.4544e-01, -2.4836e-01,  3.0665e-02,
        -9.7206e-03, -1.9886e-01,  4.3023e-04,  5.5616e-02,  9.1065e-02,
        -1.0764e-01, -1.3922e-01, -7.1017e-02,  3.2029e-01,  1.0614e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0714, -0.2245, -0.0851, -0.0729, -0.4091, -0.1233, -0.1412,  0.0404,
        -0.0105, -0.0188, -0.0244, -0.0603, -0.0650, -0.0449,  0.0215,  0.0097,
        -0.0245,  0.0497,  0.0024, -0.0324, -0.0028,  0.0005, -0.0162, -0.0446,
        -0.0319,  0.1002,  0.0091, -0.0184, -0.0013,  0.0066,  0.0545,  0.0263,
        -0.0639, -0.1791,  0.0105,  0.1086,  0.2643,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2558, -1.1570,  0.0520,  0.0262,  0.1965, -0.0284,  0.0401,  0.5811,
        -0.1782,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5724, -0.1625, -0.3630, -0.2242, -0.0251, -0.1349, -0.1404, -0.1457,
        -0.1618, -0.0790,  0.0779,  0.1221, -0.0994, -0.2043, -0.1605, -0.0257,
        -0.1282,  0.0417, -0.0768, -0.1226, -0.1393,  0.0043,  0.0272,  0.1022,
        -0.1556,  0.0397, -0.1682,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0817, -0.1869, -0.1103, -0.0787, -0.0306, -0.0515, -0.0465,  0.0005,
        -0.0027,  0.0012,  0.0137, -0.0425, -0.0560, -0.0723,  0.0218,  0.0173,
        -0.0361,  0.0038, -0.0513,  0.1109,  0.0626, -0.0252, -0.0733, -0.0136,
        -0.0549,  0.0337, -0.0004, -0.0419, -0.3165,  0.0424,  0.0144,  0.0376,
         0.1447, -0.1468,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.3263, -0.1052,  0.0445, -0.0217, -0.0802, -0.0206,  0.0149, -0.0938,
        -0.0603, -0.0657, -0.1028,  0.0275,  0.0192, -0.0084, -0.0052,  0.0112,
        -0.1954,  0.1211,  0.0054,  0.0326,  0.0218, -0.1465, -0.0800,  0.0387,
        -0.1056, -0.0407, -0.0606,  0.0039,  0.0400, -0.0025,  0.0291, -0.0631,
        -0.2369,  0.0198, -0.0547, -0.0094, -0.1166,  0.0012, -0.0397, -0.0028,
        -0.0225, -0.2096, -0.0291, -0.0057,  0.0104,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4564e-02, -3.3429e-01, -1.0756e-02,  9.6432e-03, -2.0113e-01,
        -1.1659e-01, -1.1116e-01,  9.6345e-02, -9.3953e-02, -1.0816e-01,
        -2.3633e-01, -9.3888e-05, -1.2131e-01,  1.9121e-01, -6.2705e-02,
        -1.7739e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0523, -0.1245,  0.0834,  0.1114, -0.0384, -0.0237, -0.0789,  0.0506,
        -0.1002, -0.1199, -0.0704, -0.0766, -0.1234, -0.0572, -0.0644,  0.0112,
        -0.0278, -0.0108,  0.0104,  0.0015,  0.0143,  0.0212, -0.0198, -0.0656,
         0.0729,  0.1008,  0.1856,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1470,  0.3278,  0.0131, -0.0794, -0.0953,  0.0086, -0.0137, -0.0362,
        -0.0048, -0.0827, -0.0265, -0.0418, -0.0545, -0.0131,  0.0017, -0.0338,
        -0.0113,  0.0880, -0.0012, -0.0346,  0.0048, -0.0589, -0.0682, -0.1938,
         0.0393,  0.0096, -0.0437, -0.0969,  0.0091, -0.0533,  0.0406,  0.0212,
        -0.1563,  0.0213, -0.0083,  0.0314, -0.0265, -0.1121, -0.0456,  0.0701,
        -0.0103, -0.0760, -0.0601, -0.0518,  0.0084,  0.0141], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6417,  0.0104,  0.1030,  0.0421, -0.0175, -0.0717,  0.0115, -0.0023,
         0.0356, -0.0035,  0.0090, -0.0963, -0.0261, -0.0378, -0.0673, -0.0528,
        -0.0447, -0.0977,  0.1092, -0.5244, -0.1031, -0.0189, -0.0443, -0.0195,
        -0.0301, -0.0365, -0.0156,  0.1377,  0.1201,  0.1438,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2243,  0.0766, -0.0901,  0.0673, -0.1704, -0.2684, -0.5340, -0.6763,
        -0.1240, -0.0358,  0.1361,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0216, -0.4000, -0.1277, -0.0622, -0.0910, -0.0302, -0.0842, -0.2612,
         0.0453, -0.3008,  0.0246,  0.0170, -0.0205, -0.1715,  0.0287, -0.0643,
        -0.0319,  0.0344,  0.0958, -0.2306,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3089, -0.1299, -0.7054, -0.1386, -0.5024, -0.0789, -0.1290, -0.5759,
        -0.3768,  0.1155, -0.3568,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0619, -0.2928, -0.1037, -0.1837, -0.0004, -0.0125,  0.0132, -0.1085,
        -0.0557,  0.0123, -0.1314, -0.0835, -0.0651, -0.0197, -0.0309, -0.0705,
         0.0287, -0.0175, -0.0426,  0.0927, -0.0747,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2782, -0.1360, -0.0500, -0.0892,  0.1028, -0.7368, -0.0936, -0.1337,
        -0.0707,  0.1330, -0.0687, -0.1468,  0.0387, -0.0387, -0.1139,  0.0029,
         0.1008, -0.1060,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1930, -0.1303, -0.2203, -0.1046,  0.0793, -0.0201, -0.0208, -0.0460,
        -0.0054, -0.0020,  0.0811,  0.0470,  0.0143, -0.0328, -0.1184,  0.1008,
        -0.0228, -0.0949, -0.0105, -0.1120,  0.0130, -0.0502, -0.1451, -0.0285,
        -0.0316, -0.0360, -0.0232, -0.0202, -0.0055, -0.0942,  0.0049, -0.0275,
        -0.0032, -0.0014, -0.0105,  0.0088, -0.0200, -0.0477, -0.0063,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6023, -0.4467,  0.1953, -0.1314,  0.0471, -0.0279, -0.0344, -0.1308,
         0.0501,  0.1046,  0.0579, -0.1445, -0.0175,  0.0381, -0.0509, -0.0341,
        -0.0814,  0.0429,  0.0237, -0.0690, -0.0685, -0.0300,  0.0302,  0.0196,
         0.0150, -0.0247, -0.0607,  0.0489, -0.1553, -0.2402,  0.0176, -0.0524,
         0.0206,  0.0425,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.1899, -0.0662,  0.0519, -0.2048, -0.7143, -0.1385, -0.5751, -0.1296,
         0.3144,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4402, -0.3114,  0.0491,  0.0602, -0.1012, -0.0231, -0.0145, -0.1017,
        -0.0208, -0.0546, -0.0685, -0.0188, -0.0148, -0.0104,  0.0460, -0.0428,
        -0.0937, -0.4017, -0.1235,  0.0084,  0.0891, -0.0421,  0.0642,  0.0964,
        -0.0986, -0.0749,  0.0686, -0.0494, -0.0018,  0.0121, -0.0028, -0.0593,
         0.0146, -0.0175, -0.0818, -0.0942,  0.0346, -0.0527, -0.0075, -0.2383,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2691, -0.0240, -0.2394, -0.0989, -0.0740, -0.0200, -0.0608,  0.0651,
        -0.0360, -0.1280,  0.0254, -0.0060, -0.0158, -0.0070,  0.0369,  0.0294,
         0.0115, -0.1356, -0.0646, -0.0179,  0.0215, -0.0191,  0.0491,  0.0019,
        -0.0192, -0.0127, -0.1061,  0.1176, -0.0900,  0.0052,  0.1006, -0.1709,
        -0.0005, -0.0211, -0.0493, -0.0896, -0.0654, -0.0969, -0.0056,  0.0182,
         0.0651,  0.0088, -0.0318, -0.0159, -0.0388, -0.0145, -0.0173, -0.0031,
        -0.0149, -0.0782, -0.0120,  0.0375,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2160, -0.2750, -0.1027,  0.1273, -0.0236, -0.0762, -0.0530,  0.0578,
         0.0791,  0.0424,  0.0149,  0.1244, -0.0949, -0.0359, -0.1085, -0.0869,
        -0.0515, -0.0360, -0.1175, -0.1052,  0.2614,  0.2495,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3540e-01, -9.7926e-01, -6.1941e-02, -1.8911e-01, -5.8436e-02,
         1.3610e-03, -6.3196e-02, -6.7921e-02,  9.2890e-02,  6.5765e-02,
         9.5482e-04,  2.1286e-02, -9.0967e-02, -2.0775e-02,  4.1983e-03,
         1.7215e-02, -5.7133e-03, -9.7874e-03,  7.9245e-02, -1.1567e-01,
         3.1903e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.8559e-02, -3.7693e-02,  4.8611e-02, -7.2699e-02,  3.3696e-02,
         3.2492e-04, -5.7446e-02, -3.5099e-02,  9.5722e-03, -3.4577e-02,
        -9.9029e-03, -1.6801e-02, -1.7975e-02,  3.8199e-02,  4.2768e-03,
        -2.2118e-02, -1.3142e-02, -4.5653e-02, -5.0819e-02,  7.1814e-03,
         5.3964e-03, -1.4872e-03,  1.1910e-02,  1.3709e-02,  7.0564e-03,
         5.9259e-03, -1.7828e-02,  1.9660e-04, -5.3734e-02,  6.3910e-03,
        -1.0060e-02, -4.7998e-02, -1.5572e-02,  2.1283e-03, -1.3099e-02,
         1.0040e-03, -1.8920e-02,  4.2572e-02,  5.1938e-02, -5.5904e-02,
        -4.2220e-01, -3.4012e-02, -8.0163e-02, -5.5445e-02, -2.0540e-02,
        -2.1596e-02, -3.4537e-02, -9.1386e-02,  3.4427e-02, -2.8121e-03,
        -5.2303e-02, -1.3713e-02,  5.5213e-02, -1.6384e-02, -7.3391e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2583, -0.1377, -0.2001, -0.0808, -0.0067, -0.0621,  0.0601,  0.0590,
        -0.0622,  0.0407,  0.0296,  0.0603, -0.1263, -0.0443, -0.0919,  0.0167,
         0.0311, -0.0064, -0.0482,  0.0505, -0.0710, -0.0733, -0.0618, -0.0066,
         0.0358, -0.0967, -0.0747, -0.1714, -0.0573,  0.0352,  0.0989, -0.0384,
        -0.0763, -0.1705,  0.1012, -0.0025, -0.0458,  0.0139, -0.0213,  0.0306,
        -0.0862, -0.0062, -0.1043,  0.0377,  0.1988,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1517, -0.1392, -0.2704, -0.1110,  0.0317, -0.0522, -0.1030,  0.0837,
         0.0463,  0.1530,  0.0189, -0.1388,  0.0409,  0.0100, -0.0477,  0.0617,
         0.0269, -0.2055,  0.0095, -0.0165, -0.2369, -0.0366, -0.0675, -0.0431,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0486,  0.0076, -0.1360,  0.0079,  0.0291, -0.1269, -0.0300, -0.0944,
         0.0196,  0.0387, -0.0147,  0.0110, -0.1042, -0.0755, -0.0439,  0.0038,
        -0.0450, -0.0475,  0.0025, -0.0553, -0.0348,  0.0244, -0.0239, -0.1553,
         0.0041, -0.0301, -0.0941,  0.0051, -0.0296, -0.0459,  0.0448, -0.1780,
        -0.2482,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0376, -0.5720, -0.1225, -0.1748, -0.0748, -0.2954, -0.1113,  0.0855,
        -0.1567,  0.0198, -0.0700,  0.0122,  0.0534, -0.0055, -0.0022, -0.0394,
        -0.0158,  0.0434,  0.0695,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1579, -0.2818, -0.1407,  0.1267, -0.2522, -0.0113, -0.1576, -0.0123,
        -0.1406,  0.0760, -0.0166, -0.0962, -0.0298, -0.0713,  0.0977,  0.0349,
         0.0088,  0.0038, -0.1230, -0.0949, -0.0408, -0.2963, -0.0659, -0.1608,
         0.5407,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2343, -0.0360, -0.0704,  0.0041, -0.0774, -0.2020, -0.5814, -0.0674,
        -0.0307,  0.0131,  0.0079, -0.1222, -0.0688, -0.1002, -0.0752,  0.0511,
        -0.1544,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0507, -0.0587, -0.0434, -0.0858, -0.3489, -0.1360, -0.0430, -0.1851,
         0.0590, -0.1350,  0.0040, -0.0374, -0.1042,  0.0148, -0.0132,  0.0802,
        -0.0706, -0.1612,  0.0069, -0.0011,  0.0203, -0.0836, -0.2165, -0.0035,
         0.0403, -0.1063,  0.0554, -0.1649,  0.0202, -0.0324, -0.1028, -0.4486,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1301, -0.3374, -0.0761, -0.0282,  0.0697, -0.0647, -0.1018, -0.2894,
        -0.0458,  0.0136, -0.0263, -0.0202,  0.4258, -0.0195, -0.0238,  0.0291,
        -0.1490, -0.0479, -0.0010,  0.0791,  0.0291, -0.0371, -0.0767, -0.3184,
         0.1420, -0.0080,  0.0407,  0.1712,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0189, -0.5160, -0.0066, -0.0349, -0.0620, -0.1066, -0.2934, -0.0847,
        -0.1412, -0.1532, -0.0291, -0.0830, -0.0448,  0.0010, -0.0752, -0.0788,
        -0.0832, -0.0556, -0.0302, -0.0530, -0.0807, -0.0979,  0.0999, -0.0856,
        -0.0953,  0.0546,  0.1202, -0.2969,  0.2378,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2662,  1.4751, -1.4925,  0.6853,  0.0511,  2.0936,  3.4486,  1.2238,
        -0.0820,  0.7784,  1.4304,  0.8191,  1.0152,  0.5933,  0.0730, -0.8008,
        -0.4832, -0.8362,  0.0381, -0.3924,  0.3524, -0.4492, -0.7320, -0.2139,
         0.1891,  0.1827, -0.7123, -0.2260,  1.0078,  0.1359,  0.6586,  1.0259,
         5.0282,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6539, -0.1211, -0.1646,  0.1243, -0.0073, -0.1924, -0.0357, -0.0323,
        -0.0263, -0.1323, -0.0160, -0.0264, -0.0231, -0.0554, -0.0931,  0.0323,
        -0.0826, -0.0225, -0.1321, -0.0217, -0.1998, -0.0366, -0.0482,  0.0545,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0655, -0.2309,  0.0363, -0.0880, -0.0770, -0.2206, -0.1786, -0.1252,
        -0.1121, -0.0711, -0.0430,  0.0520, -0.0851,  0.0111,  0.1188,  0.0642,
        -0.0029, -0.0660, -0.0420, -0.3540, -0.0276, -0.0638, -0.0281,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1272e-03, -1.2761e-01, -5.3618e-02,  9.0782e-03, -3.7983e-02,
        -1.3150e-01, -4.9332e-02,  1.1161e-01,  2.1429e-04, -5.6690e-02,
         2.1987e-01, -4.3082e-02, -6.5263e-02, -5.1251e-02,  1.0713e-02,
         1.8508e-03, -7.7828e-02, -5.3553e-02, -1.5501e-01, -9.8727e-02,
        -5.6171e-02, -7.7020e-02,  3.3387e-02, -1.5730e-01, -7.2346e-02,
        -4.5573e-02, -1.6519e-02, -1.0346e-01, -1.2963e-02, -2.8146e-02,
        -5.6874e-02, -2.2808e-02, -3.5683e-02,  1.1712e-01, -3.5168e-02,
        -7.9707e-02, -2.1767e-02, -9.2416e-02, -1.6780e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2228e-01, -3.6244e-01, -1.9409e-01, -5.9821e-02, -1.6804e-01,
         3.3359e-05, -2.4959e-02,  7.1073e-02, -1.5497e-02,  9.5371e-02,
        -6.3975e-02, -1.9805e-02,  3.3486e-02, -7.6556e-02, -3.7676e-02,
        -8.4912e-02, -2.0466e-01, -1.2644e-02, -1.6267e-02, -1.9288e-02,
        -2.4506e-02, -1.4102e-02, -1.4852e-02,  1.1105e-02, -8.7585e-02,
         4.5857e-02, -4.7455e-02,  1.2894e-02, -5.8867e-02, -1.2007e-02,
        -7.4354e-02,  6.5323e-04,  3.9083e-02, -8.0629e-03, -2.0892e-02,
        -1.2087e-02, -3.2232e-02, -8.1199e-02, -3.0610e-02, -5.0747e-02,
        -1.1778e-03, -6.5942e-02, -1.1401e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1024, -0.2537,  0.0501,  0.0228,  0.0012, -0.0068, -0.0145, -0.0173,
        -0.1166,  0.1203,  0.0130, -0.0040, -0.0552, -0.0091,  0.0004,  0.0103,
        -0.0573, -0.0826,  0.0134,  0.0096,  0.0343,  0.0030, -0.0118, -0.0085,
        -0.0156, -0.0259,  0.0259,  0.0051,  0.0116, -0.0095, -0.0203,  0.0656,
         0.0681, -0.0065, -0.0352,  0.0664, -0.0185, -0.0040, -0.0429, -0.0112,
        -0.0513, -0.0563,  0.0003, -0.0555, -0.0039, -0.0024,  0.0006,  0.0206,
         0.0302,  0.0120,  0.0728,  0.0188, -0.0568, -0.0113,  0.0492,  0.1129,
         0.0290], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0387, -0.5975, -0.1814, -0.0102, -0.0413, -0.1487, -0.1622, -0.1248,
         0.0382, -0.0712, -0.2616, -0.1775, -0.2340,  0.0982,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2118, -0.1172, -0.1737, -0.0222,  0.0246, -0.1000, -0.0636,  0.0035,
        -0.0167, -0.0125, -0.0164, -0.2514, -0.1521,  0.0162, -0.0367,  0.0365,
        -0.0219, -0.0541,  0.0309, -0.0068, -0.1153, -0.0957, -0.0210, -0.0042,
         0.0272,  0.0613,  0.0044, -0.0426,  0.0978, -0.0479,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2304, -0.1258,  0.1174,  0.1795,  0.1049,  0.0019,  0.1853, -0.0539,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.1059, -0.0492, -0.0178, -0.0841, -0.0511, -0.1603, -0.0207,  0.0019,
         0.0284, -0.0300, -0.0068, -0.0762, -0.0420, -0.1186, -0.0294,  0.0108,
         0.0381, -0.0131, -0.0472, -0.0367,  0.0419,  0.0299, -0.0605, -0.0767,
        -0.0836, -0.0350, -0.1081, -0.0502, -0.0662, -0.0778,  0.0618, -0.0789,
        -0.0077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0393, -0.1036, -0.0987,  0.0663, -0.0605, -0.0137,  0.0204, -0.0688,
        -0.0127, -0.0791,  0.0040,  0.0015, -0.0312, -0.0596,  0.0594, -0.0010,
        -0.0084,  0.0141, -0.0165,  0.0281,  0.0211, -0.0039, -0.0086,  0.0161,
         0.0215, -0.0326, -0.0073, -0.0963, -0.0050,  0.0104, -0.0052, -0.0122,
        -0.0249, -0.1710,  0.0283, -0.0326, -0.0347,  0.0499, -0.0138, -0.0492,
         0.0054,  0.0005, -0.0838, -0.1380, -0.0099, -0.0147, -0.0407, -0.0443,
        -0.0441, -0.0203, -0.0540, -0.0463, -0.0201, -0.0079,  0.0078,  0.0066,
         0.0109, -0.0475, -0.0318, -0.0475, -0.0489, -0.0038, -0.1698, -0.0123,
         0.0063, -0.0202,  0.0209,  0.0488, -0.0087,  0.0112, -0.1080],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4947, -0.0498,  0.0102,  0.0660, -0.1149, -0.0226, -0.4595, -0.0539,
        -0.0342,  0.0381, -0.0052, -0.0429, -0.0761,  0.0007,  0.0211, -0.0779,
         0.0037,  0.0209,  0.0583, -0.1208,  0.0258,  0.0342,  0.1288, -0.0552,
         0.0287,  0.0096, -0.0540, -0.0348,  0.0888, -0.0336,  0.0539,  0.0714,
        -0.0438, -0.0178, -0.1459, -0.0887, -0.0080, -0.0124, -0.0271,  0.0276,
         0.0093, -0.1467,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0934, -0.3442, -0.0215,  0.0237, -0.0132,  0.0666, -0.0168, -0.0074,
        -0.0785, -0.0610, -0.0514, -0.0553, -0.1301, -0.0307,  0.0172,  0.0558,
         0.0754, -0.0282, -0.1398, -0.0168, -0.0284, -0.0162, -0.0108, -0.0367,
        -0.0021, -0.0532, -0.0553, -0.0451, -0.1158,  0.0335, -0.0282, -0.0218,
        -0.0106,  0.0293, -0.0180,  0.1582,  0.0757,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0838, -0.6809,  0.0227, -0.1095,  0.1579,  0.1036, -0.2122, -0.0307,
         0.1568, -0.1219, -0.0905, -0.1572, -0.0689,  0.1800,  0.0887,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3918, -0.3801, -0.0665, -0.1036, -0.0731, -0.0457, -0.0059, -0.0011,
        -0.1907, -0.0412, -0.0835,  0.0995, -0.2101,  0.0426, -0.0082,  0.0645,
        -0.0025,  0.0602,  0.2241,  0.0753,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0457, -0.1285, -0.0372, -0.0920, -0.1745, -0.0488, -0.0558, -0.0352,
        -0.0696, -0.0795, -0.0253, -0.0463, -0.0220,  0.0139, -0.0514, -0.0141,
        -0.0351, -0.0421,  0.1044, -0.0563, -0.0141, -0.0081, -0.0007,  0.0251,
        -0.0981, -0.0701, -0.0597,  0.0686, -0.0153, -0.0610, -0.0634,  0.0222,
         0.0039, -0.0097, -0.0134, -0.0324, -0.0342, -0.0203, -0.0582, -0.1116,
         0.0172,  0.0117,  0.2591,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2067, -0.1752,  0.0199, -0.0984,  0.0102, -0.0666, -0.0497, -0.2499,
        -0.0146, -0.0640,  0.0415,  0.0223, -0.0608,  0.0247, -0.0686, -0.0411,
         0.0091, -0.0450, -0.0276, -0.0366, -0.0625, -0.0443, -0.0405, -0.0469,
        -0.0318,  0.0449,  0.0491, -0.0286, -0.0799,  0.0026, -0.0269, -0.0678,
        -0.0034, -0.0228, -0.0615,  0.0142, -0.0662,  0.1961,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0357, -0.2268, -0.2024, -0.4626,  0.0633, -0.0921,  0.0466, -0.0136,
        -0.0120,  0.0098, -0.0127, -0.0173, -0.0206, -0.0088, -0.0158, -0.0252,
        -0.0892, -0.0945,  0.0010, -0.0266,  0.0740, -0.0644,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2347, -0.1670, -0.1105, -0.0607, -0.0800, -0.0283, -0.0767, -0.0580,
        -0.2414, -0.2206, -0.0044,  0.0005, -0.0153,  0.0024, -0.0144, -0.0462,
        -0.1691, -0.0288, -0.0808, -0.0434,  0.1052,  0.0601, -0.0605, -0.0787,
        -0.0609, -0.0197, -0.0445,  0.0217, -0.0129, -0.0438,  0.1097, -0.0030,
         0.0912, -0.0942, -0.0600, -0.1280,  0.0547, -0.0554, -0.0182,  0.0003,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1516, -0.2525, -0.0128, -0.0201, -0.0288, -0.1293, -0.1100,  0.0689,
        -0.0226, -0.1492,  0.0219, -0.0836, -0.0723, -0.0928, -0.0682, -0.0315,
        -0.1131,  0.0361, -0.2027,  0.0093, -0.0163,  0.0121,  0.0362, -0.1101,
        -0.0699, -0.1156, -0.0128, -0.0874, -0.0825, -0.1209,  0.0124,  0.0688,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6101e-01, -5.7441e-01,  4.9360e-02,  7.1969e-02, -2.6967e-02,
        -2.8172e-02, -4.8959e-03, -7.1386e-02, -1.9361e-01,  2.9233e-04,
         2.0621e-03, -3.4711e-02,  3.3393e-02,  3.8788e-03, -2.0431e-02,
        -8.9764e-03, -4.0722e-03, -8.8984e-03,  1.3203e-02, -2.2376e-02,
        -4.2961e-02,  2.2574e-02,  2.6657e-02, -1.6527e-01, -4.7683e-03,
         8.9176e-02, -6.3319e-02, -3.3839e-02,  4.7828e-02, -1.0722e-02,
         4.0095e-02, -1.0253e-02,  3.8144e-03,  4.4604e-02,  2.2119e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.2046, -0.1532, -0.0692, -0.1624,  0.0954, -0.0472, -0.0852, -0.1367,
        -0.1197, -0.1511, -0.0583, -0.0469,  0.0370, -0.0736, -0.0649,  0.0196,
         0.0008, -0.0367, -0.0244, -0.0190,  0.0699,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1703, -0.6588,  0.0857, -0.1772, -0.0357, -0.1285, -0.0318, -0.1163,
        -0.1911, -0.0884,  0.0249, -0.0054, -0.0608,  0.1015, -0.0343,  0.0387,
        -0.0453,  0.0726, -0.0669,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6122, -0.1905, -0.0946,  0.2629, -0.0751, -0.0914, -0.4214, -0.0263,
        -0.1510, -0.3974, -0.3549, -0.0157, -0.0310,  0.0088,  0.0399, -0.2326,
        -0.0117, -0.0577, -0.0096, -0.1429,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1089, -0.7043, -0.1201, -0.0911, -0.2225,  0.0346, -0.2402,  0.0668,
         0.1645,  0.0190,  0.0225, -0.0580, -0.0963, -0.0454, -0.0783, -0.0594,
        -0.1358, -0.1348,  0.0369, -0.0133, -0.0884,  0.1145, -0.2037,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1138, -0.4616, -0.1833, -0.0974,  0.0450, -0.0098,  0.0683,  0.0069,
         0.0345,  0.0940,  0.0432, -0.0625, -0.0434, -0.3732,  0.0138,  0.0599,
        -0.2091, -0.0048, -0.0564,  0.0296, -0.1138, -0.0172, -0.0836,  0.1507,
        -0.2127,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1624, -0.6906,  0.0033,  0.0201,  0.0193, -0.0119,  0.0834, -0.0660,
         0.0312,  0.0371, -0.1531, -0.0227, -0.0277, -0.1243,  0.1948, -0.0236,
        -0.0202, -0.0155, -0.1024,  0.0064, -0.0103,  0.0121, -0.0482, -0.0700,
        -0.0333,  0.0266,  0.0913,  0.0395, -0.0180,  0.0013, -0.0830,  0.0464,
        -0.0748, -0.1655,  0.2206,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0338,  0.0550, -0.1293, -0.1068, -0.0919, -0.0760, -0.0516,  0.0283,
        -0.1932,  0.0138, -0.0643, -0.0463, -0.0759, -0.1109, -0.1803, -0.0689,
        -0.0700, -0.0422, -0.1374, -0.0771, -0.0367, -0.0941,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5577,  0.1453,  0.1802, -0.0873,  0.0330,  0.2158, -0.0401, -0.0434,
        -0.1352,  0.0965,  0.4149, -0.0217, -0.1947, -0.0429,  0.4189,  0.1240,
         0.1101, -0.0218,  0.1309,  0.1427,  0.3563,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0443, -0.3471,  0.0069,  0.0858,  0.0200,  0.0432, -0.0843, -0.1607,
        -0.0421, -0.0669, -0.0403,  0.0364, -0.1049,  0.0050, -0.0032, -0.1788,
         0.0850,  0.1409,  0.0597,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2081, -0.2689, -0.1087, -0.2409, -0.1010, -0.1473, -0.0429,  0.0663,
         0.0153, -0.0436,  0.0892, -0.0418, -0.0208, -0.0472, -0.0109,  0.0518,
         0.0838, -0.0966, -0.0967,  0.0173,  0.0143, -0.0199, -0.0585,  0.0623,
        -0.2489, -0.2345,  0.1651, -0.0827,  0.0634, -0.1843, -0.0921,  0.0293,
        -0.1046, -0.1722, -0.0119,  0.0191, -0.1265,  0.0501, -0.0854, -0.0606,
         0.0733, -0.1065, -0.0185,  0.0189, -0.0241,  0.4559, -0.1058],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1002, -0.1901, -0.1268,  0.0162,  0.0072, -0.0661,  0.0157,  0.2306,
        -0.0524, -0.0566,  0.0570,  0.0865,  0.0209, -0.0340,  0.0807, -0.1413,
        -0.2284,  0.0215, -0.0487,  0.0234, -0.1551,  0.0220,  0.0560, -0.1749,
         0.1667, -0.1285, -0.3050,  0.1368,  0.0163,  0.1951, -0.0942,  0.0192,
        -0.0858, -0.1051,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2230, -0.3037,  0.0608,  0.0204,  0.0238, -0.0363,  0.0237, -0.0346,
        -0.0248, -0.0334, -0.0033,  0.0183, -0.0887, -0.1758,  0.0298,  0.1082,
        -0.0254, -0.0598, -0.1585, -0.1415,  0.0256, -0.0236, -0.0007, -0.0431,
         0.0304, -0.0728, -0.0272, -0.0405, -0.0092, -0.0622,  0.0431, -0.0752,
        -0.0660, -0.0943,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.1117, -0.1759,  0.0222, -0.0015, -0.0214, -0.0353,  0.1499, -0.0068,
         0.0891, -0.1230, -0.0705,  0.0242, -0.0402, -0.3144, -0.0973, -0.0454,
         0.0659, -0.1401,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4244, -0.0375,  0.1302, -0.0251,  0.0666, -0.0158, -0.0062, -0.1350,
        -0.0090,  0.3732,  0.0598, -0.0517,  0.1403, -0.0176, -0.0460,  0.0359,
        -0.0605, -0.1605, -0.1419,  0.1108,  0.0117,  0.1184,  0.0356,  0.0532,
         0.0910,  0.0246,  0.2141,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4103,  0.1161,  0.2366,  0.0734,  0.0617,  0.2136,  0.1011,  0.1297,
         0.1829,  0.1454,  0.0190, -0.0123,  0.1451,  0.0427,  0.0032, -0.0770,
         0.0193,  0.0046,  0.0307,  0.3847,  0.0755,  0.0042, -0.0019, -0.0545,
         0.1765, -0.0436,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0929, -0.3049,  0.0286, -0.0038, -0.1522, -0.2305, -0.0975,  0.0056,
        -0.0481, -0.0363, -0.1314,  0.0248, -0.0283,  0.0214, -0.0023, -0.0667,
        -0.0205, -0.0493, -0.0131,  0.2408, -0.0069,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1092, -0.1402, -0.1671, -0.4949, -0.0792,  0.1687,  0.0880, -0.0209,
        -0.0318,  0.1369, -0.0753, -0.0055,  0.0221,  0.0203,  0.2126,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2919,  0.0754,  0.0316,  0.0652, -0.1699,  0.1001,  0.0350,  0.0713,
         0.1285, -0.0253,  0.0561,  0.0552,  0.1560,  0.0695, -0.0028, -0.0036,
        -0.1254,  0.0396,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3269, -0.1671, -0.0122, -0.3975, -0.2295,  0.0083,  0.0541, -0.1092,
        -0.0441, -0.0700, -0.0377, -0.0295,  0.1461, -0.0372, -0.0626, -0.1520,
        -0.0936,  0.1516,  0.2297,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2166, -0.1212, -0.0422,  0.0179, -0.1215, -0.0536,  0.1264,  0.0374,
        -0.0358, -0.0099, -0.0279,  0.0271,  0.0046, -0.0045,  0.0058,  0.0010,
        -0.0556,  0.0451, -0.0057, -0.1956, -0.1304, -0.1045,  0.0112,  0.1085,
         0.0515, -0.0626, -0.0430, -0.0247, -0.0035, -0.0303,  0.0039,  0.0147,
        -0.0740, -0.0408, -0.0088, -0.0346, -0.1364,  0.0164, -0.0176,  0.0487,
        -0.0468,  0.1954, -0.1039, -0.0135], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0065, -0.0739,  0.0067, -0.1017, -0.0650, -0.0136, -0.0426,  0.0150,
        -0.1806,  0.0377, -0.0602, -0.0226,  0.1080, -0.0346,  0.0978, -0.0416,
         0.0471, -0.2869, -0.0442, -0.0047, -0.0469, -0.0302, -0.0064, -0.0086,
        -0.0139,  0.0061, -0.0263, -0.1034,  0.1446, -0.0935, -0.0729, -0.0373,
        -0.0373,  0.2029, -0.2135,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2115, -0.4693, -0.0578, -0.0176,  0.0330, -0.0155, -0.0145, -0.1577,
        -0.1141, -0.0619, -0.0300, -0.0808, -0.1100, -0.0512, -0.1205, -0.1675,
         0.0540,  0.0062,  0.1230, -0.0636, -0.0267, -0.0500,  0.1109, -0.1364,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0525, -0.1305, -0.0977, -0.0744, -0.0262, -0.0393,  0.0421, -0.0542,
        -0.0344, -0.0401, -0.0709, -0.0649,  0.0510, -0.0468, -0.0813,  0.0286,
         0.0512, -0.0215, -0.1150,  0.0606, -0.0036, -0.0074, -0.0273, -0.0580,
         0.0081, -0.0447, -0.0695, -0.0169,  0.0008, -0.0127, -0.1003, -0.0344,
        -0.0622, -0.1150,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4060, -0.4227,  0.0426,  0.1039,  0.0526, -0.0497, -0.0569,  0.0154,
         0.0152, -0.0203,  0.0077,  0.0883, -0.0676, -0.0290, -0.0374, -0.3404,
        -0.0346,  0.0482, -0.0459,  0.1988,  0.0312, -0.0102, -0.1158, -0.0090,
        -0.0119, -0.0900,  0.0396,  0.0799, -0.1833,  0.2160,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.3967, -0.1154, -0.1349, -0.2371,  0.0885, -0.1037, -0.1971, -0.0786,
         0.0094, -0.0525,  0.0692, -0.1929, -0.3971,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3507, -0.0715,  0.1974, -0.0463,  0.0443, -0.0805,  0.0119, -0.0027,
        -0.0906,  0.0491, -0.0593, -0.0110,  0.1057, -0.0432, -0.0692,  0.0784,
        -0.1062, -0.1346,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5358, -0.0582, -0.0732, -0.2007, -0.1063,  0.1611, -0.4703, -0.1587,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0963, -0.0943, -0.0511, -0.0421, -0.0425, -0.0249,  0.0017, -0.0475,
         0.0628,  0.0246, -0.0385, -0.0333, -0.0683,  0.0212, -0.0309, -0.0261,
        -0.0104,  0.0072, -0.0357,  0.0254, -0.0342,  0.0567, -0.0815, -0.0334,
        -0.0548, -0.0360, -0.1742, -0.0355, -0.2289, -0.0220, -0.0257, -0.0910,
         0.0115, -0.2190,  0.0122, -0.0158, -0.0600, -0.0636,  0.0343,  0.0092,
        -0.0408, -0.0763, -0.0205,  0.0426,  0.0544,  0.0325, -0.0485],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0358, -0.1762, -0.5881, -0.2670,  0.2343,  0.0029,  0.2668,  0.0513,
        -0.3051, -0.0797, -0.0562,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4136, -0.2853, -0.1438,  0.0078, -0.1261, -0.0426, -0.1137, -0.2463,
         0.0763, -0.1787, -0.0846, -0.1751, -0.0359, -0.0314,  0.2561, -0.0588,
         0.1281,  0.1214,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9229, -0.5729, -0.2371, -0.2266, -0.0570, -0.0400, -0.1504,  0.0208,
        -0.1096, -0.0336,  0.0078,  0.1895,  0.3183,  0.3187,  0.0478,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0304, -0.3257, -0.1620, -0.0853, -0.0756,  0.0627, -0.2506,  0.0083,
        -0.0207, -0.2398, -0.0198,  0.1349,  0.0574, -0.0660,  0.0936,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2276, -0.2350,  0.0384, -0.0469,  0.0042,  0.0162,  0.0464, -0.0611,
        -0.2534, -0.1212,  0.0492, -0.0695, -0.1593, -0.0615, -0.0490,  0.1229,
        -0.1109,  0.0604,  0.0093,  0.0610, -0.0600, -0.0197,  0.0446, -0.0130,
         0.0387,  0.1541,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2945, -0.7764,  0.0215, -0.0013,  0.0694,  0.1485, -0.1390, -0.2459,
        -0.4020, -0.0487, -0.0068,  0.0589,  0.0641,  0.0948,  0.0397,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2059, -0.1965, -0.0404, -0.0713,  0.0011, -0.0209, -0.0557,  0.0241,
        -0.0006, -0.2355,  0.0599, -0.0745, -0.0673, -0.0342,  0.1128, -0.0151,
        -0.0181,  0.0298, -0.0214,  0.0107, -0.0296,  0.0708, -0.0593,  0.0515,
        -0.0685,  0.0933,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0715, -0.1221, -0.0029, -0.0097, -0.0626, -0.0777, -0.0129, -0.0927,
         0.0149, -0.1076, -0.2759, -0.0223, -0.0025,  0.0799, -0.0791, -0.0477,
         0.0661, -0.0714, -0.0698, -0.0182,  0.0843, -0.0135,  0.0214, -0.0096,
         0.2723, -0.0287,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.3377, -0.3804, -0.1030, -0.0121, -0.0887,  0.0216, -0.0300, -0.0543,
         0.0243, -0.0511,  0.0032, -0.0019, -0.1378, -0.0619, -0.0508,  0.0160,
        -0.1058,  0.0670, -0.1579,  0.0049,  0.0109, -0.0396, -0.0515, -0.0451,
        -0.0330, -0.0713, -0.0781,  0.0205,  0.0373,  0.0538,  0.0088, -0.0714,
        -0.0032, -0.0418,  0.0316, -0.1066,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0760, -0.1290, -0.1579, -0.0953,  0.0053,  0.0237, -0.0326, -0.1084,
        -0.0023,  0.0836, -0.0315,  0.0519, -0.0603,  0.0286,  0.0212, -0.0100,
         0.0486, -0.0260,  0.0653, -0.1126,  0.0887, -0.0630, -0.0360,  0.0061,
        -0.0617, -0.0584,  0.0059, -0.0217, -0.0904,  0.1011, -0.0741, -0.0722,
        -0.0807, -0.0220,  0.0762, -0.1954,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0117,  0.4054,  0.2040,  0.1828, -0.1815,  0.0329,  0.0380,  0.0521,
         0.1040,  0.0873,  0.1252,  0.0012,  0.0458,  0.0388, -0.2048, -0.0049,
         0.0205,  0.0759,  0.0682,  0.0358,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3685, -0.1298, -0.0025, -0.0841,  0.0223, -0.1578, -0.0124, -0.0047,
         0.0199, -0.0654, -0.0008, -0.0736,  0.0007,  0.0613, -0.0649,  0.0168,
         0.1357, -0.0754, -0.0224, -0.0132,  0.0328, -0.0311, -0.0216, -0.0786,
         0.0144, -0.0049, -0.0166,  0.2008, -0.0930,  0.0233, -0.0177, -0.0578,
        -0.0275, -0.1788, -0.0012,  0.1439, -0.0160,  0.0281,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2171, -0.0749, -0.0097, -0.1724,  0.0147, -0.0044, -0.0218, -0.0127,
        -0.0489, -0.0417, -0.0990, -0.2106, -0.0666,  0.0637, -0.0876, -0.0415,
         0.0511, -0.0926, -0.0715, -0.0176, -0.0341,  0.0063,  0.0510,  0.0162,
         0.0463, -0.0148,  0.0230,  0.0025, -0.1404,  0.0729, -0.0052,  0.0613,
         0.0419, -0.0176, -0.0201,  0.1028,  0.0563, -0.0665, -0.0396,  0.1390,
         0.0078], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4853,  0.9477, -0.0499,  0.0618,  0.1162,  0.1686, -0.1326, -0.1276,
         0.0523, -0.0259, -0.0034,  0.0640,  0.1139,  0.1576,  0.0543,  0.0120,
         0.1374,  0.0749, -0.0447,  0.1940, -0.1194,  0.0834,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1192, -0.1631, -0.0547, -0.0296, -0.0810, -0.0429, -0.0113, -0.1362,
        -0.0596, -0.0863,  0.0454, -0.0564, -0.0065, -0.0407,  0.0158, -0.0185,
        -0.0368,  0.0424, -0.0458, -0.0378, -0.2804, -0.0115, -0.0925, -0.0271,
         0.0069, -0.0175,  0.0203, -0.0751, -0.2182, -0.0103, -0.0203, -0.0357,
        -0.0754,  0.2145, -0.0044,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1538, -0.1244, -0.1148, -0.1420, -0.3495,  0.1733, -0.0624,  0.0384,
        -0.1947,  0.0210, -0.0880, -0.1025,  0.0005, -0.0088, -0.0396, -0.0050,
         0.0144, -0.0323, -0.1212, -0.0892, -0.0361, -0.0919, -0.0026, -0.0443,
        -0.0619, -0.0801, -0.0821, -0.0188, -0.0225,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0943, -0.2895, -0.0396, -0.0445, -0.0630, -0.0969, -0.0544,  0.2279,
        -0.0325, -0.0453, -0.0649, -0.0910,  0.0254,  0.0215,  0.0045, -0.0168,
        -0.0628,  0.0082, -0.1126, -0.2969, -0.1419, -0.2006,  0.0381, -0.4544,
         0.0156,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1163,  0.0040, -0.0897,  0.0128, -0.1392, -0.1035, -0.0732, -0.0034,
        -0.1020, -0.0915, -0.0744, -0.0550, -0.0489,  0.1894, -0.1344, -0.2229,
         0.0833, -0.0517, -0.0262,  0.0409, -0.0266,  0.0091, -0.0452,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0617,  0.0011, -0.0271, -0.0168, -0.1590, -0.0145,  0.0891, -0.1351,
        -0.0791, -0.0388, -0.0658,  0.0280, -0.0633, -0.0689, -0.0227, -0.1377,
        -0.0372, -0.0417, -0.1349, -0.0694,  0.0096, -0.0986, -0.0022,  0.0320,
         0.0123,  0.0499, -0.2522, -0.0085, -0.0015, -0.0290, -0.0868, -0.0254,
         0.0044, -0.0616,  0.0062,  0.0624, -0.0245,  0.0738, -0.0435,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2062, -0.3223,  0.0956, -0.3107, -0.5347, -0.1652,  0.0532, -0.0592,
        -0.1223,  0.0794, -0.0064, -0.0835,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.2645, -0.1561, -0.5507, -0.0503, -0.0438, -0.0905, -0.0797, -0.0465,
        -0.0464, -0.0743, -0.0615,  0.0098, -0.0498, -0.0471, -0.0688, -0.1257,
        -0.0902, -0.1105, -0.0195, -0.1064, -0.0611, -0.0144, -0.0314, -0.0047,
         0.0397, -0.0299,  0.0525,  0.0524,  0.0186, -0.0040, -0.0791, -0.0651,
        -0.0307, -0.0188, -0.0936, -0.1824, -0.0480, -0.0314, -0.0211, -0.0200,
         0.0498,  0.0887,  0.1575,  0.0779,  0.0074,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3415, -0.5732,  0.1072, -0.1104, -0.1150,  0.0172, -0.0354, -0.0034,
        -0.0763, -0.0607, -0.0870,  0.0620,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2508, -0.7067,  0.3416,  0.0877,  0.0749,  0.0239, -0.0638,  0.0850,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2021e-01,  6.0844e-03,  5.9146e-02,  1.8679e-01,  2.1036e-02,
        -1.1214e-01, -1.7232e-02, -4.7279e-02, -6.0784e-02, -3.9290e-01,
        -3.5214e-02, -1.0308e-01,  3.3080e-02, -3.1587e-02,  2.4463e-02,
         1.6169e-02, -2.3247e-02,  3.1156e-02, -2.6582e-02, -2.5583e-03,
        -1.6685e-02,  7.3874e-03,  1.9346e-02, -5.1312e-02, -4.7008e-02,
        -1.7397e-02, -1.1035e-02,  8.1149e-04, -6.3318e-02, -3.9468e-02,
        -1.8072e-02, -1.8387e-02, -1.5036e-02, -3.0543e-02,  3.6544e-02,
        -5.2465e-02, -6.9392e-02,  4.1852e-02, -3.6145e-02, -1.1191e-02,
         2.3485e-02, -3.2910e-03,  1.0087e-01, -3.3317e-02,  8.2635e-02,
        -1.1001e-01, -8.6694e-02, -7.7459e-02, -8.1473e-02, -2.2699e-01,
        -7.3820e-02, -4.3098e-02, -5.5613e-02, -6.6440e-02, -3.4854e-02,
        -1.3678e-02, -1.0979e-01,  7.7739e-03,  1.8178e-04,  1.2646e-01,
        -8.7780e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0530, -0.1386, -0.0143, -0.0062, -0.1545,  0.1746, -0.1078, -0.0079,
        -0.0482, -0.0103, -0.0973, -0.0898, -0.0054, -0.0485, -0.0965, -0.0139,
         0.0187, -0.0004, -0.0055, -0.0493, -0.0727, -0.0202, -0.0292,  0.0336,
        -0.0870, -0.0271, -0.0623, -0.0219, -0.0511, -0.0788, -0.0228, -0.1154,
        -0.0475,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4424, -0.1549, -0.1330, -0.0643, -0.0157,  0.0326, -0.0693,  0.0770,
         0.0477, -0.0495,  0.0579, -0.0408,  0.0105,  0.0190, -0.0302, -0.0510,
         0.0108, -0.1370, -0.5172, -0.1082,  0.4153, -0.1878,  0.1522, -0.0929,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4879, -0.1881, -0.0694, -0.2209,  0.1702,  0.0338,  0.0867, -0.0616,
         0.0414, -0.2977, -0.0381, -0.1359, -0.1190, -0.1178, -0.1674,  0.0583,
         0.0704, -0.1129,  0.0059,  0.0962,  0.0258,  0.0723, -0.1639,  0.0562,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4862, -0.2196, -0.1141, -0.0058, -0.0138,  0.0269, -0.0150, -0.0232,
        -0.0024,  0.0747, -0.0039,  0.0007, -0.0355,  0.0014, -0.0058, -0.0791,
        -0.0235,  0.0753,  0.0484,  0.0476, -0.0012, -0.0264,  0.0279, -0.2671,
        -0.0032,  0.0516,  0.0988, -0.0916,  0.1348,  0.0065, -0.0744, -0.0209,
        -0.0349,  0.2577,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4783, -0.2116, -0.0221, -0.0332, -0.0320,  0.1288, -0.0820, -0.0167,
         0.0283, -0.0787,  0.1258, -0.0017, -0.0591, -0.4503,  0.0813, -0.3421,
        -0.0726, -0.0166, -0.0069, -0.0311,  0.1107, -0.0685, -0.0458, -0.1071,
         0.3279,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2277, -0.1927, -0.2366, -0.1189, -0.0497,  0.0144, -0.0902, -0.0457,
        -0.0282, -0.6216, -0.0858,  0.0099, -0.0431, -0.0416,  0.0359,  0.1761,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0411, -0.0942, -0.0424, -0.1009, -0.0240, -0.0897, -0.3443, -0.0496,
         0.0051, -0.0479,  0.0311,  0.1238, -0.0573, -0.0597, -0.0292,  0.0900,
         0.0004, -0.0365, -0.0380, -0.0477, -0.0287, -0.0046,  0.0006,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5916, -0.7544, -0.0055,  0.0104, -0.3123,  0.0364, -0.1157, -0.5580,
        -0.2518, -0.1933,  0.1383,  0.0200, -0.4173, -0.3461,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.3462, -0.1087,  0.0033, -0.0649, -0.0630,  0.1244, -0.0786,  0.0520,
        -0.0198,  0.0189, -0.1143, -0.0650, -0.0663,  0.0006, -0.0556,  0.0340,
         0.0426, -0.1610, -0.1020, -0.0249, -0.0423, -0.0519,  0.0148,  0.1830,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2009, -0.1029, -0.1828, -0.2120, -0.2354, -0.2654, -0.2195, -0.1828,
        -0.2132,  0.1109, -0.1353,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0790, -0.0624,  0.0597,  0.1568, -0.0511, -0.1429, -0.2008,  0.0782,
        -0.1508,  0.0455, -0.0129,  0.0729,  0.0112,  0.0906, -0.0915,  0.0109,
        -0.0434, -0.2202, -0.0074,  0.1290, -0.0023,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0377, -0.0875, -0.1350,  0.2388,  0.1091, -0.0503, -0.2278, -0.3448,
        -0.0026, -0.0577, -0.0169, -0.2350,  0.1619,  0.0644,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7324,  0.2300,  0.1405, -0.0678,  0.1024,  0.1164, -0.0430,  0.0494,
         0.1752,  0.0070,  0.1694,  0.6277,  0.3076,  0.0840,  0.0471,  0.0265,
        -0.0135,  0.1626,  0.0901,  0.0259,  0.0884,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0795, -0.0663, -0.0430, -0.1246, -0.1196, -0.1745, -0.0539, -0.1996,
        -0.0003, -0.1205, -0.0962,  0.0133, -0.1273,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5256e-03, -2.4741e-02,  1.4598e-03,  1.7309e-02, -2.3316e-02,
        -1.3255e-02, -1.1280e-02, -8.0805e-02,  1.5341e-02, -2.7518e-02,
        -4.5373e-02, -2.2497e-02,  4.2038e-02, -1.6634e-02,  2.3654e-03,
        -2.1926e-02,  8.0856e-05,  5.6035e-02, -3.4250e-02, -4.6823e-02,
        -1.6113e-02, -3.6596e-02, -1.5027e-01, -1.6251e-02, -1.6130e-01,
         5.9422e-02, -4.5028e-02, -1.4657e-01, -3.0878e-02, -5.2498e-02,
        -3.6312e-02, -4.9071e-02,  9.2301e-02,  3.9230e-02, -3.8282e-02,
        -5.4010e-02, -3.0867e-02,  1.4372e-03, -1.2480e-03,  3.1364e-03,
         6.4678e-03,  2.0606e-02,  1.3969e-02, -7.5113e-02,  1.3875e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0394, -0.0590, -0.0525, -0.0489, -0.0886,  0.0487, -0.0882, -0.0295,
        -0.0775, -0.0492, -0.0856, -0.1911, -0.0033,  0.4910, -0.2203, -0.0570,
        -0.3790,  0.0838, -0.0253, -0.0181, -0.0647, -0.1650,  0.0062, -0.1121,
         0.0236,  0.0029, -0.1416,  0.0247, -0.0671,  0.0563, -0.0932, -0.0177,
         0.1348,  0.1501,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7904, -1.6282,  0.0791, -0.2432, -0.3066, -0.2620, -0.0872, -0.3032,
        -0.6190, -0.2779, -0.1964, -0.3292,  0.2087, -0.1897,  0.0312, -0.0413,
        -0.2785,  0.0899, -0.1207, -0.3244,  0.0445, -0.3259, -0.2440, -0.5646,
        -1.0149,  0.0339,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0768, -0.6031,  0.1769, -0.0163,  0.0300, -0.1420, -0.1927, -0.0775,
         0.0323,  0.0578, -0.0906, -0.0225, -0.0274, -0.0629, -0.1473,  0.0128,
        -0.1793,  0.0339, -0.0213, -0.0211, -0.0130,  0.1023, -0.0065,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2944, -0.1384, -0.1741, -0.0920, -0.0239,  0.0038,  0.0187, -0.0655,
        -0.0798,  0.0352, -0.0465,  0.0144, -0.0415,  0.0560, -0.3021,  0.0289,
        -0.0153, -0.0764, -0.0490, -0.0316, -0.0345,  0.0047,  0.0575,  0.0538,
        -0.0433, -0.0488, -0.0492, -0.1671,  0.0122, -0.0537,  0.0103, -0.0041,
         0.0309, -0.0219, -0.0417, -0.0336, -0.0314, -0.0530, -0.2510,  0.0599,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0600, -0.2921, -0.1175, -0.0464, -0.1083, -0.0644,  0.0678, -0.2220,
        -0.2334, -0.1065, -0.0714, -0.0335, -0.0541, -0.1410, -0.0529, -0.1077,
        -0.0340, -0.0150,  0.2446, -0.0197,  0.0499,  0.0347, -0.0178,  0.1000,
        -0.0389, -0.0657,  0.0185, -0.0880,  0.0054,  0.0733, -0.2924, -0.1213,
         0.4224, -0.3487,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.1139, -0.0357,  0.0025, -0.0289, -0.0114, -0.0181, -0.0399,  0.0043,
        -0.0101,  0.0610, -0.0247, -0.0380,  0.0755, -0.0264, -0.1797, -0.1056,
         0.0285, -0.0244, -0.0525, -0.0044, -0.0107, -0.0179,  0.0205,  0.0180,
        -0.0348, -0.2108, -0.0928, -0.0253, -0.0343,  0.0242, -0.0566, -0.0032,
        -0.0085,  0.0168, -0.0064,  0.0283, -0.0415, -0.0205, -0.0015, -0.0231,
        -0.0146, -0.0383, -0.0227, -0.0370, -0.0242, -0.0034, -0.0440, -0.0108,
        -0.0319, -0.0229, -0.0164,  0.0379], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2160, -0.2369, -0.0449,  0.0720, -0.1964, -0.0236, -0.0430, -0.0543,
         0.0100, -0.1082, -0.0604, -0.0263, -0.1407, -0.1368, -0.0751, -0.0850,
        -0.1218,  0.1487, -0.0390,  0.0105, -0.1056,  0.0058, -0.0685, -0.0539,
        -0.0518, -0.0216,  0.0771, -0.0881, -0.0210, -0.0261, -0.0129,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1199, -0.0446,  0.1425,  0.0193, -0.1640, -0.0294,  0.0142, -0.0006,
        -0.0985,  0.0211, -0.0293, -0.1950, -0.1430,  0.0917,  0.0352, -0.0735,
         0.5196,  0.1204, -0.1314, -0.0491, -0.1741, -0.1769, -0.1406, -0.2718,
         0.5466,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0448, -0.3198, -0.2594,  0.0037, -0.2471, -0.2271, -0.5784, -0.0311,
        -0.1874,  0.0959,  0.0332, -0.2981,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1480, -0.4964, -0.0506, -0.1954,  0.0556,  0.0349,  0.1019, -0.0153,
        -0.0055, -0.1277,  0.0463,  0.1129, -0.0554, -0.3584, -0.0706, -0.1442,
        -0.1083, -0.0213,  0.0186,  0.0866, -0.0630,  0.0349,  0.0120,  0.1555,
         0.0688, -0.3891, -0.0373,  0.0087, -0.1227,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2599, -0.2965,  0.1020,  0.0571,  0.1408, -0.0707, -0.3517, -0.2547,
         0.2781, -0.1568, -0.0872, -0.0553,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0740, -0.3566,  0.0830, -0.0657, -0.0974, -0.1544, -0.2962, -0.0419,
        -0.0932, -0.1130, -0.1705,  0.0176,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9419,  0.0890, -0.2941,  0.1431, -0.5100,  0.0613, -0.1376, -0.1304,
        -0.2171, -0.0608, -0.0878, -0.1029, -0.0332,  0.1083, -0.0553, -0.2363,
        -0.3001, -0.0701, -0.2676,  0.0290,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0161, -0.4045, -0.0222, -0.0742, -0.3077, -0.1032, -0.0728,  0.0708,
        -0.0924,  0.0086,  0.0047,  0.0116, -0.0676, -0.0045,  0.0321,  0.0335,
        -0.0289,  0.0065, -0.0019, -0.0240, -0.0629, -0.0007,  0.0251,  0.1188,
         0.0122,  0.0922, -0.0122, -0.0462, -0.0186, -0.0523,  0.1781, -0.0196,
        -0.0524, -0.1384, -0.0854,  0.1525,  0.0206,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1115, -0.8267, -0.5491, -0.0289,  0.0711,  0.0364, -0.0502,  0.2993,
        -0.2378,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0564, -0.2018,  0.2372, -0.1443, -0.0904, -0.0474, -0.1543, -0.1255,
        -0.0268,  0.0017,  0.0857,  0.0522, -0.1844, -0.1396, -0.0638,  0.0721,
        -0.0097,  0.0010, -0.1374, -0.0768, -0.1282,  0.0369,  0.0167, -0.0632,
         0.0276, -0.5242,  0.2667,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2930, -0.2062, -0.0136,  0.0041, -0.0191, -0.0829, -0.0261, -0.0092,
         0.0076,  0.0120, -0.0088, -0.0357, -0.0250,  0.0937,  0.0213,  0.0270,
        -0.1072,  0.1509,  0.0156,  0.0222, -0.0206, -0.0437,  0.0120,  0.0705,
         0.0282,  0.0315, -0.0517, -0.0779, -0.1585, -0.0210, -0.0214, -0.0040,
         0.1389, -0.0928,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.4333, -0.0220,  0.0081,  0.0176, -0.0802, -0.0119,  0.0182, -0.0391,
        -0.0847, -0.0907, -0.0588, -0.1814, -0.0769,  0.0124,  0.0075,  0.0305,
         0.0266,  0.0406, -0.0272,  0.0166,  0.0646, -0.0009, -0.0653,  0.0257,
        -0.0422, -0.0512, -0.0209,  0.0095, -0.0476,  0.0434,  0.0271, -0.0693,
        -0.1722,  0.0750,  0.0039, -0.0135, -0.0776, -0.0428,  0.0174,  0.0132,
        -0.0229, -0.1385, -0.0438, -0.0148, -0.0172,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1668, -0.0026,  0.0285, -0.0966, -0.3199, -0.1322, -0.5324, -0.0158,
        -0.1987, -0.2337,  0.0689, -0.0062, -0.1554, -0.0708, -0.0351, -0.0761,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2463,  0.1479, -0.0685, -0.1271, -0.0534,  0.0745,  0.1131,  0.0835,
        -0.1054, -0.1511,  0.0460, -0.0654, -0.0713, -0.0412, -0.0866,  0.0587,
        -0.0854,  0.0062,  0.0208, -0.0271, -0.0807, -0.0472, -0.0515,  0.0230,
        -0.0856,  0.0082, -0.2321,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3464e-01, -3.0810e-01, -5.6496e-02, -2.1502e-02, -3.2300e-02,
         1.1767e-02, -3.5682e-02, -4.1605e-02, -3.3507e-02, -4.9864e-02,
        -4.8721e-02, -1.4045e-02, -4.1302e-02, -1.3125e-02, -3.4489e-02,
         8.1944e-03,  6.1811e-03, -4.5491e-05, -1.7483e-02, -3.5227e-02,
         2.8695e-03, -4.7994e-02, -6.4684e-02, -1.1516e-01, -2.5515e-03,
        -8.6468e-03, -2.5712e-02, -6.8013e-02,  1.4321e-03, -3.3410e-02,
        -1.5453e-02, -2.1967e-04, -3.9093e-02, -1.6085e-02, -3.6865e-02,
        -3.3239e-02, -2.9348e-02, -4.1094e-02, -2.5818e-02,  3.0710e-02,
        -7.0986e-02, -1.7839e-02, -5.5973e-03, -2.3986e-03, -7.5407e-02,
         1.7263e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0721, -0.0168, -0.0599, -0.1328, -0.0381, -0.0524, -0.0403, -0.0762,
         0.0571, -0.0471, -0.0334, -0.0340, -0.0286, -0.0196, -0.1025,  0.0024,
         0.0154, -0.0064, -0.1127, -0.3706, -0.1136, -0.0714,  0.0212, -0.0547,
        -0.0856, -0.0458, -0.1189, -0.1929,  0.0054,  0.1140,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5364,  0.0534, -0.3127,  0.2816,  0.1361, -0.0695,  0.1761,  0.0404,
         0.0869,  0.4165,  0.2077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1879, -0.3006, -0.0673, -0.0061, -0.1308, -0.0060, -0.1311, -0.2698,
        -0.1008, -0.2949,  0.0492, -0.0832, -0.0661,  0.0633, -0.0544, -0.2616,
        -0.0185,  0.0409, -0.1532, -0.4850,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0252, -0.6847, -0.2526, -0.3928,  0.0595, -0.3350, -0.2168, -0.1450,
        -0.2122, -0.0897,  0.0625,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0519, -0.3960, -0.0076, -0.1879, -0.1443,  0.0736,  0.0022, -0.0552,
        -0.1006, -0.0833,  0.0026,  0.0568, -0.0165,  0.0391, -0.0563, -0.1362,
        -0.0571, -0.0112, -0.1237, -0.0242, -0.0518,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0627,  0.2067,  0.1044, -0.1103,  0.1436,  0.4373, -0.2983,  0.2316,
         0.0684, -0.1898,  0.0634,  0.1107, -0.0222,  0.0360, -0.0324, -0.1309,
         0.1147, -0.0115,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1418, -0.0863, -0.2563, -0.0794,  0.0190, -0.0431,  0.0236,  0.0134,
        -0.0318, -0.0044, -0.0119,  0.0078, -0.0653, -0.0878, -0.1193,  0.0142,
        -0.0306, -0.0606, -0.0686,  0.0506, -0.0411, -0.0408, -0.1383, -0.0015,
        -0.0629, -0.0051, -0.0680, -0.0252, -0.0078, -0.1565, -0.0349, -0.0081,
        -0.0025,  0.0118, -0.0042, -0.0171, -0.0127, -0.0020,  0.0242,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5034e-01, -5.7024e-01, -3.4907e-02, -5.0152e-02,  2.0700e-04,
        -2.5275e-02,  1.7019e-02, -4.1514e-03,  1.3021e-01,  4.4010e-02,
         3.0298e-02, -4.5692e-02, -9.2442e-02, -6.7621e-02,  2.8656e-02,
        -3.3596e-02, -8.4730e-02,  1.7281e-02, -1.6883e-02, -2.0731e-02,
        -1.1100e-01, -1.1306e-02, -4.4014e-03,  8.1540e-03,  7.2855e-03,
        -3.9952e-03, -3.4012e-02,  4.3803e-02, -1.0978e-01, -1.6086e-01,
         1.8045e-02,  3.0630e-02, -1.9623e-01, -1.7615e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.4889, -0.6683, -0.0735, -0.4312, -0.3574, -0.0182,  0.1567, -0.7482,
        -0.0703,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1020,  0.2979, -0.0279,  0.0442,  0.1841,  0.2006, -0.0007, -0.0312,
         0.0834,  0.1181,  0.1079,  0.0306,  0.0186,  0.0393, -0.0420,  0.0915,
        -0.0018,  0.5706, -0.0305, -0.1410, -0.0145, -0.0210, -0.0625, -0.0272,
         0.0249,  0.0360,  0.0178, -0.0194, -0.0938, -0.0052,  0.0936,  0.1232,
         0.0030, -0.0441,  0.0260, -0.0030,  0.0120,  0.1349, -0.0464, -0.2921,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2228e-02, -9.0869e-02, -2.3926e-01, -1.7446e-02, -9.7609e-02,
        -6.8916e-02,  3.5840e-02, -4.9105e-02, -5.8327e-02, -1.9426e-01,
         1.7814e-04, -1.9902e-02,  1.4232e-02, -1.4578e-02,  8.9177e-03,
         2.7438e-02,  6.1653e-02,  2.1042e-02, -5.9496e-02, -2.1514e-02,
         2.2639e-02, -9.0074e-02, -1.3248e-02, -1.6832e-02, -1.6156e-01,
        -2.9766e-02, -8.7272e-02,  6.5612e-02, -1.8741e-01, -1.1205e-01,
         2.7876e-02, -1.1411e-01, -3.1795e-02, -9.3712e-03, -3.2015e-02,
        -3.4419e-02, -4.8058e-02, -1.1969e-01,  3.0404e-02,  2.2184e-03,
        -4.8199e-03,  7.9642e-03, -8.2689e-02, -3.9666e-02, -1.4861e-02,
         1.0747e-02,  1.4557e-02, -2.5966e-02,  1.1995e-02, -4.8971e-02,
        -9.4903e-03,  8.8785e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2862, -0.2612, -0.1382, -0.0977, -0.1494, -0.1585, -0.1266, -0.0249,
        -0.1178,  0.0098, -0.0367, -0.0543, -0.0427, -0.1123, -0.1099, -0.1344,
        -0.0299, -0.0945, -0.1442,  0.0565, -0.0339, -0.1126,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2584,  1.0183, -0.0848,  0.0215, -0.0867,  0.0057,  0.1357,  0.0271,
         0.2804,  0.0292, -0.0165,  0.0107, -0.0060,  0.0523, -0.0056,  0.0338,
         0.1286, -0.0558, -0.0389,  0.0623,  0.0487,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4065e-02, -4.5300e-02,  4.0750e-02, -8.9321e-02, -1.1275e-01,
         7.2740e-03,  3.0497e-02,  4.4287e-02, -6.6505e-02, -1.3511e-02,
        -3.9200e-02, -8.5051e-03, -1.8420e-03,  2.4925e-02, -1.1502e-02,
         1.3821e-02,  3.7186e-02, -2.2403e-02, -6.5494e-02, -3.6876e-02,
         3.0501e-02,  9.0441e-05, -2.7693e-02, -2.0374e-02, -7.5972e-03,
        -3.6069e-04, -1.0346e-01, -4.1321e-02,  6.4127e-03,  2.0986e-02,
        -1.2966e-01, -5.6377e-02, -2.1358e-02, -3.6957e-02, -5.9324e-02,
        -2.6061e-02, -1.0352e-02,  4.2344e-02, -2.0788e-02, -1.0023e-01,
        -2.2431e-01, -6.9609e-04, -1.2576e-01,  1.9671e-02, -5.0521e-02,
         7.1975e-03, -5.9993e-02, -6.1133e-02,  2.3646e-02,  8.3344e-02,
        -5.0066e-02,  1.9806e-02, -2.0154e-02,  1.3176e-01, -2.1315e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0584, -0.1172, -0.0221, -0.0227, -0.0431, -0.0678,  0.0258, -0.0514,
         0.1642,  0.0063,  0.0717,  0.0118, -0.1037,  0.0127,  0.0109,  0.0008,
         0.1271, -0.0171, -0.0286,  0.0275, -0.0970, -0.0372, -0.0369, -0.0110,
         0.0975,  0.0173, -0.1065, -0.2009, -0.0743, -0.0498,  0.0180,  0.0142,
        -0.0541, -0.2459, -0.0033, -0.0376, -0.0769, -0.0721, -0.0070, -0.0423,
        -0.0284,  0.0211, -0.1179, -0.0896, -0.2495,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5118, -0.3022, -0.2153, -0.2634, -0.0043, -0.0439, -0.1449,  0.0249,
        -0.1740, -0.2053, -0.0581, -0.0455,  0.1045, -0.1155, -0.0767,  0.0533,
        -0.0434, -0.1621,  0.0822, -0.0527, -0.2607, -0.1626,  0.0829, -0.0052,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3390, -0.1306,  0.0359, -0.0074, -0.0620, -0.1091,  0.0405, -0.1180,
         0.0981, -0.0042, -0.0116, -0.0110,  0.0266, -0.2337, -0.1072, -0.0099,
        -0.0984, -0.0418, -0.0090, -0.0155, -0.0744, -0.0105, -0.0563, -0.2508,
        -0.0614, -0.0495, -0.3178, -0.0931, -0.0460, -0.2309, -0.0392,  0.0097,
        -0.1734,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8876e-02, -5.3704e-01, -1.0974e-01, -1.9319e-01,  8.0233e-02,
        -1.3410e-01,  2.8829e-03,  1.4122e-01, -3.6663e-01, -1.2329e-01,
         3.7291e-04, -9.3903e-02, -3.2560e-02, -2.9721e-02, -1.6930e-01,
         8.1394e-03, -1.7929e-02,  1.0757e-01,  8.4525e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3666,  0.2513,  0.0824, -0.1401, -0.1377,  0.1033, -0.1884, -0.0336,
        -0.1856,  0.0202, -0.1508, -0.0336, -0.1071, -0.0157, -0.0031,  0.0547,
        -0.0049, -0.0670,  0.0178,  0.0025, -0.0393, -0.1783, -0.0231,  0.0585,
         0.2239,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5888, -0.0421, -0.0259,  0.0239, -0.1246,  0.0283, -0.3878,  0.2033,
         0.0701, -0.0581, -0.0893,  0.0230, -0.0235, -0.0237, -0.0636, -0.0745,
         0.0708,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.5702,  0.0577, -0.0344,  0.0267, -0.0574, -0.1169,  0.0126, -0.1278,
        -0.1788, -0.1813, -0.0322, -0.0696,  0.0089, -0.0517, -0.0533, -0.0672,
        -0.0755, -0.0505,  0.0315, -0.0543, -0.0263, -0.0637, -0.4034,  0.0033,
        -0.0330, -0.0889,  0.0446,  0.0896, -0.0108,  0.0816,  0.0657, -0.0569,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0545, -0.2525, -0.0587, -0.0417, -0.0892,  0.0045, -0.0470, -0.1895,
        -0.0445, -0.0521, -0.0174, -0.0613,  0.0886, -0.0788,  0.0424, -0.0588,
        -0.0821, -0.0046, -0.0272, -0.0487, -0.0225, -0.0458, -0.0699, -0.1987,
        -0.1208, -0.0555, -0.1355, -0.3145,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1365, -0.2429,  0.0992,  0.2094,  0.0572, -0.0836, -0.3578, -0.1872,
         0.0846,  0.0172, -0.0294, -0.0478,  0.0626, -0.0429, -0.0377, -0.0886,
         0.0050, -0.1866,  0.0130, -0.0870, -0.0412,  0.0758,  0.0077, -0.0403,
        -0.0878, -0.0720,  0.0260, -0.0080, -0.2225,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0357, -0.1175, -0.0148, -0.0465, -0.2449, -0.1009, -0.0148,  0.0053,
        -0.1208,  0.0291, -0.0189, -0.0170, -0.0201, -0.1925, -0.2050, -0.0286,
        -0.0487, -0.1516, -0.0504,  0.0632,  0.0007, -0.0040, -0.0097,  0.0015,
        -0.0262, -0.0310, -0.0291, -0.0298,  0.0166, -0.0020, -0.0608, -0.0457,
         0.0718,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0862, -0.1754, -0.0841, -0.1512, -0.0664, -0.0996, -0.0011, -0.1024,
        -0.0305, -0.0813, -0.0413, -0.0229, -0.0183, -0.0326, -0.0218, -0.0314,
        -0.0724,  0.0467, -0.1254,  0.0479, -0.1290, -0.0568, -0.0269,  0.1140,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0336, -0.1031, -0.1272, -0.0422,  0.0117, -0.1102, -0.1588, -0.0867,
         0.0101, -0.0490,  0.0253, -0.0681, -0.0767, -0.0752, -0.0270,  0.0380,
         0.0004, -0.0391, -0.0458, -0.2706, -0.0836,  0.0267, -0.0517,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4709, -0.1850,  0.0055, -0.0325, -0.0919, -0.0841, -0.0656, -0.0326,
         0.0133, -0.0672, -0.0019, -0.1212, -0.1071, -0.0217, -0.0281, -0.4774,
        -0.1218, -0.0594, -0.0107,  0.0421, -0.0901, -0.0944,  0.0659,  0.0311,
        -0.0335, -0.0658, -0.0544, -0.0200, -0.0079, -0.1003, -0.1583, -0.0368,
        -0.0295,  0.0088,  0.0151, -0.0397, -0.0274,  0.3415,  0.0817,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0520, -0.3609, -0.0351,  0.0336, -0.1301, -0.0087, -0.0130,  0.0250,
        -0.1055, -0.0212, -0.0468,  0.0176, -0.0151, -0.0655,  0.0154, -0.0357,
        -0.0095,  0.0110,  0.0361,  0.0045, -0.0454, -0.0188, -0.0403,  0.0072,
        -0.0279, -0.0310, -0.0277, -0.0059, -0.0450, -0.0116, -0.0612, -0.0318,
         0.0464, -0.0009,  0.0184, -0.0581, -0.0248, -0.0221, -0.0272, -0.0132,
        -0.0435, -0.1312, -0.1562,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0925, -0.3940, -0.0025,  0.0209, -0.0544, -0.0033, -0.1326, -0.0228,
         0.0134,  0.0349, -0.0328,  0.0100, -0.0159,  0.0307, -0.0072, -0.0174,
        -0.0491, -0.0861, -0.0129,  0.0073,  0.0405,  0.0242, -0.0370, -0.0283,
        -0.0248, -0.0201,  0.0098, -0.0126,  0.0015, -0.0262, -0.0295,  0.0154,
        -0.1254, -0.0189,  0.0397,  0.0215,  0.0290,  0.0502, -0.0392, -0.0174,
        -0.0116, -0.0585, -0.0282,  0.0365,  0.0048, -0.0262, -0.0179, -0.0125,
         0.0215, -0.0347,  0.0382,  0.0547, -0.0192, -0.0546, -0.0004,  0.0411,
         0.0235], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1396, -0.6938, -0.2139, -0.4413, -0.1009,  0.1813, -0.2518, -0.7855,
         0.2643, -0.4743, -0.2475, -0.3158, -0.1224, -0.3773,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7518, -0.2535, -0.2327,  0.2362,  0.0126, -0.0432, -0.1612, -0.0511,
        -0.0363,  0.0231, -0.0492, -0.2909, -0.0562, -0.1837, -0.1415,  0.0703,
         0.0091, -0.0786,  0.1361, -0.0115, -0.3068, -0.1217, -0.0192,  0.0176,
        -0.0259, -0.0196, -0.0331, -0.0226,  0.1064,  0.0578,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0486,  0.2670, -0.4500,  0.2438,  0.1248, -0.2206,  0.0749, -0.0426,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.4354, -0.1697, -0.0147,  0.0019, -0.0599,  0.0658, -0.0014,  0.0114,
         0.0031, -0.0356, -0.0731, -0.0417, -0.1108, -0.2893,  0.0016,  0.0374,
         0.0038, -0.0229, -0.0393, -0.0822,  0.0434, -0.0786, -0.0937, -0.0584,
        -0.0611, -0.0811, -0.0924,  0.0649, -0.1376, -0.2555,  0.0380,  0.2140,
         0.0413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6847e-01, -1.0436e-01, -1.5553e-01, -5.4943e-02, -7.4602e-03,
        -3.8691e-02,  5.0944e-03, -2.3650e-02, -4.7496e-03, -4.0398e-02,
        -3.7094e-02,  4.8658e-02, -3.7373e-02, -4.8241e-02,  6.2592e-03,
         1.5606e-01, -2.0837e-03, -1.0830e-02, -4.2692e-02, -2.1866e-02,
        -1.0822e-02, -4.0792e-02,  2.5973e-02, -1.7219e-02, -1.3965e-02,
         2.2544e-02, -7.7965e-03, -5.8692e-02,  3.8570e-03,  2.7171e-02,
        -3.0161e-03, -9.4190e-03, -4.7914e-02, -2.1600e-01,  6.3288e-02,
        -3.0324e-02, -4.7336e-02, -4.0758e-04,  3.5706e-03,  1.3428e-02,
         1.1086e-02, -3.1567e-03,  3.4585e-03, -5.3826e-02,  3.0529e-02,
         8.2371e-02, -6.0407e-02,  4.2501e-02, -5.0727e-02, -3.9162e-02,
        -2.5252e-02, -4.2436e-02, -3.0665e-02, -7.2026e-04, -9.5958e-03,
        -1.7228e-02, -2.6371e-02,  4.8076e-02, -7.3704e-02, -6.3211e-02,
        -7.4965e-02, -7.0879e-03, -3.7578e-01, -1.6798e-02, -1.3345e-02,
        -1.0971e-02,  3.9876e-03, -5.4321e-02, -6.3065e-02,  7.8695e-02,
         2.3840e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0503, -0.1057, -0.0074,  0.0055, -0.1272,  0.0144, -0.3278, -0.0997,
        -0.0278,  0.0488,  0.0750, -0.0063, -0.0313, -0.0328, -0.0691, -0.0635,
        -0.0187,  0.0456, -0.0982, -0.1444,  0.0127, -0.0593,  0.0325, -0.1053,
         0.0620, -0.0660, -0.1040,  0.0547,  0.0394,  0.0328, -0.0056, -0.0850,
        -0.0393, -0.0804, -0.0921, -0.0796, -0.0223, -0.0520, -0.0167,  0.1236,
         0.0655,  0.0051,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0293, -0.4541,  0.0225,  0.0077, -0.0121, -0.0345,  0.0461,  0.0696,
        -0.0548, -0.1770, -0.0872, -0.0513, -0.0493,  0.0185,  0.0229,  0.0118,
         0.0441, -0.0437,  0.0199,  0.0984, -0.0171, -0.0217,  0.0133, -0.0068,
         0.0364,  0.0083, -0.0100, -0.0275, -0.0667,  0.0709,  0.0128, -0.0183,
         0.0816,  0.0205, -0.0356,  0.1528,  0.1269,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0382, -0.3355,  0.2384, -0.1884, -0.2369,  0.1089,  0.0660, -0.0069,
         0.1267, -0.0778, -0.0110, -0.0824, -0.0429, -0.1833,  0.1605,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0132,  0.3310, -0.0190, -0.0710,  0.0166,  0.0702, -0.0473,  0.0194,
         0.3399,  0.0395, -0.1281, -0.0819,  0.1289,  0.0228, -0.2232,  0.1326,
         0.2728,  0.1243,  0.0966,  0.1007,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0050, -0.1759, -0.1012,  0.0105, -0.0345, -0.1710, -0.0314, -0.0554,
        -0.0058, -0.0259, -0.0166, -0.0485, -0.0658, -0.0450, -0.0669, -0.0463,
         0.0227, -0.0313,  0.0581, -0.0433,  0.0835,  0.0558,  0.0199,  0.0525,
        -0.0681, -0.0814, -0.2650, -0.0321, -0.0019, -0.0577, -0.0498, -0.1496,
        -0.0296, -0.0694, -0.0613, -0.0487, -0.0020, -0.0547, -0.0211,  0.0034,
         0.1013,  0.4200, -0.3647,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0850, -0.1282,  0.1401, -0.1253, -0.0339, -0.0500, -0.0533, -0.1877,
         0.0508, -0.1531, -0.0438,  0.0031, -0.0288, -0.0769, -0.1719, -0.0014,
        -0.0255, -0.0186,  0.0162, -0.0242, -0.0215, -0.0219,  0.0241, -0.0851,
        -0.0077, -0.0489, -0.0897,  0.0429,  0.0065, -0.0353, -0.0103, -0.0498,
         0.0037, -0.0331, -0.0528, -0.0201, -0.0253, -0.1772,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0434, -0.3926, -0.2114, -0.2374, -0.0281, -0.1639, -0.1973,  0.1285,
        -0.0372, -0.0505, -0.0245, -0.1228, -0.0821, -0.0390, -0.0014,  0.0270,
         0.0037, -0.1270, -0.0123, -0.0581,  0.2132,  0.0438,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0813, -0.1657,  0.0273,  0.1328,  0.0344,  0.0372, -0.0104, -0.0411,
        -0.1036, -0.1221, -0.0234, -0.0107, -0.0344,  0.1085, -0.0403, -0.0525,
        -0.2604, -0.0129, -0.0404, -0.1184, -0.0217, -0.0721, -0.0572, -0.0983,
        -0.0176, -0.0107, -0.0421, -0.0527, -0.0750, -0.0332, -0.1361,  0.0126,
        -0.0462, -0.1717, -0.0376, -0.2936,  0.0244, -0.0971, -0.0307,  0.0534,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2204, -0.1432,  0.0649, -0.1544, -0.0314,  0.0479,  0.0696, -0.0061,
        -0.0454, -0.0604,  0.0720,  0.0333,  0.0204, -0.0657, -0.1076, -0.0468,
        -0.0409,  0.0220, -0.2416, -0.0511, -0.0407, -0.0024, -0.1159,  0.0123,
         0.1510, -0.2333,  0.0399, -0.1376, -0.0353, -0.0335, -0.0463,  0.0051,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3726, -0.5496, -0.0136,  0.0319,  0.0552, -0.0776, -0.1306, -0.0998,
        -0.1858,  0.0402,  0.0341, -0.0286, -0.0175,  0.0760, -0.0124, -0.0671,
        -0.0364,  0.0119,  0.0385, -0.0285,  0.0448,  0.0688,  0.1320, -0.0892,
         0.0189,  0.0747, -0.0136, -0.0418,  0.0014, -0.0533,  0.0233, -0.0177,
         0.0698, -0.1102, -0.0830,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.2766, -0.1528, -0.0470, -0.2766,  0.0252,  0.0614, -0.1195, -0.1448,
         0.1327, -0.1074, -0.0073, -0.0127, -0.0466, -0.1103, -0.0365, -0.0637,
        -0.0809, -0.0579, -0.0119,  0.0129,  0.1369,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3333,  0.8038,  0.1405,  0.2371,  0.1597,  0.1698,  0.0323,  0.1145,
         0.3452, -0.1217, -0.0702, -0.0602, -0.0201,  0.0025,  0.0641, -0.0281,
         0.0369, -0.0644, -0.0192,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0777, -0.4132, -0.2237,  0.0455, -0.0303, -0.1030, -0.0060,  0.0071,
         0.0441, -0.0568, -0.1907, -0.0202, -0.0051, -0.1895, -0.0434, -0.0451,
         0.0360, -0.0403, -0.0665, -0.2671,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3044, -0.5887, -0.0370, -0.1201, -0.0640, -0.0537,  0.1682, -0.1243,
        -0.1375, -0.0635, -0.0465, -0.0652,  0.0459, -0.0859, -0.2663, -0.0294,
        -0.1731,  0.0218, -0.0521, -0.1445,  0.0327,  0.0010,  0.1137,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2562, -0.1371, -0.0448, -0.0333, -0.0390, -0.0974, -0.0796,  0.0009,
        -0.1162, -0.0489, -0.0954, -0.1008, -0.0507, -0.1595, -0.0203,  0.1492,
         0.0128,  0.0694, -0.0381,  0.0239, -0.0435, -0.0254, -0.1122, -0.0697,
        -0.2107,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0122, -0.2976, -0.0344,  0.0031, -0.0740, -0.0251,  0.0736, -0.0399,
         0.0656,  0.1044, -0.0836, -0.0019, -0.0253, -0.0476, -0.0015,  0.0700,
        -0.0790,  0.0078, -0.0092, -0.0516, -0.0195, -0.0468, -0.0286,  0.0061,
         0.0115, -0.0699, -0.0218,  0.0079,  0.0292,  0.0331,  0.0502, -0.0047,
         0.0344, -0.0684,  0.1455,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4364,  0.0523, -0.0677, -0.0786, -0.0816, -0.0482, -0.1308,  0.0694,
        -0.0237,  0.0041,  0.1203,  0.0148,  0.1405, -0.0416, -0.1215, -0.4102,
        -0.0449, -0.2078, -0.2071, -0.0921,  0.0898, -0.0603,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2649,  0.0030, -0.0029,  0.0973,  0.1038, -0.1721, -0.0238,  0.0934,
        -0.0876, -0.0683, -0.2489,  0.0100,  0.0055, -0.0408, -0.0200,  0.0466,
        -0.1978,  0.0278,  0.7187,  0.1604, -0.3269,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3355, -0.4048, -0.1035,  0.1323,  0.0802,  0.0579, -0.0259, -0.3172,
        -0.1370,  0.1019,  0.0726,  0.0570, -0.2265, -0.1767, -0.0066, -0.0704,
        -0.0969,  0.1275, -0.2359,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4058, -0.0384, -0.1960, -0.1242,  0.0215, -0.1506,  0.0506,  0.0275,
         0.0063, -0.0018, -0.0101, -0.0294, -0.0337, -0.0632, -0.0107,  0.0035,
         0.0334, -0.0857,  0.0198, -0.0498,  0.0171,  0.0561, -0.0136, -0.0169,
        -0.0207, -0.0677, -0.0033, -0.0175, -0.0862, -0.3759, -0.0655,  0.0014,
        -0.0655, -0.1533,  0.0137, -0.0443,  0.0426, -0.0029,  0.0198, -0.0120,
         0.0363, -0.0154,  0.0206,  0.0216, -0.0034,  0.1396,  0.1173],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6622e-01, -2.2953e-01, -1.4606e-01, -1.8920e-02,  1.7685e-02,
        -1.1125e-01,  3.9327e-02, -3.1456e-05, -3.4022e-02, -1.6575e-02,
         3.8000e-02,  2.8275e-02,  7.9717e-02, -6.6348e-02,  8.1500e-02,
        -1.7028e-01, -6.7103e-02, -1.0697e-01,  1.2134e-01, -4.6417e-03,
         2.7523e-03, -5.7421e-02, -3.1504e-02, -3.4775e-01, -4.3573e-02,
        -1.3857e-03,  1.5727e-02, -1.1791e-02,  3.1204e-02,  2.8777e-02,
         7.8093e-04,  8.0014e-02, -8.9702e-03,  8.5390e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1050, -0.6142,  0.0245,  0.0488,  0.0563, -0.0357,  0.0253, -0.0342,
        -0.0325, -0.0349,  0.0164,  0.0020, -0.0812, -0.1974,  0.0676,  0.0150,
         0.0370,  0.0710, -0.1212, -0.1043,  0.0095,  0.0115, -0.0157, -0.0217,
        -0.0044, -0.0085, -0.0431, -0.0320,  0.0321, -0.0928,  0.0245, -0.1413,
        -0.1060,  0.0637,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.3245, -0.4703, -0.1643, -0.0442, -0.0167, -0.0670, -0.0283, -0.3343,
        -0.1757, -0.1708, -0.1930, -0.0581, -0.1266, -0.1977, -0.1720, -0.0505,
        -0.0724, -0.3122,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6635, -0.0640,  0.0071, -0.0536, -0.0395,  0.0244, -0.0287,  0.0265,
        -0.0209, -0.3196,  0.1217, -0.0879, -0.0965, -0.1268, -0.1719, -0.0374,
        -0.0993, -0.1019, -0.0142, -0.2916,  0.0472, -0.0010, -0.0110,  0.0597,
         0.0774,  0.0636,  0.0342,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0189, -0.2485, -0.0394, -0.0545,  0.0543, -0.1385, -0.0561, -0.3399,
        -0.4077, -0.1508, -0.0988, -0.1185, -0.1447, -0.1785, -0.0245,  0.0524,
        -0.1962, -0.0151, -0.0696, -0.3452, -0.0644, -0.0436, -0.0070,  0.0936,
         0.0342, -0.2390,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9247e-01, -3.0248e-01, -8.2911e-02, -8.5387e-02, -3.2095e-01,
        -1.0717e-01, -8.5677e-05,  8.7925e-03, -7.9978e-02, -7.9701e-02,
        -9.8608e-02, -3.0894e-02, -1.7824e-02,  1.6583e-02,  2.4017e-02,
        -6.8873e-02, -8.1213e-02, -3.4355e-02, -5.3425e-02,  5.8085e-02,
        -1.8657e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3026, -0.2814, -0.3430, -0.5864, -0.0573,  0.1098, -0.0967, -0.0181,
         0.0452,  0.2911, -0.1985, -0.0296,  0.2947,  0.3255, -0.5817,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2199,  0.1227,  0.0665,  0.1095, -0.2640,  0.2486, -0.2734, -0.0373,
        -0.0816, -0.0080,  0.1884,  0.1536,  0.2177,  0.1761, -0.0797, -0.0903,
        -0.2995,  0.2106,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0227, -0.2952, -0.1439, -0.1759, -0.0893, -0.0038, -0.0179, -0.0479,
         0.1247, -0.0578, -0.0713, -0.0265, -0.0397, -0.0521, -0.0607, -0.0330,
        -0.0805, -0.1246, -0.2427,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0249e-01, -3.4790e-01, -4.9561e-03,  3.3546e-02, -2.8013e-02,
        -7.0793e-02,  4.4272e-02, -3.9730e-02, -4.1370e-02,  4.7832e-02,
        -8.7433e-03, -1.2669e-02, -5.3842e-02, -8.8414e-03, -7.9042e-03,
        -6.2960e-02, -1.3753e-01,  2.4885e-05,  1.1538e-01,  2.2777e-01,
        -2.0660e-01,  2.1369e-02, -1.1014e-01, -8.3877e-02, -1.0844e-01,
        -1.0900e-01, -9.9790e-02,  9.3360e-02, -6.6309e-02, -5.9342e-02,
         9.1636e-03, -1.4138e-01, -1.4196e-01, -1.1530e-02, -1.0716e-02,
         5.7767e-02, -2.5179e-01, -1.5238e-02, -8.1922e-02,  1.9320e-02,
         1.9161e-02,  1.7094e-01,  1.2589e-01, -6.1543e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1053, -0.0981, -0.0912, -0.1682, -0.1129, -0.0541, -0.0376, -0.0076,
        -0.1575, -0.0023, -0.0658,  0.0471,  0.0254,  0.0018,  0.0613,  0.0086,
         0.1382, -0.2536,  0.0041,  0.0033, -0.0851,  0.0280,  0.0116,  0.0062,
        -0.0488, -0.0296,  0.0013, -0.0885,  0.0985, -0.0125, -0.0408, -0.0413,
        -0.0023, -0.2653,  0.0123,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1150e-02, -7.6079e-01,  2.9242e-02, -4.9438e-02,  1.9683e-02,
        -4.0772e-02, -1.1603e-01, -1.4869e-01, -5.3836e-02, -3.4966e-02,
        -4.4881e-04, -9.7922e-02, -1.1132e-01,  5.4392e-05, -4.6608e-03,
        -7.4106e-02,  5.1498e-02,  1.3102e-02,  4.1836e-03, -5.5059e-02,
        -4.5490e-02, -1.4516e-02, -7.5046e-02, -7.8780e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0504, -0.2457, -0.0301, -0.0230, -0.0023, -0.0458, -0.0746, -0.0440,
        -0.1052,  0.0133, -0.0357, -0.0581,  0.0450, -0.0302, -0.1434,  0.0015,
         0.0659,  0.0342, -0.2940, -0.0161, -0.0666, -0.0532, -0.0046, -0.0508,
        -0.0844, -0.0694, -0.0361, -0.0354,  0.0081, -0.0204, -0.0661, -0.0382,
         0.0563, -0.1880,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-74.6006, 142.5216, 193.0111, 112.3364, -30.9811, -23.8130,  -6.1771,
          4.1667, -32.0668, -21.0825,   7.4035, -20.8009,  -0.4056,  18.9567,
          0.7088, -54.0204, -25.7930, -25.2815,  -2.1312,  30.2092,  -0.3332,
        -16.8630, -26.4966, -14.8294, -19.9532,  20.4895,  20.7681, -23.0714,
         80.5140,  53.3290,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.2684, -0.1946, -0.1494, -0.0946, -0.0018, -0.0743, -0.0574, -0.0695,
        -0.1782, -0.4998, -0.0155, -0.0161,  0.1783,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1717, -0.1142,  0.2237, -0.0083, -0.0560, -0.0679, -0.0636, -0.1333,
        -0.3367, -0.0188, -0.1190, -0.0645, -0.0401, -0.0269, -0.2104, -0.2016,
         0.0382,  0.1011,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5329, -0.1386, -0.0053, -0.0763, -0.2431, -0.2408, -0.2830, -0.0395,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0755, -0.1792, -0.0291, -0.0529, -0.0306, -0.0728, -0.0429, -0.0281,
        -0.0045, -0.0277, -0.0932, -0.1032, -0.0308,  0.0409, -0.0222, -0.0110,
         0.0018,  0.0116, -0.1125,  0.0391, -0.1529, -0.0472, -0.1224, -0.0870,
        -0.0260, -0.0828, -0.0771,  0.0863, -0.2333,  0.0237,  0.0039, -0.0152,
         0.0166, -0.2299,  0.0852, -0.0459, -0.0427, -0.0602,  0.0126, -0.0299,
        -0.0326, -0.0097, -0.0184,  0.0177,  0.0110, -0.0816, -0.0222],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0333, -0.2981, -0.1503, -0.5625,  0.0089, -0.1657,  0.0585,  0.1865,
        -0.0339,  0.0863, -0.3214,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5416, -0.1474, -0.0502, -0.0925, -0.1932, -0.1119, -0.0999, -0.2764,
        -0.2753, -0.0773, -0.1057, -0.1346, -0.1123, -0.0120,  0.1554, -0.0116,
        -0.1268, -0.1380,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4160, -0.4841, -0.2495,  0.0981,  0.1583,  0.1484, -0.1642, -0.0095,
        -0.0549,  0.0807,  0.0019,  0.0404, -0.1432,  0.2723,  0.1623,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1184, -0.3774,  0.0515, -0.1150, -0.0575, -0.1378, -0.3336,  0.1165,
        -0.0508, -0.0652, -0.3102, -0.0267,  0.2175, -0.0274, -0.0177,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3543, -0.1730, -0.1097,  0.0248,  0.0817,  0.1086, -0.0718,  0.0160,
        -0.1291, -0.1235,  0.0125, -0.0789, -0.1614, -0.0292,  0.0147,  0.0407,
        -0.1080,  0.0977, -0.0064,  0.0666,  0.0055, -0.1692,  0.0290,  0.1739,
         0.0600, -0.0720,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5911, -0.0277, -0.3280, -0.2985, -0.1964,  0.0371, -0.0695, -0.1558,
        -0.9696, -0.1411, -0.1627, -0.2226, -0.2377, -0.1470,  0.2442,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3838e-02, -1.5752e-01, -9.2330e-02, -6.7584e-02, -3.5276e-02,
        -2.0618e-02, -3.3565e-02, -1.6313e-02, -5.9308e-02, -4.5716e-01,
        -4.9251e-02, -5.7684e-02, -4.7898e-02, -5.9437e-02,  6.3855e-02,
         7.8478e-03,  4.9644e-03,  2.9558e-02, -1.4928e-01, -4.1389e-01,
        -3.1817e-04, -3.0453e-02,  5.8396e-02, -3.8815e-02,  9.1098e-02,
        -9.6399e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0793,  0.1955,  0.0442, -0.0236, -0.0557, -0.1274, -0.0650, -0.0947,
        -0.0035, -0.0985, -0.3333, -0.0053, -0.0198, -0.1002, -0.0780, -0.1104,
        -0.0042, -0.0856,  0.0363, -0.0937,  0.0698,  0.0213, -0.0719, -0.0367,
         0.3280, -0.1145,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.4633, -0.4006,  0.0830, -0.0406, -0.0649,  0.0214, -0.0446, -0.1267,
         0.0862, -0.0803,  0.0304, -0.0641, -0.3101,  0.0286, -0.1260,  0.0791,
        -0.0715,  0.0251, -0.0925,  0.0157, -0.0591, -0.0501,  0.0634, -0.0179,
        -0.2427,  0.0056, -0.0652, -0.0052, -0.0594,  0.0348,  0.0290, -0.0075,
         0.0367,  0.0921, -0.0476, -0.0303,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6336e-02, -2.3845e-01, -1.1165e-01, -1.4786e-01, -9.2064e-02,
        -1.0522e-02, -5.0862e-02,  1.0383e-02, -1.9675e-02, -6.3991e-02,
        -1.5938e-02,  2.1229e-04, -3.1825e-02,  2.1313e-02, -1.0209e-02,
        -1.1205e-02,  2.2622e-02, -2.7894e-02, -4.2576e-03,  2.5566e-03,
        -3.0606e-02,  4.2894e-02, -6.1224e-02,  9.2852e-04, -3.2255e-02,
        -1.6855e-01, -3.6829e-02, -2.0497e-02, -1.3045e-01, -2.7399e-02,
        -3.3428e-02, -5.5780e-02, -6.9460e-02,  3.4725e-03,  5.5255e-02,
        -1.1922e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5613,  0.2670,  0.0964,  0.1114, -0.1226,  0.0860,  0.1111,  0.0382,
        -0.0107, -0.0374,  0.0255,  0.0153,  0.0307, -0.0023, -0.0453, -0.0463,
         0.1520,  0.2380,  0.1195, -0.4492,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6291e-01, -2.8731e-01,  1.6128e-01, -3.4310e-02,  6.4444e-02,
        -3.6160e-02, -6.1207e-02, -4.2387e-02, -9.8275e-03, -5.2102e-02,
        -2.4343e-02, -5.3309e-02,  4.4413e-02, -8.2734e-02, -2.1938e-01,
        -1.4310e-02,  1.0692e-01, -1.0266e-01,  1.6883e-04, -1.8922e-02,
        -1.3447e-01, -4.9906e-02, -1.0579e-02, -7.8141e-02, -6.4257e-02,
         4.4157e-02, -4.4132e-02, -1.5225e-02, -8.5340e-02, -4.8582e-02,
        -4.0700e-02, -4.1645e-02, -3.0775e-02, -2.1962e-01,  3.2754e-02,
        -1.5073e-01, -6.2656e-02, -6.6365e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1153, -0.0342, -0.0613, -0.0825,  0.0022, -0.0138,  0.0378,  0.0196,
        -0.0532, -0.1607, -0.0635, -0.2096, -0.0051, -0.0179, -0.1381, -0.0801,
        -0.0200, -0.0719,  0.0043, -0.0261, -0.0051, -0.0210, -0.0209,  0.0582,
        -0.0227,  0.0329,  0.0095, -0.0450, -0.0247, -0.0575, -0.0713,  0.0341,
         0.0666,  0.0020, -0.0638,  0.0835, -0.1281, -0.0873,  0.0084, -0.0660,
         0.1594], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5746e-01, -1.1173e+00,  1.4748e-01,  3.8735e-02, -1.4599e-01,
        -1.2829e-01, -3.1787e-01,  3.1137e-02,  1.3479e-02,  1.6600e-02,
        -8.2910e-02,  1.4035e-02, -8.2821e-03,  1.0321e-02, -1.5470e-02,
        -3.6985e-02, -4.8149e-02, -7.2713e-03, -9.8095e-04, -1.2152e-01,
        -1.8350e-01,  2.9212e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2568, -0.0814, -0.0455, -0.0254, -0.0531, -0.0875, -0.0244, -0.0039,
        -0.1217,  0.0715,  0.0342, -0.1237, -0.0728, -0.0523,  0.0501, -0.0409,
         0.0033, -0.0579,  0.0150, -0.0896, -0.3479,  0.0858, -0.0925, -0.0352,
         0.0036,  0.0053,  0.0230, -0.0922, -0.3230, -0.0956,  0.0133, -0.0370,
         0.0389,  0.1666,  0.3009,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0058, -0.1864, -0.1901,  0.0896, -0.2146,  0.0806,  0.0397,  0.0540,
        -0.0408, -0.1065, -0.0782, -0.0447, -0.0030,  0.0184,  0.0405, -0.0113,
         0.0592, -0.0148, -0.1268, -0.0353, -0.0988,  0.0081,  0.0004, -0.0723,
        -0.0313, -0.0476, -0.0510, -0.0612,  0.0934,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1238, -0.3181, -0.0893, -0.0404, -0.1106, -0.1701, -0.0479, -0.0069,
        -0.0895,  0.1533,  0.0212, -0.0675, -0.0585, -0.0828, -0.1557, -0.1366,
        -0.4702, -0.1645, -0.0576,  0.0451, -0.2596, -0.2412, -0.1026, -0.0245,
         0.0433,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2762,  0.0666, -0.1528, -0.2797, -0.3844,  0.0387, -0.2669, -0.1478,
        -0.1411, -0.1598, -0.0035,  0.1048, -0.0420,  0.2222, -0.1418, -0.0799,
        -0.0262, -0.0579,  0.0833, -0.0128, -0.0271,  0.3246, -0.0593,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4820,  0.0744, -0.0273,  0.0208,  0.0431, -0.0911, -0.1313, -0.1531,
        -0.0995, -0.0035,  0.0074, -0.0303, -0.1138, -0.2172,  0.1388, -0.1500,
         0.1426, -0.0274, -0.0693,  0.0287,  0.0292, -0.2005,  0.2133,  0.0235,
        -0.1192, -0.0168,  0.0060,  0.0145, -0.1117, -0.0309, -0.0052, -0.1082,
        -0.0885, -0.0507,  0.0434,  0.0795,  0.0375,  0.3619, -0.0588,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2886, -0.0422,  0.1351, -0.1022, -0.5358, -0.1903, -0.1698, -0.1601,
        -0.0903, -0.1495, -0.1331,  0.0468,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.0111, -0.2945, -0.7218, -0.0700, -0.0186,  0.0845, -0.0612, -0.0683,
         0.0411, -0.0383, -0.0203, -0.0830, -0.1046, -0.0138, -0.0880, -0.0676,
        -0.0084, -0.0732,  0.0093, -0.0354, -0.0314, -0.0956, -0.0139, -0.0358,
         0.0785,  0.0384, -0.0096, -0.0174, -0.1198,  0.0330, -0.0204, -0.0401,
        -0.0188, -0.0176, -0.1311, -0.1081, -0.1112, -0.0078, -0.0404, -0.0306,
         0.0276, -0.0199, -0.0575,  0.1174, -0.1302,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4354, -0.4941, -0.0566, -0.0450,  0.2100, -0.0384,  0.4199, -0.1056,
        -0.1044,  0.0016, -0.2251, -0.1276,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2715, -0.9131, -0.2210, -0.1659, -0.0166, -0.1318, -0.0144,  0.0273,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2217, -0.2896, -0.0486,  0.0402, -0.0004, -0.0055,  0.0401,  0.0332,
         0.0901, -0.2261, -0.0427,  0.0142, -0.0481, -0.0225, -0.0231,  0.0667,
         0.0838, -0.1193, -0.0245, -0.0451, -0.0076,  0.0284, -0.0305, -0.0748,
        -0.0325, -0.0014, -0.0180, -0.0141, -0.0139, -0.0121,  0.0291,  0.0107,
         0.0220,  0.0279, -0.0048, -0.0391,  0.0003,  0.0056,  0.0672,  0.0138,
        -0.0044,  0.0089,  0.0175, -0.0009,  0.0049,  0.0351, -0.1783, -0.1090,
        -0.1639, -0.1684, -0.0494,  0.0042, -0.0477,  0.0495, -0.0671,  0.0261,
        -0.0437,  0.0297,  0.0366, -0.0802,  0.0331], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1555, -0.1641,  0.0823, -0.0541, -0.2325, -0.0086, -0.0783, -0.0058,
        -0.0245,  0.0443,  0.0585, -0.0735, -0.0293, -0.0426, -0.1215,  0.0538,
        -0.0726, -0.0170, -0.0663, -0.0722, -0.1335, -0.1315, -0.1061,  0.0691,
        -0.0307, -0.0535, -0.0439,  0.0172, -0.0516, -0.0779, -0.0270,  0.0726,
        -0.0023,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6732e-01, -1.5214e-01, -9.0994e-02, -8.7678e-02,  1.5602e-02,
         3.1572e-02, -9.3565e-02, -3.9582e-02, -3.8762e-02,  6.7833e-02,
         1.4800e-01, -6.2772e-02, -4.5335e-02, -3.7378e-02, -1.5012e-04,
        -4.6032e-02, -9.1169e-02, -1.5231e-01, -7.7473e-01, -1.1947e-01,
         8.5741e-02, -5.9125e-02, -1.3739e-01, -1.5962e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0434, -0.1796, -0.1374, -0.0892, -0.0762, -0.2511, -0.1201, -0.0558,
        -0.0218, -0.1361, -0.0358, -0.2486, -0.3607, -0.0805,  0.0525, -0.0819,
         0.0263, -0.0442, -0.1176, -0.0153, -0.0717, -0.0986,  0.1395, -0.0653,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3178, -0.2048, -0.1078, -0.0226,  0.1013,  0.0063, -0.1198, -0.1119,
         0.0655, -0.0990,  0.0250, -0.0762,  0.0123, -0.0186, -0.0725, -0.1127,
        -0.0704,  0.0923,  0.0239, -0.0506, -0.0823, -0.0828, -0.1679, -0.1734,
         0.0014,  0.0384, -0.1065,  0.0212,  0.2423, -0.0190, -0.1061, -0.0015,
        -0.0136,  0.0454,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0229e-02, -2.7831e-01,  6.1870e-02,  1.0523e-02, -8.4645e-02,
         6.0117e-02,  2.0728e-03, -3.0435e-02, -1.6977e-02, -2.2646e-01,
        -5.9843e-02, -3.3315e-02, -1.8047e-02, -4.4565e-01, -1.6397e-01,
        -1.2785e-01, -3.5608e-02, -2.4137e-04, -6.1857e-02, -2.5121e-02,
         2.5802e-02, -3.1866e-02,  9.6040e-02, -1.0556e-01,  4.7762e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1435,  0.1095,  0.0996, -0.0011,  0.0772,  0.0626,  0.1337,  0.0587,
         0.1442,  0.6187,  0.1912, -0.0048,  0.0419, -0.0258, -0.1067, -0.0283,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0263,  0.3085,  0.0877,  0.1094,  0.0838,  0.0932,  0.7306,  0.1159,
        -0.1126,  0.0426,  0.0227, -0.0687,  0.1054,  0.0562,  0.0995, -0.0440,
         0.0499,  0.1222,  0.1175,  0.0024, -0.0611, -0.1362, -0.0286,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0866,  0.3702,  0.0326, -0.0145,  0.4061,  0.1864,  0.0857,  0.5196,
         0.2611,  0.3381, -0.0024, -0.0474, -0.5255,  0.4352,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.5683, -0.1281, -0.1131, -0.3841, -0.0778,  0.0666, -0.0128,  0.0249,
        -0.0358, -0.0536, -0.1134, -0.0787, -0.1923, -0.0087, -0.0191,  0.1148,
         0.0737, -0.2750, -0.1588, -0.1542, -0.1221, -0.0513,  0.0109, -0.1024,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ -2.3050,  13.6413,   6.7579,  -3.0281,   0.6754, -12.9872,  -0.0539,
         -1.2605,  -2.6362,  13.3042,   8.2561,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9575, -0.4859, -0.2201, -0.4142, -0.1052, -0.1766, -0.2045,  0.1459,
        -0.1558,  0.1472,  0.0372,  0.0215, -0.0694, -0.0826, -0.2844, -0.1153,
        -0.3369, -0.3025, -0.1104,  0.1384, -0.0263,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6838, -0.2253, -0.1417, -0.3376,  0.0360, -0.2306, -0.3251, -0.3402,
        -0.1368,  0.0296, -0.0380,  0.0453,  0.2251, -0.5123,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2961, -0.0515, -0.0633,  0.0231, -0.0304, -0.0094, -0.1463,  0.1333,
         0.2166, -0.0657, -0.2374, -0.2667, -0.3496, -0.0470, -0.0381, -0.0137,
         0.0389, -0.2604, -0.2218,  0.0036, -0.3783,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3801e-01, -2.8342e-01, -1.1844e-01, -6.9203e-02, -2.6113e-01,
        -4.0769e-01, -3.1240e-04, -3.9794e-01, -3.6760e-01, -1.3042e-01,
        -1.7360e-01, -1.2745e-01, -4.0205e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4084e-01, -4.6808e-02,  9.6780e-03,  7.8043e-04,  2.4986e-03,
        -6.7917e-03, -2.9192e-02, -5.6200e-02, -5.3003e-03, -8.6122e-02,
        -5.1706e-02,  2.3730e-02, -2.9890e-03, -2.8433e-02, -3.5284e-02,
        -4.1647e-02,  4.9693e-03,  3.4354e-02, -1.5603e-02,  6.6800e-03,
         1.3122e-02, -3.3672e-04, -8.5663e-02,  2.8945e-03, -2.0985e-01,
         4.9752e-03, -6.1367e-02, -8.2609e-02, -2.2858e-02,  2.1774e-06,
        -1.2448e-02, -4.9825e-02, -3.5842e-02,  2.6808e-02,  7.3342e-02,
        -2.9628e-03, -2.6701e-02, -1.2885e-03,  2.9665e-03, -2.1039e-02,
        -3.0361e-02,  2.8203e-02,  2.5554e-02,  1.5935e-02,  4.9563e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5966, -0.1291, -0.0719, -0.0655, -0.0237, -0.1536, -0.0655, -0.0406,
        -0.0228, -0.0500, -0.1158,  0.0286,  0.0446, -0.1152, -0.0978,  0.1034,
        -0.0456,  0.0167, -0.0349, -0.0838, -0.0090, -0.3763,  0.0143, -0.1129,
        -0.0064, -0.0054,  0.0431, -0.0935, -0.3069, -0.1442, -0.0066,  0.2241,
        -0.2029, -0.1617,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1648, -0.6693, -0.0988, -0.0418, -0.0111, -0.0231,  0.0782, -0.0370,
        -0.2256, -0.0516, -0.0541, -0.0197,  0.0094,  0.0582,  0.0514,  0.4045,
         0.0858, -0.0774,  0.0336, -0.1996, -0.4614, -0.1241, -0.1994, -0.0106,
         0.0107, -0.5664,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0668e+00, -1.8532e+00, -9.5764e-01, -1.8529e+00,  2.6196e+00,
        -2.5658e+00, -1.5038e-01, -4.3998e-01, -6.3253e-01, -4.0671e-01,
         1.7844e-01, -9.1657e-05,  1.8438e-02, -4.3189e-01,  1.0697e-01,
        -1.2945e-03, -3.1645e-01,  3.4242e-01, -5.5281e-01,  1.7823e-01,
        -5.9300e-01,  6.5570e-01, -5.5042e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4041e-01, -1.7011e-01,  8.4877e-02, -4.6507e-02, -1.5159e-02,
         1.1441e-02,  1.7598e-02, -3.1298e-02, -1.2570e-01, -4.5968e-02,
         3.9670e-02,  4.9990e-02,  3.0360e-02,  2.8655e-02, -3.1088e-01,
         1.9177e-04, -8.2154e-02, -6.0359e-02, -4.9204e-02, -2.0705e-02,
         9.7995e-03, -3.6390e-02,  5.1289e-03,  1.3867e-01, -4.1688e-03,
         2.9540e-02, -1.6255e-01, -7.9422e-02, -8.4017e-03, -2.0785e-04,
        -3.2484e-02,  6.3706e-03,  4.2034e-02,  4.4784e-04,  2.4894e-02,
         4.1621e-02,  4.2917e-02,  2.9904e-02,  9.5856e-03,  1.1646e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0518e-01, -1.4209e-01,  2.9118e-02, -8.3292e-02, -7.7651e-02,
        -8.7657e-02, -7.6458e-03, -9.5232e-02,  1.8962e-03, -6.7034e-02,
        -3.0589e-02, -1.7025e-02, -2.0551e-02, -1.1800e-01, -6.0609e-02,
        -7.5320e-02, -6.9582e-03,  6.0569e-02,  2.4489e-02,  4.4667e-04,
        -8.4696e-02, -3.0316e-02, -8.4282e-02, -9.1481e-02, -3.9882e-02,
        -3.3077e-02,  1.6393e-04, -7.8006e-02, -3.1722e-02,  8.2793e-02,
        -1.9473e-01,  7.7429e-02, -3.1072e-01,  4.8709e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.3875, -0.0498, -0.0604,  0.0160,  0.0058,  0.0427, -0.0095,  0.0385,
         0.0141,  0.0794, -0.0178,  0.0233,  0.1109, -0.0461, -0.2018,  0.0098,
        -0.0278,  0.0084, -0.0716,  0.0281, -0.0423, -0.0113, -0.0366,  0.0051,
        -0.0886, -0.2427, -0.0929,  0.0258,  0.0337, -0.0498,  0.0314, -0.0348,
        -0.0085, -0.0221, -0.0120, -0.0058, -0.0226, -0.0339, -0.0254, -0.0451,
        -0.0550,  0.0140, -0.0489, -0.0352, -0.0284, -0.0005,  0.0072, -0.0138,
        -0.0095, -0.0438,  0.0131,  0.1109], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4159, -0.0111,  0.0853,  0.0986, -0.2263, -0.0888, -0.0459, -0.0173,
         0.0435, -0.0738, -0.0329,  0.0171, -0.1190, -0.1740, -0.1553, -0.0514,
        -0.2931,  0.1339, -0.0291,  0.3147, -0.0539, -0.0273, -0.0576, -0.1718,
        -0.0368, -0.0440,  0.1067, -0.0009, -0.0079,  0.1281,  0.0319,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1313, -0.0752, -0.0804, -0.0411, -0.0574,  0.0225, -0.1665,  0.0047,
        -0.0721,  0.1172, -0.1171, -0.0631, -0.0693,  0.0101,  0.0919, -0.1447,
         0.1897,  0.0338, -0.1938, -0.0126, -0.0757, -0.0499, -0.2054,  0.1386,
        -0.0850,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5351,  0.1300, -0.0369, -0.0787, -0.2362, -0.1926, -0.3701,  0.0973,
         0.2894, -0.1807, -0.0609, -0.1781,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1460, -0.6592, -0.0467, -0.2429, -0.1046, -0.0182, -0.0487, -0.0046,
        -0.0671, -0.1327,  0.0308,  0.0879, -0.1412, -0.1230, -0.2017, -0.1052,
        -0.0181,  0.0035, -0.0320,  0.0492, -0.0503, -0.0530,  0.0767,  0.1025,
        -0.0144, -0.1113,  0.0639, -0.0013, -0.2904,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0985, -0.6366,  0.0454, -0.2766,  0.0176, -0.1275, -0.3715,  0.1230,
        -0.1439, -0.0439,  0.0627, -0.1171,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3461, -0.2385, -0.3539, -0.2404, -0.1462, -0.0615, -0.4810,  0.0010,
        -0.2646, -0.1862, -0.2315, -0.0654,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4297,  0.0833,  0.5372, -0.4709,  0.4452,  0.3235,  0.1380,  0.0058,
         0.2963,  0.0691, -0.0442,  0.2423,  0.0142,  0.2085,  0.2553,  0.0875,
         0.1089,  0.0192, -0.4455,  0.4218,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7537e-01, -6.2875e-01,  1.2135e-03,  8.5265e-03, -4.1827e-01,
        -9.4834e-02,  1.2291e-01,  1.3897e-01, -1.9955e-02, -9.5992e-02,
         7.9914e-02,  2.6794e-02, -4.9974e-02, -6.6605e-02, -6.1146e-04,
         3.8294e-02, -2.8095e-02,  3.0206e-02, -4.4359e-02, -2.9604e-02,
        -1.0310e-01, -1.7506e-02, -4.5905e-02,  5.2959e-02, -1.6384e-01,
         7.3213e-02,  2.5425e-05, -9.1535e-03, -5.0333e-02,  1.1815e-01,
        -7.3471e-03,  3.6436e-02, -5.1635e-02, -1.5292e-01,  1.1033e-02,
         1.3714e-01,  2.0155e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9305, -0.9457, -0.1652, -0.5237, -0.2546,  0.0451,  0.0735,  0.0027,
        -0.2882,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-32.9178,   5.6446, 170.3633,   1.6841, -20.6142,  -1.8383,  21.7524,
         -3.5314,  -5.5602,  -5.0929,  -4.4357, -13.2616,  -3.1904,  -4.0455,
        -14.9119,  -9.7319,   3.0440, -33.2163,  19.9115,   9.7067,   1.2549,
         -7.0921, -12.9861,  -7.3866,   8.1388,  55.3477,  15.4236,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0579, -0.4141, -0.0448, -0.0350, -0.0602, -0.0740,  0.0227, -0.0416,
        -0.0173,  0.0138, -0.0284, -0.0634, -0.0548,  0.0688, -0.0500,  0.0598,
        -0.0736,  0.0183, -0.0076,  0.0659,  0.1699,  0.0104, -0.0259,  0.0541,
        -0.0400,  0.0373, -0.0300, -0.0532, -0.4455, -0.0108, -0.0250,  0.0081,
        -0.0186, -0.0638,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.4994, -0.0041,  0.0141,  0.0824, -0.0746, -0.0501, -0.0546, -0.0248,
        -0.1048,  0.0441, -0.2111, -0.3010, -0.1264, -0.0466, -0.0952, -0.0244,
        -0.0927, -0.0403, -0.0538, -0.0075,  0.0528, -0.0740, -0.1410,  0.0011,
        -0.0444, -0.0487, -0.0143, -0.0918,  0.0289, -0.0279, -0.0464, -0.1346,
        -0.1930,  0.0629,  0.0509, -0.0681, -0.0863,  0.0215,  0.0314,  0.0299,
        -0.0730, -0.2422,  0.0319,  0.0434,  0.3207,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1113,  0.0956,  0.0913, -0.1163, -0.0726, -0.1182, -0.4531, -0.0080,
        -0.1943, -0.1422,  0.1380, -0.0458, -0.1209, -0.0303, -0.0225, -0.1508,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0429, -0.1360, -0.1494, -0.1000, -0.0497, -0.0777,  0.0235,  0.1005,
        -0.0716, -0.1212, -0.1026, -0.0062, -0.2284, -0.0520, -0.0096,  0.0601,
        -0.0255, -0.0120,  0.0346, -0.0065, -0.0291,  0.0819, -0.0262, -0.0998,
         0.0568,  0.0318, -0.2307,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0187,  0.1388, -0.0934, -0.0300, -0.1003, -0.0411, -0.0745, -0.0160,
        -0.0290, -0.0319, -0.0299, -0.0273, -0.0353, -0.0164,  0.0167, -0.0207,
        -0.0278,  0.0219, -0.0323, -0.0641,  0.0234, -0.0164, -0.0306, -0.1657,
         0.0248,  0.0213, -0.0580, -0.0084,  0.0218,  0.0193,  0.0105, -0.0762,
        -0.0321, -0.0074, -0.0790, -0.0434, -0.0555, -0.0960, -0.1026, -0.0805,
        -0.0452, -0.0781, -0.0100,  0.0139, -0.0936, -0.0244], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2177,  0.0671, -0.0352, -0.0334, -0.0397, -0.0382, -0.0503, -0.0503,
         0.0274, -0.0042, -0.0168,  0.0007, -0.0359,  0.0150, -0.0098,  0.0608,
         0.0127, -0.0354, -0.1665, -0.5637, -0.1625, -0.0582,  0.0485,  0.0418,
        -0.0019,  0.0580, -0.1092, -0.1263, -0.0848,  0.2930,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0878, -0.0632, -0.1723, -0.0516, -0.1326, -0.1341, -0.1402,  0.1150,
         0.2571, -0.0340, -0.0251,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0276, -0.3232,  0.0700,  0.0609, -0.0946,  0.0339, -0.1042, -0.2826,
         0.0545, -0.1763,  0.1055, -0.0656, -0.0650, -0.0549,  0.0766,  0.0340,
        -0.0706, -0.0617,  0.0134, -0.2174,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2724,  0.6776,  0.2798,  0.2374, -0.2658,  0.2064,  0.3401,  0.0151,
         0.0518,  0.6884, -0.0971,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0185, -0.2488, -0.1627, -0.2116, -0.0671,  0.2484, -0.1583, -0.1564,
        -0.1986, -0.0109,  0.0302, -0.0810, -0.0712,  0.1546, -0.1230, -0.1946,
        -0.0042, -0.0887,  0.0186, -0.0399,  0.0219,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0676, -0.1987, -0.2023,  0.0212, -0.1617, -0.4474,  0.1127, -0.0276,
         0.0052, -0.0698,  0.0190, -0.0346,  0.0136, -0.0385, -0.0013, -0.0849,
         0.0226,  0.0591,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1765, -0.2601, -0.2780, -0.0460,  0.0221, -0.0526,  0.0459,  0.0700,
        -0.0190, -0.0401,  0.0572, -0.0151,  0.0638, -0.0103, -0.1489, -0.0320,
        -0.0670, -0.1890, -0.0728, -0.0104, -0.0160, -0.0725, -0.2258, -0.0168,
        -0.0211, -0.0764, -0.0790, -0.0399, -0.0117, -0.0935, -0.0405, -0.0255,
        -0.1055, -0.0393, -0.0253, -0.0106, -0.1153,  0.0513,  0.0013,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0315, -0.6224, -0.0734, -0.1077, -0.0402, -0.0537,  0.0027, -0.0299,
         0.2543,  0.0364,  0.0704, -0.1678, -0.0023, -0.0713, -0.0773,  0.0123,
        -0.0581,  0.0334, -0.0065, -0.0519, -0.0504, -0.0356,  0.0175,  0.0116,
         0.0759, -0.0789, -0.0476, -0.0082, -0.1242, -0.1475,  0.0046,  0.0141,
        -0.1728, -0.0299,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.2600, -0.4820,  0.1429, -0.0715, -0.0531, -0.2462, -0.0173,  0.6623,
        -0.1064,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2713, -0.3119, -0.0738,  0.1319,  0.1113,  0.0253, -0.0617,  0.0018,
        -0.0032, -0.0105, -0.0651, -0.0198, -0.0008, -0.0536, -0.0621, -0.0712,
        -0.0169, -0.6076, -0.0849, -0.0832, -0.0572, -0.0048,  0.0544, -0.0274,
        -0.0430, -0.0296, -0.0121, -0.0583, -0.0969, -0.0788, -0.0160, -0.0693,
        -0.0463, -0.0265,  0.0187, -0.0260,  0.0089, -0.0881, -0.1154,  0.0360,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2660, -0.0329, -0.3257, -0.0087, -0.0197, -0.0723,  0.0279,  0.0278,
        -0.0093, -0.1067,  0.0056,  0.0096,  0.0308,  0.0403,  0.0592,  0.0607,
         0.0264,  0.0656, -0.0736, -0.0232, -0.0307, -0.0718,  0.0450, -0.0436,
         0.0608, -0.0745, -0.0783,  0.0470, -0.1805, -0.0892, -0.0753, -0.0686,
         0.0093, -0.0509, -0.0408, -0.1528, -0.0678, -0.0835, -0.0083,  0.0482,
        -0.0513,  0.0375, -0.0510,  0.0089,  0.0502,  0.0203,  0.0249,  0.0346,
        -0.0212, -0.1095, -0.0575,  0.0008,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5733e-01, -4.1378e-01, -9.5397e-02, -1.0912e-01, -3.2312e-02,
         1.5991e-01, -1.0327e-01, -3.6971e-02, -8.5337e-02,  1.0029e-04,
         2.3172e-02,  4.6504e-02, -5.9692e-02,  2.2745e-03, -1.0984e-01,
        -1.5594e-01, -1.8432e-01, -9.2196e-02, -1.1660e-01, -1.1702e-01,
         1.1806e-01,  3.0385e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2204,  0.8610, -0.1301, -0.0203, -0.0513,  0.1645,  0.2802,  0.1064,
        -0.0677,  0.0463,  0.1989,  0.0169, -0.0241, -0.1107,  0.0099,  0.1339,
         0.1286, -0.0183, -0.1623, -0.1425,  0.1170,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7397e-01, -9.8037e-02, -8.6696e-04, -9.3680e-02,  7.3149e-02,
        -4.0505e-03, -5.7792e-02,  2.7912e-02, -4.7824e-02, -1.7263e-02,
        -1.8416e-02, -3.8728e-02, -1.7426e-02,  3.0261e-02,  4.3254e-02,
         6.5573e-03,  1.1516e-02,  1.5620e-02, -3.5902e-02, -1.4277e-02,
        -1.3045e-02,  4.3013e-02,  2.1969e-03, -1.0298e-02, -4.2279e-02,
         3.8820e-02,  6.1178e-03, -5.3092e-02, -1.4260e-04,  2.3696e-02,
        -1.3962e-02, -3.3059e-02, -1.4855e-02,  2.9043e-03, -5.1089e-02,
        -5.9837e-03, -1.0162e-03, -7.0575e-03, -2.2487e-02, -3.2664e-02,
        -5.6546e-01, -4.9707e-02, -2.0823e-02,  2.5722e-03, -8.5235e-02,
        -3.9697e-03, -6.0184e-02, -6.7524e-02, -2.1608e-02,  2.6880e-02,
        -2.1236e-02,  3.1890e-02, -1.1345e-02, -2.8819e-02, -2.5069e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2939, -0.1681,  0.1593, -0.1067, -0.0602, -0.0779, -0.0522, -0.0117,
         0.0342,  0.0204,  0.0369,  0.0160, -0.0754, -0.0320,  0.0293, -0.0389,
         0.0220, -0.0126, -0.0708, -0.0747, -0.0709, -0.0515, -0.0390,  0.0388,
         0.0460,  0.1174, -0.1298, -0.2554, -0.0712, -0.0641, -0.0350, -0.0350,
        -0.1013, -0.3246, -0.0609, -0.1338, -0.0179, -0.0709, -0.0125, -0.0202,
        -0.0362,  0.0236,  0.1924,  0.0553, -0.0647,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0308,  0.4990,  0.1297,  0.1958, -0.1143,  0.0512,  0.0852,  0.0281,
         0.0892, -0.0390, -0.0966, -0.0031,  0.0321,  0.1084,  0.1033,  0.0606,
         0.0436,  0.1808,  0.0654,  0.0622,  0.1980,  0.1617, -0.0343,  0.0409,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0004,  0.1177,  0.0563, -0.0321,  0.1172,  0.0763,  0.0723,  0.0360,
         0.0552, -0.0202,  0.0153,  0.0339, -0.0058,  0.0418,  0.0341, -0.0440,
         0.1958,  0.0255, -0.0090,  0.0745,  0.0613, -0.0647,  0.0954,  0.0393,
        -0.0002,  0.0600,  0.1510,  0.1040,  0.0080,  0.1455, -0.0149,  0.0716,
         0.0217,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3907, -0.5580, -0.2302, -0.2093, -0.0848, -0.1527, -0.0262,  0.0049,
        -0.3287, -0.1064,  0.0344, -0.0153, -0.0816, -0.0124, -0.0039, -0.0856,
        -0.0162, -0.2406, -0.1739,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5674, -0.6570, -0.1597, -0.0608, -0.0199,  0.0644, -0.1193, -0.0600,
        -0.2867, -0.0478,  0.0591, -0.0332,  0.0049, -0.0201,  0.0262, -0.0095,
         0.0239,  0.0387, -0.0094, -0.0989, -0.1087, -0.2236, -0.0874,  0.0562,
         0.2110,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1235, -0.2338, -0.0133, -0.1074, -0.1757, -0.0850, -0.2780, -0.0240,
        -0.1054, -0.0940,  0.0449, -0.0329, -0.0253, -0.0513, -0.0819,  0.0158,
         0.0695,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0079, -0.0054,  0.0227,  0.0549,  0.1023,  0.0344, -0.0681,  0.0568,
        -0.2456,  0.2277,  0.0346,  0.0444,  0.0593, -0.0895,  0.1624,  0.0938,
         0.1470,  0.1198, -0.0460,  0.0838,  0.0642,  0.1627,  0.4513,  0.0074,
         0.2351,  0.0499,  0.0272,  0.1000,  0.2007, -0.1077, -0.0384,  0.0689,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1095, -0.4079, -0.1965, -0.0729, -0.0919, -0.0675, -0.1499, -0.1630,
        -0.0525, -0.0736,  0.0258, -0.0207,  0.1901, -0.0814, -0.0039, -0.0165,
        -0.0777, -0.0257, -0.0686, -0.0072, -0.0843, -0.0208, -0.0713, -0.3044,
         0.0434, -0.0758, -0.2395,  0.0747,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1380, -0.3961,  0.0926, -0.0418,  0.0626, -0.0992, -0.2748, -0.0923,
        -0.0689, -0.1000, -0.0316, -0.0319, -0.0209, -0.0756, -0.0361, -0.1797,
        -0.0570, -0.0797, -0.0138, -0.1223, -0.1151,  0.0285,  0.0484, -0.1023,
        -0.1079,  0.0920, -0.1273, -0.0334, -0.1969,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3203,  0.0612, -0.0656, -0.0291, -0.5818, -0.1152,  0.0695, -0.3968,
         0.0607,  0.1651,  0.3318, -0.0933, -0.1147, -0.4945, -0.3011, -0.0449,
        -0.1148, -0.4108, -0.1691, -0.0366,  0.2102,  0.0091,  0.0072, -0.0028,
        -0.1053, -0.0135, -0.0140, -0.0859, -0.0467,  0.0189, -0.0081,  0.1007,
        -0.4203,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2350, -0.2520, -0.0716, -0.0314, -0.0758, -0.1261,  0.0217, -0.0330,
        -0.0754, -0.1448, -0.1046, -0.0036, -0.0446, -0.0147, -0.1091, -0.0103,
        -0.0151, -0.0834, -0.2556,  0.0420, -0.2718, -0.0624, -0.1459,  0.3042,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2708, -0.2147, -0.1495, -0.0312, -0.1148, -0.2003, -0.1185, -0.1152,
        -0.0554, -0.0955, -0.0474, -0.0085, -0.0983, -0.0656,  0.0028, -0.0064,
        -0.0165, -0.0647, -0.0261, -0.3648, -0.0953, -0.0195, -0.0171,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0032, -0.1843, -0.0553,  0.0589, -0.0287, -0.1554, -0.0523,  0.0086,
         0.0032, -0.0567,  0.0172, -0.0766, -0.0705, -0.1055, -0.0555, -0.0680,
        -0.0710, -0.1133, -0.0695, -0.1017, -0.0059, -0.1041, -0.0310,  0.0228,
        -0.0763, -0.0219, -0.0042, -0.0289, -0.1085, -0.1038, -0.1336, -0.0374,
        -0.0429,  0.0690, -0.0381, -0.0521, -0.0511, -0.1035, -0.0324,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0927, -0.2982, -0.1453,  0.0053, -0.2371, -0.0447,  0.0408,  0.0256,
        -0.0572, -0.0220, -0.0294, -0.0296, -0.0182, -0.0093,  0.0233, -0.0546,
        -0.0583, -0.0045,  0.0271,  0.0108, -0.1226, -0.0145, -0.0205, -0.0409,
        -0.0131, -0.0693, -0.0770, -0.0384, -0.0362, -0.0492, -0.0699, -0.0132,
        -0.0809, -0.0401, -0.0675, -0.0050, -0.0596, -0.1419, -0.0312, -0.0230,
        -0.0529, -0.1136, -0.1088,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1348, -0.4750,  0.0402,  0.0265, -0.0717,  0.0262, -0.0473, -0.0168,
        -0.0902, -0.0323, -0.0358,  0.0137, -0.0631, -0.0344,  0.0066, -0.0242,
        -0.0391, -0.0704,  0.0013, -0.0094,  0.0186, -0.0409, -0.0433, -0.0609,
        -0.0426, -0.0320, -0.0669, -0.0226,  0.1048, -0.1020,  0.0953,  0.0577,
        -0.1049,  0.1044,  0.0380, -0.0219,  0.0744,  0.0460, -0.0899, -0.0049,
        -0.0722, -0.1025, -0.0081,  0.0551,  0.0530, -0.0618, -0.0030, -0.0222,
         0.0383, -0.0098, -0.0265, -0.0546, -0.0083, -0.0254, -0.0646, -0.0358,
         0.0271], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1562, -0.6508,  0.0076, -0.1450, -0.1104, -0.1619,  0.0013, -0.2634,
        -0.2243,  0.1052, -0.1066,  0.0510, -0.0187, -0.1671,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2027, -0.3957, -0.1916, -0.0740, -0.0314, -0.0835, -0.0325, -0.0498,
        -0.0859, -0.0570, -0.0769, -0.4319, -0.0861, -0.0573, -0.1008, -0.1391,
         0.0137, -0.0992, -0.0022, -0.0824, -0.2791, -0.0932, -0.0690, -0.0168,
        -0.0458,  0.0489, -0.0012, -0.0054,  0.1636,  0.2771,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3244,  0.3705,  0.1172,  0.6348,  0.1677, -0.0733,  0.0618, -0.1200,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.5453,  0.1903,  0.1290,  0.0718,  0.0518,  0.3494, -0.0398, -0.0368,
        -0.0584,  0.0132,  0.0890,  0.0586,  0.0817,  0.2230,  0.0489,  0.0116,
         0.0860,  0.0103, -0.0427,  0.0261, -0.0963,  0.0502,  0.1096,  0.1053,
         0.1871,  0.1214,  0.0788, -0.0603,  0.1132,  0.2689, -0.1393, -0.2925,
        -0.2467,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4247e-02, -2.5154e-01, -1.4125e-01, -3.3480e-02, -1.2775e-02,
        -1.7953e-02,  6.8487e-03, -3.0335e-02, -5.5649e-02, -5.0026e-02,
         4.1710e-03,  1.7062e-03, -3.4572e-02, -4.5169e-02,  1.3431e-03,
         2.9833e-03, -3.3054e-02, -3.2663e-02, -2.4820e-02, -5.5090e-03,
         3.1460e-03,  2.3710e-03, -4.4594e-02, -3.7570e-02,  4.3822e-02,
        -4.6713e-02, -1.5591e-02, -8.6224e-02,  8.0678e-03,  5.8975e-05,
        -1.6071e-02, -2.2388e-02, -5.4800e-02, -2.2356e-01, -3.2527e-02,
         1.0528e-03, -4.8276e-02, -3.1352e-02, -2.2627e-02, -2.5610e-03,
         1.0631e-02, -2.6360e-02, -1.7930e-02,  5.5077e-03, -2.7978e-02,
         8.9933e-02, -8.4044e-02, -1.3716e-02, -9.9669e-02, -9.1362e-02,
        -3.0383e-02, -5.1320e-02, -9.4790e-03, -8.8958e-03,  6.8474e-03,
         3.5314e-02, -2.4334e-02, -2.7527e-03, -6.4574e-02, -9.4909e-02,
        -5.6862e-02, -3.9005e-03, -1.3557e-01, -9.2895e-02, -1.3969e-02,
        -3.1701e-02, -7.7952e-03,  2.4886e-03, -6.1057e-02,  4.6108e-02,
         6.2616e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6405e-01, -1.3161e-01, -8.5086e-02,  8.7554e-02, -1.2392e-01,
        -1.0437e-02, -6.6011e-01, -7.1159e-02, -3.8253e-02,  2.7905e-02,
         2.7318e-02,  4.8530e-03, -3.3279e-02,  2.7195e-02, -3.2249e-02,
        -8.1711e-02, -8.4510e-03, -3.0110e-02, -6.4056e-02, -8.4338e-02,
         1.3004e-01, -2.8920e-03,  1.7977e-01, -1.4545e-01,  6.5278e-03,
        -1.5125e-02, -7.1065e-02,  7.1060e-05,  1.0284e-01, -7.7560e-02,
        -1.2949e-02, -5.2706e-02, -3.3836e-02, -4.3165e-02, -3.3611e-02,
        -5.2536e-02, -6.4289e-02, -1.6373e-02, -3.9897e-02,  3.5241e-02,
        -1.3134e-01, -1.3760e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0652,  0.5394,  0.0763, -0.0015,  0.1236, -0.0949,  0.0399,  0.0508,
        -0.1236,  0.1252,  0.2761,  0.0491,  0.2302,  0.0175,  0.0252, -0.0740,
        -0.0384, -0.0014,  0.1911,  0.0622, -0.0010,  0.0217,  0.0814, -0.0291,
         0.2459,  0.0851,  0.0959,  0.0977,  0.2211,  0.0465,  0.0566, -0.0273,
         0.0187,  0.1425,  0.0172, -0.5671,  0.1656,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0395, -0.6676, -0.0051, -0.1752, -0.0881,  0.0096, -0.0433, -0.0029,
         0.0888, -0.0077, -0.0433, -0.0975, -0.1191, -0.0492, -0.0150,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1940,  0.3727, -0.0647,  0.1002,  0.1414,  0.1083, -0.0042, -0.0138,
         0.4156,  0.0128,  0.0090, -0.1177,  0.2146, -0.0057, -0.0552, -0.0042,
         0.1810, -0.0142, -0.0294,  0.1662,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3483,  0.0980,  0.0811,  0.0982, -0.6654, -0.0972, -0.0509, -0.0038,
         0.0123, -0.0499, -0.0130, -0.0595, -0.0476, -0.0634, -0.0459, -0.0630,
        -0.0295, -0.0439, -0.0744, -0.0539, -0.0647,  0.0553, -0.1040, -0.0339,
        -0.0505, -0.0192, -0.3349, -0.0291,  0.1011, -0.0172, -0.0493,  0.0011,
         0.0715,  0.0080,  0.0340, -0.1034,  0.0417, -0.0135, -0.0550, -0.0183,
        -0.0591,  0.3034,  0.3009,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0649,  0.0779, -0.0288, -0.1110, -0.0171, -0.0585, -0.0675, -0.2917,
        -0.0600,  0.0155, -0.0065, -0.0184, -0.0416, -0.0310, -0.1205, -0.0169,
        -0.0008,  0.0304, -0.0066, -0.0221, -0.0882, -0.0272, -0.0323, -0.0463,
        -0.0598, -0.0110, -0.0554, -0.0792, -0.1321, -0.0469, -0.0090, -0.0764,
        -0.0598, -0.0239, -0.0466, -0.0455, -0.1141, -0.1274,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1099, -0.3941, -0.0092, -0.4544, -0.0241, -0.1513, -0.0829,  0.2132,
         0.0726, -0.3944, -0.0333, -0.0255, -0.0955, -0.0082,  0.0311,  0.1558,
         0.0317, -0.0261,  0.0479, -0.0086,  0.1345, -0.1746,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1152, -0.0608,  0.1175,  0.0224, -0.0074,  0.0608, -0.0390, -0.0680,
         0.0800, -0.1357,  0.0260,  0.0146, -0.0558,  0.0179, -0.0434, -0.0590,
        -0.4370, -0.0378, -0.1348, -0.0446, -0.0428,  0.0791, -0.1473, -0.0702,
        -0.1139, -0.0432, -0.0145,  0.1053, -0.0037, -0.0226,  0.0220, -0.2238,
         0.0051, -0.0302, -0.0304, -0.0890, -0.0168,  0.0510, -0.0549, -0.2285,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1530,  0.1524, -0.0469, -0.2109, -0.0629, -0.0462,  0.0416,  0.0548,
        -0.1106, -0.1197,  0.0300, -0.0005, -0.0704, -0.1466, -0.2150, -0.1392,
         0.1180, -0.0496, -0.2372, -0.0636, -0.0732, -0.0118, -0.0290, -0.0480,
         0.0075, -0.2608, -0.0228, -0.1028, -0.0956, -0.0057, -0.0565, -0.0105,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0695e-01,  1.2142e+00, -6.5476e-02,  3.6364e-02,  9.3357e-02,
         7.5979e-02,  5.1220e-02,  1.3496e-01,  2.8208e-01,  1.0943e-01,
         2.9347e-02,  6.5888e-02,  2.9011e-02,  4.4133e-02,  3.3175e-02,
         9.0311e-02,  6.8465e-02,  6.9596e-02,  1.1192e-02,  3.2989e-02,
         2.7417e-02,  1.3254e-01,  6.7826e-02,  1.4800e-01, -2.2839e-02,
         4.7570e-02,  9.4724e-02,  1.2571e-01, -4.8793e-02,  5.2335e-02,
        -5.9871e-02,  8.6329e-02,  5.2617e-03,  1.5824e-01,  1.2100e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.3411, -0.1591, -0.2360, -0.2288, -0.0065, -0.0565, -0.1722, -0.2507,
         0.0795, -0.1358,  0.0921, -0.0875, -0.0261, -0.1298, -0.0234,  0.0530,
        -0.0313, -0.0502, -0.0811, -0.1761, -0.2623,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2114,  0.7407,  0.0265,  0.2297,  0.2758,  0.1718, -0.0476,  0.2298,
         0.2837, -0.0917, -0.0376, -0.2240, -0.0487, -0.0634,  0.1694, -0.0294,
         0.1004,  0.1754, -0.0869,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2107, -0.4759, -0.1143, -0.0775,  0.0146, -0.0973,  0.0171,  0.0009,
         0.0208, -0.1913, -0.3150,  0.0140, -0.0993, -0.3129, -0.0338, -0.0229,
        -0.0522, -0.0729,  0.2041,  0.3883,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0590, -0.5582,  0.0595, -0.2169, -0.0474,  0.0042,  0.1508, -0.0349,
         0.0096, -0.0607, -0.0685, -0.0661,  0.0444, -0.0727, -0.0612, -0.0900,
        -0.1561, -0.0658,  0.0052, -0.0880,  0.0172, -0.0837,  0.0173,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0048, -0.2113, -0.0019,  0.0211,  0.0186, -0.0980, -0.0415, -0.0345,
         0.0226, -0.3658, -0.1823, -0.0747, -0.0690, -0.3704, -0.0067,  0.0745,
         0.1245, -0.0408, -0.0557,  0.0699, -0.0874,  0.0230, -0.1084,  0.1347,
        -0.2099,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2403,  0.7472,  0.0736,  0.0876,  0.0057,  0.0904,  0.0321,  0.1289,
        -0.0598, -0.0481,  0.1042, -0.0096,  0.0112,  0.0165, -0.0771,  0.0317,
        -0.0106, -0.0252, -0.1045,  0.0214,  0.0428,  0.0663,  0.1215,  0.0876,
         0.0684,  0.0665,  0.0357, -0.0187,  0.1207, -0.0309,  0.0534, -0.0160,
         0.0192,  0.0896,  0.1316,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2243,  0.1369,  0.1579, -0.1023, -0.0224,  0.0034, -0.0799, -0.0252,
        -0.1910, -0.0265, -0.1806, -0.0422,  0.0277, -0.0622, -0.0995, -0.2908,
        -0.0782, -0.0638, -0.0995, -0.0984, -0.2125,  0.0708,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3889,  0.0831,  0.0056,  0.0167, -0.0051,  0.0897,  0.0190, -0.0315,
        -0.1649,  0.1066,  0.1380, -0.0077,  0.1049,  0.1652, -0.1954,  0.0453,
         0.0793,  0.0320,  0.0761, -0.0255,  0.2188,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3842, -0.4593, -0.0824,  0.1334, -0.0677, -0.0951, -0.0336, -0.2509,
        -0.0985, -0.0462,  0.0461, -0.0137,  0.0397, -0.1202,  0.0657, -0.1059,
         0.0853, -0.0058,  0.0389,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0805, -0.0448, -0.1672,  0.0505, -0.0939, -0.1343,  0.0654,  0.0151,
        -0.0086,  0.0268,  0.0235, -0.0228,  0.0014, -0.0476, -0.0037,  0.0135,
        -0.0192, -0.0790, -0.0251, -0.0197,  0.0129, -0.0916,  0.0190, -0.0119,
         0.1076,  0.0715,  0.0230, -0.1099, -0.0009, -0.1282,  0.0174,  0.0294,
        -0.0785, -0.1730, -0.0683, -0.0015,  0.0260,  0.0523,  0.0348, -0.0330,
         0.0083, -0.0189, -0.0170,  0.0106, -0.0056, -0.1125, -0.0035],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1614, -0.3424, -0.1726, -0.0120,  0.0080, -0.0523, -0.0401,  0.0168,
        -0.0768,  0.1028,  0.0100, -0.0394, -0.0330,  0.0455, -0.0805, -0.0586,
        -0.0588, -0.0439, -0.0884,  0.0554, -0.0476, -0.0552, -0.0720, -0.4563,
         0.0100,  0.0272, -0.1542,  0.0748, -0.0512, -0.1223,  0.0521, -0.0017,
         0.0659, -0.0020,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0123, -0.8517, -0.0332,  0.0168, -0.0833, -0.0520, -0.0421, -0.0182,
        -0.0489, -0.0297,  0.0711,  0.0138, -0.0841, -0.1708,  0.1288,  0.0217,
        -0.0079, -0.0071, -0.2011, -0.0776,  0.0318, -0.1287, -0.0376, -0.0871,
        -0.0128, -0.0372, -0.0388, -0.0599,  0.0267, -0.1015, -0.0650, -0.0852,
        -0.0366,  0.0052,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.0404, -0.7412, -0.5038, -0.0116, -0.1227, -0.0728,  0.1532,  0.2432,
        -0.1251, -0.1136, -0.0553,  0.0611, -0.0527, -0.0780, -0.0442, -0.2400,
        -0.0802,  0.0428,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2647, -0.4153, -0.1391,  0.0894, -0.0449,  0.0520, -0.0463, -0.0766,
         0.0126, -0.4052, -0.0552,  0.0042, -0.0836, -0.1140, -0.0797,  0.0049,
        -0.0225,  0.1219,  0.0697, -0.3886,  0.0994,  0.0170, -0.1029, -0.0404,
        -0.0694,  0.2047, -0.2143,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0450,  0.0486, -0.0058,  0.0703, -0.0169, -0.3331,  0.1642,  0.1787,
        -0.4359,  0.1001,  0.1625,  0.1603, -0.1827,  0.0456, -0.0199, -0.0055,
         0.0035, -0.0113, -0.0996, -0.3262, -0.0130, -0.0177,  0.0176,  0.1366,
        -0.0241, -0.1282,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1950,  0.4942,  0.0480,  0.0881,  0.2845, -0.0140,  0.0053, -0.0351,
         0.1102,  0.1091,  0.1460, -0.0340,  0.0586,  0.1634, -0.0251,  0.0499,
         0.0281, -0.0230,  0.0794, -0.3699, -0.0659,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1782, -0.3616, -0.2939, -0.2273, -0.0815, -0.2439,  0.1215, -0.1584,
        -0.0678,  0.1330, -0.0751, -0.1278, -0.0514, -0.2333,  0.1935,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4671, -0.0425, -0.1778,  0.2745, -0.1806,  0.1753, -0.1455, -0.1510,
         0.0198, -0.0415, -0.0341,  0.5150,  0.2878,  0.1043, -0.1975, -0.0354,
        -0.2131, -0.2184,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3784, -0.4898, -0.1840, -0.2372, -0.1163, -0.0372,  0.0838, -0.1246,
         0.0257, -0.0964, -0.1229, -0.0617,  0.0661, -0.1954, -0.0391, -0.0827,
         0.0796,  0.0225, -0.1447,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0768e-01, -1.2097e-01, -6.8256e-02,  3.5251e-03, -2.7034e-02,
        -6.4225e-02,  1.3384e-01, -1.6811e-02, -3.6683e-02, -3.0684e-02,
        -7.5684e-03,  5.5993e-03, -2.5832e-02, -1.4554e-02,  5.3155e-02,
        -3.2914e-02, -8.4972e-02, -2.8604e-02, -2.6151e-02,  9.5904e-02,
        -1.9867e-01,  1.0444e-01, -1.0969e-01, -6.3032e-02,  8.7463e-03,
        -2.3024e-02, -7.8678e-02,  3.8381e-02, -1.3204e-01, -7.2881e-02,
         4.0489e-05, -9.3747e-02, -8.4582e-02, -5.7439e-02, -2.6867e-02,
         1.2383e-02, -6.2010e-02,  1.9358e-02, -1.1079e-02,  1.2860e-01,
         3.6037e-02,  1.2431e-01, -9.6402e-03, -1.7829e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4768,  0.1234, -0.0299, -0.0130,  0.0048, -0.0651, -0.1057, -0.0081,
        -0.1141,  0.1079, -0.0503, -0.0171, -0.0140, -0.0321,  0.0177, -0.0803,
        -0.1279, -0.0803,  0.0222, -0.0196, -0.0284, -0.0295,  0.0413,  0.0062,
        -0.0018, -0.0143,  0.0092, -0.0776,  0.0921, -0.0542, -0.1057,  0.0756,
        -0.0360, -0.0407, -0.1117,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2049, -0.9052, -0.0463, -0.0083, -0.1815, -0.3931, -0.1489,  0.0130,
        -0.0514,  0.0395, -0.0126, -0.0418, -0.3969, -0.0525, -0.2152, -0.0763,
        -0.2103, -0.1144, -0.2736,  0.1317, -0.0603,  0.5079,  0.9102,  0.0707,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1463, -0.2280, -0.0834, -0.0623, -0.0387, -0.0768,  0.0457,  0.0032,
        -0.0325, -0.0227, -0.0043,  0.0428,  0.0270, -0.0854, -0.1754,  0.0537,
         0.0393,  0.0010, -0.1558, -0.0110,  0.0699, -0.0585, -0.0076, -0.0228,
        -0.0430, -0.0906, -0.0614, -0.0531,  0.0042,  0.1085, -0.0903,  0.0048,
        -0.1047,  0.0452,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5408,  0.3006, -0.0615,  0.0271,  0.0601,  0.1171,  0.2486,  0.0190,
         0.1262, -0.0179,  0.0905,  0.0468,  0.0792,  0.0819,  0.1032,  0.5295,
         0.0268, -0.0810,  0.1621,  0.0305, -0.0209,  0.0024, -0.0687,  0.0716,
         0.0564, -0.0130,  0.0095, -0.0457, -0.1135, -0.1204,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.1643, -0.7134, -0.2691, -0.4254, -0.0920, -0.1543, -0.1166, -0.0534,
         0.0124, -0.1569, -0.1876, -0.2629, -0.0535,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1159,  0.0632,  0.1515, -0.0351, -0.0795, -0.1275,  0.0735, -0.0850,
        -0.3500,  0.0799, -0.1497, -0.1661, -0.0287, -0.1877, -0.2594, -0.2313,
        -0.0443,  0.1806,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3376, -0.1924, -0.1116, -0.3953, -0.3554,  0.0131, -0.0954, -0.3134,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0485, -0.3118, -0.0786,  0.0347, -0.0170, -0.0473,  0.0228,  0.0323,
         0.1073, -0.0122,  0.0225, -0.0012, -0.0027, -0.0633, -0.0016,  0.0004,
         0.0005,  0.0267, -0.0275,  0.0108,  0.0375, -0.0213, -0.0448, -0.0241,
         0.0111, -0.0080,  0.0230,  0.0975, -0.3015, -0.0602,  0.1103, -0.1458,
        -0.0987, -0.2606, -0.0594, -0.1502, -0.0746, -0.0272, -0.0739, -0.0120,
        -0.0459, -0.0890,  0.0125,  0.0112, -0.0228,  0.1819,  0.1368],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2431, -0.3548, -0.2717, -0.4237, -0.1769, -0.5822,  0.0429, -0.1365,
        -0.6052,  0.1778,  0.2041,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1143, -0.4082, -0.2343, -0.0613, -0.0154, -0.0509, -0.0827, -0.3586,
        -0.3471, -0.1573, -0.1322, -0.2430, -0.3377, -0.2997,  0.1983, -0.0442,
         0.1351,  0.0818,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2502, -2.1629,  1.0420, -1.8510, -0.1000,  1.1644, -0.0782, -0.5064,
        -0.1058, -0.1180, -0.2358,  0.5140, -0.8163, -1.1023, -3.2348,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1203,  0.2441,  0.0180, -0.0113, -0.0181,  0.0199,  0.4815,  0.0174,
         0.0972,  0.0343,  0.3045, -0.0100,  0.1490, -0.1113,  0.1634,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2587, -0.0869,  0.1227, -0.0997, -0.0774,  0.0514,  0.0643, -0.0110,
        -0.1436, -0.1601, -0.0590, -0.0728, -0.2006,  0.0451, -0.0273,  0.0761,
        -0.1910, -0.0424,  0.0441,  0.1146, -0.1026, -0.2290, -0.0193,  0.0264,
        -0.0311, -0.1415,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3527, -0.3277, -0.0898, -0.2351, -0.0147, -0.1214, -0.2091, -0.0410,
        -0.6051, -0.0457, -0.1023,  0.1157, -0.0230,  0.2002,  0.0586,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6627,  0.5831,  0.0914,  0.0532,  0.2814, -0.0524,  0.1701, -0.1066,
         0.0466,  0.8269, -0.0142,  0.0535,  0.1046,  0.0087,  0.1002,  0.0326,
         0.0464,  0.0133,  0.1133,  0.0337,  0.0016, -0.0860, -0.0066,  0.0478,
         0.2874, -0.4205,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3047,  0.1040, -0.0058,  0.0052,  0.0453, -0.0708,  0.0681, -0.0715,
         0.0058, -0.0669, -0.2169,  0.0565,  0.0063, -0.1375, -0.1722, -0.0682,
         0.1903, -0.1340,  0.0319, -0.1084,  0.3440,  0.0883,  0.1130,  0.0265,
         0.0950, -0.4660,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.1142, -0.5322, -0.0867,  0.0028, -0.0693, -0.0070, -0.0165, -0.0672,
        -0.0459, -0.0644, -0.0132, -0.0391, -0.1859, -0.0219, -0.0647,  0.0561,
        -0.0466,  0.0074, -0.0588, -0.0174,  0.0040, -0.0205,  0.0038, -0.0992,
        -0.1361,  0.0529, -0.0690,  0.0285, -0.0379, -0.0153,  0.0522, -0.0636,
        -0.0803, -0.0336, -0.0927, -0.0460,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2413, -0.2853, -0.1226, -0.0990, -0.0837, -0.0357, -0.0799, -0.2250,
         0.0386,  0.1751, -0.0141,  0.0511,  0.0366,  0.1127, -0.0318, -0.0257,
        -0.0535, -0.0187,  0.0416, -0.0232,  0.0422, -0.0821, -0.0301, -0.1052,
        -0.0921, -0.2718,  0.0398, -0.0112, -0.1749, -0.0198, -0.0683, -0.0739,
        -0.0357,  0.0971,  0.1877,  0.0688,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2036,  0.5966,  0.1998,  0.1065, -0.3799, -0.2152, -0.0009,  0.0079,
        -0.0711, -0.0395,  0.0533,  0.0745,  0.1298, -0.0768, -0.1381,  0.0532,
        -0.0482,  0.1354,  0.0234, -0.2116,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1320, -0.4661, -0.0784,  0.0341, -0.0227, -0.0067, -0.0476, -0.0565,
        -0.0053,  0.0136, -0.0100, -0.0968, -0.0150, -0.0080, -0.0999, -0.0541,
        -0.0021, -0.0443,  0.0350, -0.0044, -0.1217,  0.0353, -0.0272, -0.0377,
        -0.0814,  0.0058, -0.0437, -0.0629,  0.0603, -0.0996, -0.0508, -0.0517,
        -0.2344, -0.0694,  0.0018, -0.0423, -0.0032, -0.0506,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2863, -0.2455, -0.0781, -0.2821, -0.0509, -0.0668, -0.0890, -0.0287,
        -0.0468, -0.0945, -0.0901, -0.3964,  0.0135, -0.0077,  0.1006, -0.0454,
        -0.0028,  0.0127, -0.0449, -0.0366,  0.0338,  0.0079,  0.0684,  0.0477,
        -0.0289,  0.0061, -0.0081, -0.0484, -0.0681, -0.0317, -0.0472, -0.0027,
         0.0425,  0.0332, -0.0425,  0.0429, -0.0197,  0.0894, -0.0163, -0.0678,
         0.0370], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0526e-01, -1.5195e+00, -1.5134e-01, -1.0969e-01, -6.5411e-02,
        -1.1206e-01, -1.1791e-01, -1.3380e-01, -8.7301e-02,  5.6519e-02,
         8.7688e-03, -6.7201e-02, -6.6830e-02, -2.0304e-01,  4.9625e-02,
        -1.0063e-01,  4.0939e-03, -6.9402e-02,  3.3535e-04,  8.9667e-03,
        -1.0380e-01, -2.8624e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2323, -0.0689,  0.0048,  0.0282,  0.0216, -0.0917, -0.1166,  0.0339,
        -0.1332,  0.0228,  0.1259, -0.1334, -0.1300, -0.0928, -0.0217, -0.1179,
        -0.0763, -0.0028,  0.0246, -0.0777, -0.2135, -0.0304, -0.0080, -0.0412,
        -0.0553,  0.0175, -0.0255, -0.0842, -0.3608, -0.0083, -0.0486,  0.0054,
        -0.0773, -0.0004, -0.0292,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5655, -0.1982, -0.1681, -0.0227, -0.2749,  0.0364, -0.0430,  0.0268,
        -0.0813, -0.0266, -0.1838, -0.0242, -0.0102, -0.0155, -0.0135, -0.0474,
         0.1092,  0.0722, -0.1645, -0.0111, -0.0975, -0.0835, -0.0649, -0.0507,
        -0.0226,  0.0203, -0.1764, -0.0591,  0.0436,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2355, -0.5044, -0.1137, -0.0286,  0.0134, -0.0789,  0.0075, -0.0765,
        -0.0713, -0.0378, -0.0255, -0.1733, -0.0406, -0.1650, -0.1840, -0.1912,
        -0.1103, -0.0342, -0.0243,  0.0702, -0.1308, -0.1022, -0.0868,  0.0678,
        -0.3040,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4781, -0.0145,  0.0384, -0.0521, -0.4523, -0.1094, -0.0928, -0.2117,
        -0.1049, -0.0796,  0.0025,  0.0066,  0.0723,  0.0215, -0.0640, -0.3182,
        -0.0858, -0.0359, -0.0277, -0.0328, -0.0528,  0.0697,  0.0325,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2456,  0.1386,  0.0151,  0.0361,  0.0681,  0.1168, -0.1017, -0.0961,
        -0.0742, -0.0122,  0.0494, -0.1023, -0.0416, -0.2079, -0.0219, -0.0896,
        -0.0287, -0.0254, -0.0796,  0.0713,  0.0604, -0.0324,  0.0289, -0.0993,
         0.0748, -0.0275, -0.2816, -0.0824,  0.0332, -0.0375, -0.1113, -0.0142,
        -0.0115, -0.0809,  0.0387,  0.0057,  0.0094, -0.0489,  0.0292,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1255, -0.2860,  0.0769, -0.2893, -0.4974, -0.1150,  0.0363, -0.2801,
        -0.0361, -0.0834, -0.3108, -0.1192,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.1810e-01, -1.0338e-01, -4.4677e-01,  1.7960e-02, -3.4131e-02,
        -1.9307e-01, -8.9307e-02, -1.7191e-01, -3.3689e-03, -5.2245e-02,
        -2.7540e-02,  7.0365e-02, -9.5854e-03,  6.8571e-03, -5.6957e-02,
         2.8016e-02, -3.8818e-02, -9.6505e-02, -1.9623e-02, -1.0437e-01,
        -1.1081e-02, -6.1711e-02,  1.0927e-02,  1.3980e-02, -3.6515e-02,
         3.0964e-02, -7.1796e-02,  7.5191e-05,  2.2639e-02,  1.2810e-01,
         4.5009e-02, -4.3305e-02, -2.3829e-02, -1.8550e-02, -3.9162e-02,
        -5.9051e-02, -4.8757e-02,  9.9176e-03, -4.7229e-02, -4.8508e-04,
         1.1082e-02,  1.5761e-02,  7.1505e-02, -2.0528e-02, -1.9250e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3192, -0.9963, -0.1210, -0.2667,  0.0466,  0.2318,  0.0563,  0.1093,
        -0.4773,  0.0965, -0.0720, -0.0683,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6988, -0.9840,  5.7692, -0.1844, -0.9915, -1.8295, -0.9676, -0.0515,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8459e-01, -8.1663e-01, -4.4326e-02, -2.7908e-02, -2.3482e-02,
         3.7145e-02,  4.2748e-02, -3.3010e-02, -2.0132e-02,  1.3622e-01,
        -6.2905e-03, -2.7557e-03, -2.2006e-02,  1.6896e-02,  8.0507e-03,
         6.8187e-02, -2.0291e-03, -1.4516e-02, -1.3262e-01,  4.8633e-04,
        -7.2559e-03,  1.4696e-02, -3.6114e-02, -5.8519e-02, -1.4018e-02,
        -9.5285e-03, -1.7959e-02, -3.0516e-02, -6.6861e-02,  9.6051e-03,
        -4.4712e-02, -6.3793e-02, -1.5613e-02,  1.6641e-02,  3.6059e-02,
        -1.4481e-02, -4.2169e-02, -1.1680e-02,  1.2148e-02, -7.4959e-03,
        -4.9149e-03, -6.6936e-03, -2.1410e-02,  4.4406e-04, -1.8525e-01,
         2.3594e-02, -4.3733e-01, -5.8923e-02, -9.2362e-02, -3.0478e-01,
        -3.6495e-02,  1.8908e-01, -4.2056e-02,  5.7107e-03, -4.5071e-02,
        -2.4223e-02, -9.0974e-02,  4.8037e-03, -4.7265e-02, -2.2333e-02,
        -3.9979e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1765, -0.1819, -0.0167, -0.1028, -0.1838, -0.0239, -0.1202, -0.0378,
         0.0574, -0.1108, -0.0882, -0.0319, -0.0285, -0.0238, -0.0447, -0.0237,
         0.1277, -0.0307, -0.0384,  0.0716, -0.0886, -0.0158, -0.1284, -0.0749,
        -0.0559, -0.0945, -0.0228, -0.0098, -0.0281, -0.1149, -0.0845, -0.1125,
        -0.2387,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7246,  0.3733,  0.2769,  0.2224,  0.1270,  0.1612,  0.1336,  0.0188,
         0.1130,  0.1161, -0.0807,  0.0814,  0.0983,  0.1124,  0.2120, -0.4207,
         0.0351,  0.2995,  1.3119, -0.0545, -0.0975,  0.3348, -0.1744,  1.3930,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5615, -0.2683, -0.2285, -0.0632, -0.0487, -0.3153, -0.0026, -0.0590,
         0.0717, -0.2380, -0.1437, -0.1217, -0.3751, -0.0331,  0.0825, -0.0826,
         0.0321, -0.0037,  0.0300,  0.0147, -0.0940,  0.1154,  0.3416,  0.1540,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1734, -0.1970, -0.1201, -0.1149,  0.0479,  0.0205,  0.0592, -0.0460,
        -0.0016, -0.0383, -0.0370,  0.0537,  0.0140, -0.0103, -0.0095, -0.0919,
        -0.0577, -0.0285,  0.0050, -0.1367, -0.0188,  0.0232,  0.1398, -0.1028,
        -0.0154, -0.0388, -0.0230, -0.0205,  0.0261, -0.0118, -0.0911, -0.0098,
         0.2832, -0.0533,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0808, -0.0943, -0.0646,  0.0818, -0.1526,  0.0466, -0.0020,  0.0883,
         0.0060, -0.0557,  0.0379, -0.1056,  0.0431, -0.5003, -0.0421, -0.2560,
        -0.1074, -0.0982, -0.0032, -0.0250, -0.0357, -0.2320, -0.1345, -0.0066,
        -0.0544,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2348, -0.0460, -0.2182, -0.0542, -0.0011, -0.0646, -0.1209, -0.0760,
        -0.0667, -0.7855,  0.2443, -0.0464, -0.1094,  0.0886,  0.2130, -0.1203,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0136,  0.2802,  0.0776,  0.1438,  0.0909,  0.2321,  0.9361,  0.1126,
         0.0247,  0.0806,  0.0062,  0.0123,  0.1998,  0.0639,  0.1239,  0.0130,
         0.1329,  0.1114,  0.1218,  0.0586,  0.0360, -0.3405,  0.0546,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1095, -0.1164,  0.0705,  0.2265,  0.3887,  0.3483, -0.1546, -0.0575,
        -0.0934, -0.0456,  0.0480,  0.0820,  0.0607, -0.2343,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.4393, -0.2447, -0.0513, -0.4237, -0.1396,  0.3774, -0.1262,  0.1383,
        -0.1391, -0.0780, -0.1324, -0.1486, -0.0740, -0.0185, -0.0107,  0.0890,
         0.0590, -0.4725, -0.0194, -0.0234, -0.0911, -0.0573, -0.1083,  0.2766,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3154, -0.5633, -0.0054, -0.0393, -0.0969, -0.1497, -0.6563,  0.0579,
        -0.0947,  0.0462, -0.4104,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3482, -0.0983, -0.0358, -0.1406,  0.0022, -0.3096, -0.3925, -0.0675,
        -0.2028, -0.1137, -0.0704, -0.0406, -0.0899,  0.0030, -0.1099, -0.1060,
         0.0053, -0.2893, -0.1415,  0.0393,  0.0172,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4697, -0.3875, -0.0404,  0.1605, -0.0465, -0.0981, -0.2813, -0.3708,
         0.2356, -0.2700,  0.0704, -0.2781, -0.0743, -0.3679,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6925, -0.1411,  0.0351,  0.1508,  0.0374,  0.0068,  0.0604,  0.1306,
         0.0309, -0.0923, -0.2946, -0.0532, -0.3162, -0.0354, -0.0855, -0.0407,
         0.1423, -0.0127, -0.0080, -0.1337, -0.3008,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1524, -0.2899, -0.1632, -0.2182,  0.0736, -0.1250, -0.0493, -0.4450,
        -0.1800, -0.0767, -0.1983, -0.0134, -0.1607,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0669, -0.0962,  0.0209, -0.0048,  0.0045,  0.0342, -0.0317, -0.0429,
        -0.0899,  0.0070, -0.0726,  0.0200,  0.0417,  0.0303, -0.0370, -0.0252,
        -0.0106,  0.0199,  0.0208,  0.0733, -0.0934, -0.0468,  0.0040,  0.1490,
        -0.4238,  0.0602, -0.1033, -0.1733, -0.0990, -0.0564, -0.0772,  0.0957,
         0.0234,  0.0495,  0.0485, -0.0367, -0.0525, -0.0096,  0.0323,  0.0070,
        -0.0301,  0.0380, -0.0218,  0.1471,  0.0849], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0925, -0.3627, -0.1202, -0.0831,  0.0051,  0.0311, -0.0576, -0.0295,
        -0.0590, -0.1169, -0.2271,  0.0349,  0.0166,  0.0197, -0.0691, -0.0094,
        -0.3593, -0.0015,  0.0841,  0.0095, -0.0820, -0.0777, -0.0758, -0.1553,
        -0.0411, -0.1191, -0.0178, -0.1440, -0.1281, -0.0171, -0.0460, -0.0699,
        -0.0151, -0.1480,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2199, -0.7279, -0.1804, -0.0540, -0.1018, -0.0630, -0.1017, -0.0680,
         0.1967, -0.1177, -0.0417, -0.0325, -0.0206, -0.0038,  0.0174,  0.0925,
         0.0225,  0.1088, -0.0190, -0.3116, -0.7461, -0.0730, -0.1074,  0.1774,
        -0.0549,  0.3967,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1945, -0.8075,  0.1451, -0.1522,  0.0795, -0.2478,  0.2715, -0.0482,
         0.0174, -0.0908, -0.0179, -0.0212,  0.0499,  0.0478,  0.0636, -0.0681,
        -0.2843,  0.1042,  0.0789,  0.0901, -0.0475,  0.4751,  0.0193,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0630, -0.0623,  0.0163, -0.1197,  0.0172, -0.0499, -0.0597, -0.0677,
        -0.1277, -0.0453,  0.0452, -0.0061,  0.0461, -0.0784, -0.4023,  0.2069,
         0.0032, -0.1401, -0.0897, -0.0778,  0.0822, -0.0317, -0.0341,  0.0242,
         0.0155, -0.0071, -0.0781, -0.2636,  0.0689,  0.0453,  0.0422, -0.0119,
         0.0071, -0.0633,  0.1312,  0.0594, -0.0186,  0.0594, -0.0526,  0.0008,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0113, -0.1994, -0.0463, -0.0354, -0.1668, -0.0466, -0.1664, -0.1908,
        -0.0498, -0.0768, -0.0191,  0.0059, -0.0562, -0.0610, -0.0530, -0.0635,
        -0.0683,  0.0796,  0.0608, -0.1553, -0.0974, -0.0479,  0.0069,  0.0379,
        -0.0168, -0.1117,  0.0193, -0.0685, -0.0375, -0.0946, -0.1257,  0.0129,
         0.1065, -0.4231,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.1485, -0.1083, -0.0485,  0.0366, -0.0090, -0.0138, -0.0535,  0.0157,
         0.0068,  0.0837, -0.0179, -0.0311, -0.0430,  0.0574, -0.2986, -0.0367,
         0.0378, -0.0249, -0.1126, -0.0166, -0.0451,  0.0037,  0.0340, -0.0075,
        -0.1208, -0.2572, -0.2473, -0.0101, -0.0160, -0.0564,  0.0984, -0.0212,
         0.0777, -0.0378, -0.0462,  0.0015, -0.0439, -0.0759, -0.0343, -0.0658,
        -0.0371, -0.0261, -0.0097, -0.0095, -0.0176, -0.0112,  0.0010,  0.0157,
        -0.0304, -0.0488,  0.0663, -0.0132], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2086, -0.0907, -0.0445,  0.0708,  0.2517,  0.0120,  0.1645, -0.2099,
         0.0609,  0.1180,  0.0790, -0.0581,  0.0072,  0.1956,  0.1870,  0.1291,
         0.2102, -0.1612,  0.1195, -0.0262,  0.0878, -0.0074,  0.0914,  0.2229,
        -0.0070,  0.0007,  0.0539, -0.0153,  0.0710,  0.2394,  0.1666,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2118,  0.1000,  0.0468, -0.0203,  0.0306,  0.0657,  0.0319, -0.0955,
         0.0576, -0.0584,  0.0787,  0.1235,  0.1648, -0.0634,  0.0100,  0.0966,
        -0.3106,  0.0618,  0.3714,  0.3396,  0.1053,  0.1154,  0.1748,  0.0081,
        -0.1918,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4063, -0.7645,  0.0156,  0.3219,  0.0630,  0.2850,  0.8756, -0.2914,
         0.1601,  0.2650, -0.4555,  0.1461,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4235,  1.5713,  0.0472,  0.1687,  0.2475,  0.0887, -0.0357, -0.0223,
        -0.0130,  0.3541,  0.1014,  0.0102,  0.0776,  0.4289,  0.2317,  0.2808,
         0.1371,  0.1567, -0.0293, -0.0584,  0.0647,  0.1331,  0.1012, -0.0948,
         0.0926,  0.3067,  0.1960, -0.1263, -0.0444,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0264, -0.5975,  0.0263, -0.2760,  0.0877, -0.0910, -0.3038,  0.0195,
        -0.0122,  0.0223,  0.0508, -0.0948,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3835e-02, -7.5850e-01, -2.9376e-02, -1.9801e-01, -2.8829e-02,
        -7.9218e-02, -3.4971e-01, -2.2649e-01, -1.0870e-01, -3.5858e-02,
         2.4223e-04, -9.9491e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3971, -0.1461, -0.3407, -0.1212, -0.1466,  0.0868, -0.1066, -0.1728,
        -0.1950, -0.0357, -0.0903, -0.2828, -0.0113,  0.1214, -0.0436, -0.1756,
        -0.0740, -0.0717,  0.2053,  0.0909,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2968,  0.6630,  0.0582,  0.0310,  0.6506, -0.0098,  0.2022, -0.1514,
         0.0467, -0.0029, -0.0740,  0.0031,  0.0616, -0.0038, -0.0673,  0.0081,
         0.0339,  0.0095,  0.0067,  0.0946,  0.0849, -0.0386,  0.0356, -0.1068,
        -0.0211, -0.0835,  0.0433,  0.0983,  0.0054, -0.0266,  0.0099, -0.0118,
         0.0607,  0.3087,  0.0778, -0.1714, -0.0258,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 16.1402,  15.1851,  95.0671,  10.6649,  28.4706,   3.7100,  30.6812,
         32.8193, -85.2646,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5893, -0.0890, -0.0010, -0.2523, -0.1144, -0.3552, -0.2931, -0.0827,
        -0.0291, -0.0411, -0.0087, -0.2975, -0.2889, -0.1848,  0.0095,  0.1596,
        -0.0699,  0.2032, -0.2048, -0.2236, -0.1587,  0.0045, -0.0032,  0.0290,
        -0.0888,  0.0554, -0.0314,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1469,  0.3631,  0.1726,  0.0926,  0.0651,  0.0845,  0.0382,  0.0341,
        -0.0490,  0.2911, -0.1838,  0.0756, -0.0174,  0.0089,  0.0095,  0.0591,
         0.0742, -0.0249,  0.0647, -0.1074, -0.3555,  0.0963,  0.1005,  0.0762,
         0.0664,  0.1909,  0.0458,  0.0726,  0.6103, -0.0227,  0.0317,  0.0764,
        -0.0646, -0.0606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 4.0720e-01,  1.7762e-03, -4.1679e-02,  5.3947e-02, -5.1636e-02,
        -3.2473e-03,  1.7616e-02,  4.4024e-02, -1.0418e-04,  2.3594e-03,
        -1.7919e-02, -2.7556e-01, -1.0452e-01, -4.8716e-03, -7.8686e-02,
        -2.6895e-02, -1.0346e-01,  1.5179e-01, -6.3840e-02,  1.4284e-03,
         5.4574e-02,  4.9388e-02, -6.6508e-02,  9.0927e-03, -6.4751e-02,
        -4.5258e-02, -7.6073e-02, -1.0948e-01, -1.2840e-04, -2.8827e-03,
        -7.8487e-02, -1.6856e-01, -1.6810e-01,  1.3400e-02,  1.2992e-01,
        -6.7723e-02, -1.1841e-01, -3.2873e-02,  2.9232e-02,  1.5147e-02,
        -4.3641e-02, -2.0736e-01,  3.6873e-02, -1.0534e-01,  1.7546e-04,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1556,  0.0243,  0.0771, -0.0208,  0.2563,  0.1918,  0.2249,  0.0201,
         0.1324,  0.1899,  0.0604,  0.0097,  0.1449, -0.0903,  0.0121,  0.1396,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4849,  0.1351, -0.1363, -0.2100, -0.0322, -0.1133, -0.3163,  0.0662,
        -0.1462, -0.1409, -0.0632, -0.1430, -0.1599,  0.0502, -0.0395, -0.0098,
        -0.0230,  0.0520, -0.0272, -0.0090, -0.0076, -0.0468, -0.0896, -0.1773,
         0.0645,  0.1135,  0.1153,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4134e-01,  2.4649e-03, -6.9416e-02, -5.8459e-06, -1.6297e-02,
         2.0405e-02, -3.1697e-02, -1.0368e-02, -4.8834e-02, -1.4670e-03,
        -3.3441e-02, -5.8995e-02, -5.4831e-02, -6.2834e-03,  4.8348e-03,
         1.6606e-02, -2.9225e-02,  5.5420e-03,  7.7368e-02, -8.1707e-02,
         4.7733e-02, -3.6328e-02, -6.6742e-03, -2.9496e-01, -2.1827e-02,
         4.8803e-03, -7.3048e-03,  1.7341e-02,  1.4835e-03, -4.2061e-02,
         1.4066e-02, -1.4680e-02, -3.0112e-02,  6.7518e-03,  3.9679e-03,
         4.6279e-03, -9.6291e-03, -1.8248e-01, -4.3192e-02,  1.6304e-02,
        -3.4120e-02, -1.2408e-01, -1.8020e-02, -3.3693e-02,  1.1729e-01,
         1.0973e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1645,  0.1319, -0.0959, -0.1465, -0.0569, -0.2339, -0.1166, -0.0460,
         0.0013,  0.0297, -0.1759, -0.1546, -0.0762, -0.1007, -0.2288, -0.0469,
        -0.1771, -0.0686, -0.0517, -0.2147, -0.1105,  0.0811,  0.0264, -0.0424,
        -0.1022, -0.0196, -0.1259,  0.0066, -0.0774,  0.2907,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2979,  0.5630,  0.4360,  0.2581,  0.0437,  0.1732, -0.0323,  0.3714,
         0.2825, -0.2095,  0.2270,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1833, -0.5354, -0.1150,  0.0823, -0.1166, -0.1244, -0.1148, -0.4070,
        -0.0058, -0.1940, -0.0287,  0.0142, -0.0645,  0.0061,  0.1141, -0.1130,
        -0.0072, -0.0728, -0.0385, -0.2054,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0192, -0.5027, -0.1201, -0.2413,  0.1400,  0.0049, -0.6451, -0.0091,
         0.1122,  0.2398,  0.1509,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0604, -0.5581, -0.1333, -0.2809, -0.0884,  0.0362, -0.0424, -0.1173,
        -0.1388, -0.0665, -0.0952, -0.0147, -0.0789,  0.0563, -0.0829, -0.1625,
         0.1703,  0.0347,  0.0355, -0.1625,  0.1994,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2116, -0.1765, -0.0811,  0.0742, -0.0046, -0.9426,  0.0097, -0.0276,
         0.0083,  0.0268, -0.0568,  0.0179,  0.0964,  0.0461,  0.0583,  0.1684,
         0.1833, -0.0735,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0872e-02, -1.1339e-01, -5.3257e-01,  3.8159e-02, -3.4751e-02,
        -4.5329e-02, -4.0270e-02, -3.6345e-02, -2.7652e-02, -2.9157e-04,
        -3.4593e-02, -2.8915e-03, -1.4598e-02, -7.7857e-02, -1.2089e-01,
        -2.7285e-02, -6.4453e-02, -7.2208e-02, -6.2416e-02,  7.4154e-02,
         1.7682e-02, -7.5728e-02, -1.7991e-01, -2.9940e-02, -5.7973e-02,
         1.6273e-02, -6.2623e-02, -6.9494e-02, -4.7478e-02, -1.1236e-01,
         2.5046e-02, -6.0682e-02, -5.4640e-02,  4.8086e-02, -1.8224e-02,
        -1.7740e-03, -3.7909e-02,  1.1201e-02, -2.0532e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1759e-01,  1.4649e+00, -3.4600e-01, -2.5546e-01, -1.6612e-01,
         1.5880e-02, -4.5047e-02,  1.0894e-01, -2.0302e-01, -7.7272e-02,
        -7.5061e-02,  4.8769e-02, -8.5666e-02, -1.2579e-02, -1.7564e-01,
        -1.0759e-02, -2.7848e-02, -1.2818e-02, -7.3337e-02,  1.4342e-02,
        -1.4363e-02,  1.4919e-02,  2.0359e-03,  6.7507e-02, -6.2146e-02,
        -3.9996e-02,  2.4704e-02, -4.6741e-02,  1.5452e-01,  4.9258e-01,
         8.4322e-02,  1.2924e-03, -5.7372e-01,  2.8057e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.2504, -0.7250, -0.7036, -0.5137, -0.4759, -0.6422, -0.2696,  0.0333,
        -0.2660,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0528, -0.2274, -0.1227, -0.0227, -0.0722, -0.0140, -0.0235,  0.0246,
         0.0634, -0.0051, -0.1156, -0.0211,  0.0233, -0.0044,  0.0590, -0.0127,
         0.0614, -0.6359, -0.0764,  0.0728,  0.0126,  0.0288,  0.1357, -0.0060,
        -0.0553, -0.0237, -0.0384,  0.0847, -0.0272, -0.0082,  0.0138, -0.0956,
        -0.0059, -0.0652, -0.0353, -0.0200,  0.0231, -0.0272,  0.1314,  0.0708,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9795e-02, -1.2722e-01, -2.7161e-01, -6.0277e-02, -9.2347e-02,
         2.0207e-02, -4.0988e-02, -1.4832e-02, -2.3522e-02, -1.7411e-01,
        -1.8304e-02,  4.0788e-03, -5.8800e-03,  1.7242e-02, -5.7788e-04,
         5.0428e-02,  1.4958e-02, -6.5204e-02, -9.2793e-02, -3.6672e-02,
        -2.1029e-02, -7.0263e-02,  4.8583e-02, -5.0934e-02,  8.5298e-02,
        -1.4425e-02, -3.6495e-02, -1.0698e-01, -3.1054e-02, -1.9031e-01,
        -1.0122e-01, -8.3372e-02, -1.3086e-02, -5.1208e-02, -2.2690e-02,
        -8.7732e-03, -9.8874e-02, -1.3507e-01, -1.6338e-02, -1.2968e-03,
         3.6898e-02, -1.5808e-02, -3.9654e-02, -1.7928e-03,  1.8665e-02,
        -2.1456e-04,  1.3892e-03,  2.6270e-02,  2.6784e-02, -2.2913e-02,
        -6.0862e-02, -1.3859e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1599, -0.3661, -0.1303, -0.0304, -0.0886,  0.0209, -0.1011,  0.0360,
        -0.0209,  0.0601, -0.1827, -0.1996, -0.0437, -0.0081, -0.1130, -0.2123,
        -0.1054,  0.2102, -0.1227, -0.1043, -0.0537, -0.1370,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6557e-01, -1.1878e+00, -1.3757e-01, -1.1771e-01, -2.7887e-01,
        -6.8284e-02, -1.3313e-01,  2.0447e-02, -4.0959e-02, -2.8638e-01,
         2.8910e-02, -1.0987e-01,  1.9824e-02, -6.3769e-02, -1.0694e-03,
        -8.9475e-02, -1.4948e-01, -3.9996e-02,  8.9025e-02, -1.1658e-01,
         5.0224e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1474, -0.1262,  0.0218, -0.0145,  0.0781,  0.0075, -0.0808,  0.0073,
        -0.0393,  0.0128, -0.0144,  0.0092, -0.0033,  0.0166, -0.0081, -0.0150,
        -0.0312,  0.0196, -0.0222, -0.0171, -0.0100,  0.0326,  0.0115,  0.0315,
        -0.0269,  0.0358, -0.0418, -0.0512,  0.0071,  0.0135, -0.0435, -0.0424,
        -0.0221, -0.0258, -0.0070, -0.0291, -0.0872, -0.0339, -0.0189, -0.0415,
        -0.4160, -0.0009, -0.0803, -0.0729, -0.0587, -0.0167, -0.0833, -0.1214,
        -0.0045, -0.0065, -0.0675, -0.0580,  0.0089, -0.1307, -0.0693],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5036, -0.0925,  0.0586, -0.0272, -0.0028, -0.0902, -0.0285, -0.0009,
         0.0204,  0.0204,  0.1359,  0.0246,  0.0120, -0.0237,  0.0097,  0.0104,
         0.0188, -0.0575,  0.0401, -0.0083,  0.0386, -0.0524, -0.0244,  0.0078,
         0.0433,  0.0390, -0.0259, -0.3767, -0.0961, -0.0982, -0.0369, -0.0093,
        -0.1863, -0.2088, -0.0473, -0.0173,  0.0102, -0.0847, -0.0063, -0.0333,
        -0.0568,  0.0355,  0.0696, -0.0717, -0.0310,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3578,  0.5845,  0.2068,  0.3270,  0.0963,  0.0249,  0.0428, -0.0520,
         0.0901, -0.0098,  0.0030,  0.0953,  0.0765, -0.0008,  0.0797, -0.1261,
         0.0558,  0.3158, -0.0725,  0.0565,  0.4437, -0.0208, -0.1784,  0.0467,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2550, -0.1557, -0.0637, -0.0671, -0.0606,  0.0281,  0.0695, -0.0519,
        -0.0206, -0.0047, -0.0226,  0.0166, -0.0237, -0.0681, -0.1970, -0.0398,
        -0.1710, -0.1137, -0.0372, -0.0235,  0.1084, -0.0089, -0.0774, -0.1445,
         0.2195, -0.0590, -0.1387, -0.0040, -0.0248, -0.2673, -0.0248, -0.0499,
         0.0894,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0237, -0.8161, -0.1313, -0.0801, -0.0933,  0.0405, -0.2093,  0.0586,
        -0.3013, -0.0381,  0.0479,  0.0083,  0.2667, -0.1429, -0.0568, -0.0461,
        -0.0173, -0.0827, -0.0100,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1570,  0.1687,  0.2346,  0.2246,  0.1683,  0.0351,  0.2036,  0.0986,
         0.3617,  0.0018, -0.1372, -0.0135, -0.0278, -0.1088, -0.0044, -0.0322,
         0.1576,  0.1259,  0.0381, -0.0641,  0.0685,  0.3962,  0.1743, -0.1804,
        -0.1358,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7052,  0.2823,  0.1503,  0.2025,  0.0831,  0.1800,  0.9818, -0.0365,
        -0.0502,  0.1035,  0.0500,  0.1181,  0.0754,  0.1205,  0.1073, -0.0943,
         0.0011,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.3137,  0.0138, -0.1110, -0.0438,  0.0107, -0.0029,  0.0921, -0.2199,
         0.1157, -0.0787, -0.0250,  0.0981, -0.0130, -0.0689, -0.1311, -0.1525,
        -0.0701, -0.4012,  0.1147, -0.2464, -0.0928, -0.2032, -0.2298, -0.0281,
        -0.0055, -0.0383, -0.1118, -0.0932,  0.0035,  0.0149,  0.1533, -0.0484,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3772, -0.4041, -0.1816, -0.1199, -0.0950, -0.0269, -0.0497, -0.1250,
        -0.0442, -0.0140,  0.0668,  0.0509,  0.0649, -0.1068, -0.1030,  0.1204,
        -0.0421,  0.0341, -0.2064, -0.0632, -0.0818, -0.0073, -0.2323, -0.3744,
        -0.0300, -0.0346,  0.0182,  0.0121,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1511, -0.3753,  0.0491, -0.0769,  0.0398, -0.0546, -0.2311, -0.1300,
        -0.0423, -0.1033, -0.0575, -0.1844,  0.0174,  0.0436, -0.1116, -0.0529,
        -0.0371, -0.0962, -0.0295,  0.0269, -0.0181, -0.0435,  0.0396,  0.0193,
        -0.0256,  0.1208, -0.0235, -0.2794,  0.0936,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1809,  0.0173, -0.0661, -0.0649, -0.2073, -0.0586,  0.1044,  0.0094,
         0.0403,  0.0631, -0.0270, -0.0557, -0.1266, -0.1933, -0.0646, -0.0882,
        -0.0625, -0.1996, -0.0307, -0.0730,  0.0078,  0.0310, -0.0018, -0.0790,
        -0.0667,  0.0138, -0.0470, -0.0814, -0.0557,  0.0044,  0.0020,  0.1590,
        -0.0142,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0189, -0.3617, -0.0759, -0.1549, -0.0764, -0.1359, -0.0633, -0.0835,
        -0.0538, -0.0700, -0.0720, -0.0481, -0.0679, -0.0527,  0.0016,  0.0080,
        -0.0869, -0.0348, -0.2971, -0.1275, -0.3228,  0.1090,  0.1785, -0.0561,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2241, -0.3310, -0.1366, -0.0242,  0.0626, -0.2408, -0.1822, -0.1623,
        -0.0494, -0.0537, -0.0254, -0.0933, -0.0646, -0.0328,  0.0192, -0.0814,
         0.0185, -0.0638, -0.0104, -0.5731, -0.1486,  0.0070,  0.1640,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2277, -0.1880,  0.0123,  0.0804, -0.0144, -0.0433, -0.0545, -0.0318,
         0.0360, -0.0601,  0.1109, -0.0225, -0.0341,  0.0106, -0.0121,  0.0542,
        -0.1241, -0.1484, -0.0283, -0.0833,  0.0174, -0.0490, -0.0146, -0.0345,
        -0.1078, -0.0204, -0.0022, -0.0316, -0.0113, -0.0805, -0.4529, -0.0019,
        -0.0346,  0.0566, -0.0340, -0.0483, -0.0122,  0.1343,  0.0438,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1900, -0.5100, -0.0025, -0.0860, -0.4106, -0.0348,  0.0983,  0.0514,
        -0.0131,  0.1023, -0.0019, -0.0834,  0.0771,  0.0087,  0.1534, -0.0803,
         0.0374, -0.0195, -0.0244,  0.0664, -0.0862, -0.0664, -0.0673, -0.1100,
        -0.1403, -0.0897, -0.0580, -0.0355, -0.0400, -0.0916, -0.0320, -0.0257,
        -0.1824,  0.0212,  0.0139,  0.0105, -0.0856, -0.3164, -0.0637, -0.0517,
        -0.0839, -0.2213,  0.1045,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6110e-01, -6.1877e-01, -3.9273e-03, -4.0278e-02,  1.7767e-02,
        -4.6668e-02, -1.0676e-01, -4.2766e-02, -1.9825e-01,  7.7980e-02,
        -1.2685e-02,  8.3219e-03, -3.7785e-02,  8.0641e-03,  1.0996e-02,
         5.1968e-02,  8.6390e-03, -1.2932e-01,  1.3977e-03, -1.3947e-02,
         7.8326e-03, -7.2032e-02, -1.0727e-01, -7.3878e-02, -3.7992e-02,
        -5.4292e-02, -2.4348e-02, -3.9597e-03, -2.7653e-02,  2.4437e-02,
        -4.2453e-04, -1.5543e-02, -6.8463e-02,  1.3089e-02,  1.2229e-02,
         4.3292e-02,  1.5074e-01, -5.3259e-02, -5.3234e-02, -2.1346e-02,
        -2.9882e-02,  2.1027e-03, -2.2461e-02,  6.9839e-02, -9.5400e-03,
        -6.7395e-02, -2.1361e-02, -1.2529e-02,  1.2366e-02, -5.3622e-02,
        -4.2882e-02, -3.3094e-03,  6.7311e-02,  2.1943e-02,  9.8997e-03,
        -3.2385e-02, -1.3450e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2647, -1.0695, -0.1775, -0.0526, -0.0651,  0.0236, -0.1553, -0.3099,
        -0.1455, -0.1041, -0.0999, -0.1101,  0.1648, -0.3110,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7052, -0.3215, -0.0862, -0.0829,  0.0224, -0.0077, -0.0240, -0.0224,
         0.0366,  0.0248, -0.0784, -0.4723, -0.1790,  0.0032, -0.0966, -0.0206,
        -0.0042, -0.1332, -0.0111, -0.0477, -0.4423, -0.1496, -0.0740, -0.0024,
         0.1328,  0.0074,  0.0924,  0.0393,  0.1243,  0.1219,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8606e-01, -2.1688e-01, -6.0502e-04, -1.4679e-01,  1.0751e-01,
         2.8081e-02, -9.2262e-02, -2.4256e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2909, -0.1183,  0.0173, -0.0407, -0.0265, -0.2691,  0.0439,  0.0621,
         0.0468, -0.0126, -0.0376,  0.0228, -0.1144, -0.2641, -0.0348, -0.0085,
         0.0302, -0.0576, -0.0463, -0.0772,  0.0238, -0.0105, -0.0510, -0.1642,
        -0.1536, -0.0670, -0.0567,  0.2550, -0.1171, -0.2491,  0.1129, -0.0378,
         0.0715,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2201,  0.0928,  0.0703, -0.0179,  0.0357,  0.1121,  0.0137, -0.0544,
         0.0549,  0.0396,  0.0117,  0.0233,  0.0419, -0.0093, -0.0152, -0.0441,
         0.0387,  0.0194,  0.0526,  0.0147,  0.0270, -0.0211,  0.0297,  0.0109,
        -0.0033,  0.0106,  0.0460,  0.1582, -0.0257,  0.0446, -0.0148,  0.0203,
         0.0479,  0.2144,  0.0384,  0.0448,  0.0381,  0.0183,  0.0038, -0.0436,
         0.0050, -0.0067,  0.0099,  0.0913, -0.0498, -0.0730,  0.0199,  0.0087,
         0.1066,  0.0540,  0.0027,  0.0785,  0.0098,  0.0291,  0.0117, -0.0410,
         0.0666,  0.0290,  0.0376,  0.1130,  0.0906, -0.0028,  0.2259,  0.0538,
         0.0016,  0.0034, -0.0066, -0.0872,  0.0145, -0.0934, -0.1058],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2786, -0.2060, -0.0181,  0.0476, -0.1370,  0.0643, -0.6750, -0.0101,
        -0.0378, -0.0460, -0.0137, -0.0311, -0.0495,  0.0151, -0.0838, -0.0207,
         0.0129,  0.0344, -0.0725, -0.0584,  0.0080,  0.0046,  0.1831, -0.1784,
         0.0265, -0.0176, -0.0710, -0.0399,  0.0082,  0.0285, -0.0411, -0.0327,
        -0.0409, -0.1015, -0.0891, -0.1035, -0.0821, -0.0178, -0.0035, -0.0011,
        -0.1539,  0.0370,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2357, -0.9444, -0.1455, -0.0145,  0.0207,  0.1346, -0.0095, -0.0285,
        -0.0404, -0.0757, -0.1073, -0.0252, -0.1397,  0.0535, -0.0602,  0.0300,
        -0.0095, -0.0838, -0.2309, -0.0576, -0.0277, -0.0364, -0.0402, -0.0400,
         0.0144, -0.0655, -0.0520,  0.0108, -0.1048,  0.0527,  0.0371,  0.0075,
         0.0628, -0.0701, -0.0747, -0.0787,  0.0582,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3691, -0.8889,  0.1516, -0.1192, -0.0546,  0.0970, -0.1489, -0.0210,
        -0.0624, -0.0582, -0.1130,  0.0216, -0.0902,  0.2226,  0.0474,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1437,  0.2085,  0.1116,  0.0797,  0.1332,  0.0727,  0.1359,  0.0727,
         0.2549,  0.0120,  0.1055, -0.0800,  0.1761,  0.0773, -0.1041, -0.1525,
         0.1174,  0.0815, -0.0004, -0.1089,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1341, -0.1139,  0.0878,  0.1509, -0.3191, -0.1769, -0.0240, -0.0205,
        -0.0141, -0.0524, -0.0230, -0.1003, -0.0888, -0.0310, -0.0853, -0.0278,
         0.0104, -0.0103,  0.0218, -0.0626, -0.0054, -0.0432, -0.1019,  0.0406,
        -0.0139, -0.0574, -0.1709, -0.0524, -0.0317, -0.0239, -0.1566, -0.0041,
        -0.0455, -0.0903,  0.0108, -0.0792, -0.0224, -0.0186, -0.0273, -0.0889,
         0.1679,  0.0825,  0.1710,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1817, -0.3056,  0.0414, -0.1219, -0.0599, -0.0536, -0.0483, -0.3305,
        -0.0386, -0.0119, -0.0044, -0.0448, -0.0310,  0.0111, -0.1841, -0.0336,
        -0.0297,  0.0261, -0.0090,  0.0565, -0.0609, -0.0399,  0.0038, -0.0126,
        -0.0710, -0.0203,  0.0118,  0.0349, -0.1516, -0.0352,  0.0088, -0.0843,
        -0.0745, -0.0204, -0.1092,  0.0367, -0.1160,  0.2676,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6516e-02, -3.8780e-01, -2.2262e-01, -3.7687e-01, -4.2417e-02,
        -1.3929e-01, -2.2277e-02,  1.3429e-01,  3.1223e-02, -2.3452e-01,
        -7.2398e-02, -1.1275e-01, -1.0570e-01, -3.0256e-02,  9.4622e-05,
         2.7162e-02, -1.5306e-02, -3.5666e-03, -1.7602e-02, -2.2927e-02,
         7.9218e-02, -9.2667e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1495, -0.1851,  0.0187, -0.0043,  0.0059, -0.0586, -0.0710, -0.0145,
        -0.1745, -0.2414,  0.0026, -0.0074, -0.0835,  0.0068, -0.0606, -0.0922,
        -0.3687, -0.0655, -0.0807, -0.1259,  0.0223, -0.0112, -0.0898, -0.0034,
        -0.1465, -0.0220, -0.0196, -0.0598,  0.0253, -0.1082, -0.1468, -0.0233,
        -0.0620,  0.0306, -0.0284, -0.2946,  0.0641,  0.0548, -0.2242, -0.0952,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0121, -0.0464,  0.1484,  0.0088,  0.0069, -0.0535,  0.0515, -0.1131,
        -0.0697, -0.2385, -0.0494, -0.0447, -0.0886, -0.2125, -0.1938, -0.2353,
        -0.1094, -0.1074, -0.3329, -0.0669, -0.1134, -0.0379, -0.0808, -0.2171,
        -0.2305, -0.2627,  0.0244, -0.1350, -0.1928, -0.2345, -0.0846, -0.0540,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3516, -0.5277,  0.0217,  0.1876, -0.0910, -0.0214, -0.0358, -0.0870,
        -0.2720, -0.0425, -0.0271, -0.0604, -0.0034, -0.0371, -0.0215, -0.0133,
        -0.0841, -0.0439, -0.0288,  0.0152, -0.0418,  0.0208, -0.0103, -0.1686,
        -0.0177,  0.1436, -0.0503, -0.0123,  0.0338,  0.0077,  0.0788,  0.0410,
         0.0334,  0.2821, -0.3269,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.1951,  0.3241,  0.3778,  0.4723,  0.0963,  0.6475,  0.2249,  0.3520,
         0.2477,  0.3541, -0.0523,  0.1693,  0.0474,  0.2554,  0.1215, -0.0428,
         0.1692,  0.1424, -0.0752,  0.1934, -0.2000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1455, -1.0009,  0.0703, -0.3029, -0.0935, -0.0759,  0.0462, -0.2831,
        -0.3254,  0.0111, -0.0018,  0.0432, -0.0186, -0.0335, -0.0392,  0.0530,
        -0.0959,  0.1274, -0.0981,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4504, -0.8474, -0.2124, -0.0025, -0.0506, -0.1035,  0.0261, -0.1994,
         0.0211, -0.2283, -0.5336, -0.1962, -0.0506, -0.4585,  0.0240, -0.0081,
         0.0414, -0.0669,  0.0703,  0.0347,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2784,  0.6481,  0.1569,  0.1689,  0.1054,  0.0253, -0.0525,  0.0125,
         0.1402,  0.1122,  0.0515,  0.0689,  0.0622,  0.0914,  0.1271,  0.0959,
         0.1149,  0.0522,  0.0373,  0.1668,  0.0031, -0.2175, -0.2052,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0919,  0.4428,  0.1685, -0.0111,  0.0971,  0.0355, -0.0350, -0.0245,
         0.0752,  0.4331,  0.0066,  0.0470,  0.1121,  0.4911, -0.2420, -0.0921,
        -0.0378, -0.1036,  0.1466,  0.0816,  0.0925,  0.0632, -0.0145, -0.0126,
        -0.0699,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8474e-01,  9.7807e-01,  1.4917e-01,  4.0991e-02,  1.9557e-01,
         1.5800e-01, -1.8150e-01,  3.4022e-02,  8.8469e-04,  1.4413e-01,
         2.1553e-02,  8.7811e-02,  1.5322e-02, -9.5660e-02, -7.9990e-02,
        -1.8599e-01,  9.8074e-02, -3.1973e-02,  5.6927e-02,  8.5836e-02,
        -1.8339e-02,  4.3090e-02,  1.1444e-01,  5.9336e-02,  6.8351e-02,
         1.5484e-01, -3.5195e-02, -2.9715e-02,  9.0887e-03, -5.0138e-02,
         1.5985e-02, -3.6656e-03,  3.8154e-02, -3.3836e-01, -1.2862e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1910,  0.0804, -0.0007, -0.1347, -0.0202,  0.0433, -0.1134, -0.0138,
        -0.2338,  0.0134,  0.1605, -0.0639,  0.0768, -0.1247, -0.1196, -0.1528,
         0.0859, -0.1904, -0.4129, -0.2272, -0.1438, -0.1181,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0198e+00,  5.5145e-03,  1.9318e-01, -1.0722e-01, -2.7667e-02,
        -1.5302e-01, -7.7171e-02,  5.7040e-02,  1.3501e-02, -2.4345e-01,
        -2.7664e-01,  1.1723e-01, -3.5226e-02,  1.7806e-04, -3.5804e-02,
        -1.2475e-01, -8.0625e-02, -7.1833e-02, -8.2448e-03, -2.7375e-01,
         1.1138e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1239, -1.1217, -0.1165, -0.0230,  0.0139, -0.1802, -0.1535,  0.0275,
        -0.1156,  0.0914, -0.1024, -0.1159, -0.1167, -0.1072, -0.0238, -0.1091,
         0.0095,  0.3618, -0.0888,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3406,  0.1270, -0.2028, -0.0682,  0.0044, -0.0054, -0.0097,  0.0298,
         0.0577,  0.0066,  0.0034,  0.0544,  0.0146, -0.0814,  0.0142, -0.0322,
        -0.0052, -0.1518, -0.0094, -0.0552,  0.0524, -0.0133,  0.0775, -0.0279,
         0.0576,  0.0215, -0.0177, -0.1036, -0.0120, -0.3350, -0.0631, -0.0470,
        -0.1111, -0.0942,  0.0582,  0.0029, -0.0234, -0.0052,  0.0599, -0.0108,
        -0.0264,  0.0058, -0.0152, -0.0034,  0.0084,  0.0048,  0.0935],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0722, -0.3089, -0.1527, -0.0275, -0.0071, -0.0499,  0.0182,  0.0517,
        -0.0519,  0.0194,  0.0078,  0.0822,  0.0055,  0.0183,  0.1732, -0.0469,
        -0.1695, -0.1372, -0.0762,  0.0589,  0.0373, -0.0790, -0.1220, -0.3200,
        -0.1569,  0.0495,  0.1157,  0.0186, -0.0781,  0.0522, -0.0205,  0.0023,
        -0.0934, -0.0871,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0308, -1.3728,  0.0666,  0.0675, -0.1938, -0.1088, -0.0597,  0.0635,
        -0.0417, -0.0125,  0.0655,  0.0367, -0.1576, -0.0014,  0.0454,  0.2274,
         0.0155, -0.0125, -0.0754, -0.1293, -0.0175,  0.0270,  0.0194,  0.0146,
         0.0042, -0.0741, -0.1410, -0.0825, -0.0108, -0.0317,  0.0253, -0.1258,
         0.1919, -0.1438,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.2528, -1.1494, -0.0871,  0.0444, -0.0058, -0.1026,  0.0245,  0.0577,
         0.0527, -0.3788, -0.2356,  0.1289, -0.1365, -0.3351, -0.1131, -0.1337,
        -0.0977, -0.0854,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0538, -0.1244, -0.2039,  0.0025, -0.0263,  0.0966, -0.0513,  0.0192,
        -0.0761, -0.3659,  0.0530,  0.1741, -0.0077,  0.0645,  0.0609, -0.0418,
        -0.0415,  0.0457,  0.0820, -0.3708, -0.1184,  0.0562, -0.1024, -0.0452,
         0.0300,  0.0102, -0.1796,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1604, -0.1506, -0.3393,  0.0237, -0.1096,  0.0131, -0.0130, -0.1810,
        -0.4393,  0.2012,  0.0145, -0.1136, -0.2792, -0.0936, -0.0692,  0.0034,
        -0.0768, -0.0779, -0.0786, -0.7010, -0.0448, -0.0422, -0.0518,  0.0151,
        -0.0574, -0.0068,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8349,  0.4155,  0.1852,  0.1894,  0.5549,  0.0913,  0.1053,  0.3190,
         0.0907,  0.0937,  0.1108,  0.0534,  0.0148,  0.0034,  0.0822, -0.0111,
         0.0887,  0.0827,  0.0290, -0.1779, -0.1417,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3573,  0.6704,  0.4343,  0.8221,  0.1772,  0.2618,  0.1750,  0.0523,
         0.0556, -0.3864,  0.0174,  0.1110,  0.1284,  0.2982, -0.1607,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3378,  0.1227, -0.0255, -0.2250,  0.0655, -0.0397,  0.1836, -0.0648,
         0.0565,  0.0209,  0.0339, -0.0627, -0.3153,  0.1749, -0.2328, -0.0908,
         0.0461,  0.3120,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6006e-02, -5.6975e-01, -1.8852e-01, -4.3792e-01, -9.4161e-02,
         1.7026e-02,  3.0802e-03, -5.4178e-02, -9.6365e-03, -1.5767e-01,
        -1.0415e-01, -7.9915e-03,  2.2668e-02, -8.1271e-02, -2.3391e-02,
        -1.8155e-04,  2.8398e-03, -6.9775e-02, -2.2178e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2031, -0.1330, -0.1227, -0.1029, -0.0413, -0.1024, -0.0560,  0.0697,
        -0.0452, -0.0475,  0.0059,  0.0625, -0.0395,  0.0084,  0.0362,  0.0061,
        -0.0401,  0.0114,  0.1783,  0.0339, -0.2578,  0.0114, -0.2078, -0.0656,
        -0.0734, -0.0416, -0.0919,  0.0919,  0.0152, -0.0470, -0.0352,  0.0292,
        -0.1074, -0.0372, -0.0952, -0.0281, -0.1065,  0.0172, -0.0380, -0.1184,
        -0.0107,  0.0890,  0.0826, -0.1431], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0208, -0.3748, -0.1013, -0.0222,  0.0815, -0.1105, -0.0219,  0.0507,
        -0.3564, -0.0917, -0.0456,  0.0572,  0.0269, -0.0325,  0.1334, -0.0131,
        -0.0277, -0.3908,  0.0405, -0.0159, -0.0427, -0.0256,  0.0146, -0.0065,
         0.0315, -0.0626,  0.0275, -0.0560,  0.1056, -0.0098, -0.1115, -0.0128,
         0.0029, -0.0504,  0.3247,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0814, -0.9193, -0.0024, -0.0208,  0.0261,  0.0929, -0.2384, -0.1600,
         0.0373,  0.0164, -0.0088, -0.0956, -0.1847,  0.0118, -0.0237, -0.1621,
         0.0515, -0.0128,  0.0112,  0.0763, -0.0373, -0.0119,  0.0671, -0.1163,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0102, -0.4013, -0.1133, -0.0387, -0.0664, -0.0255,  0.0940, -0.0615,
        -0.1952,  0.0356,  0.0021,  0.0506,  0.0476, -0.1080, -0.0735,  0.0744,
         0.0717, -0.0916, -0.1787,  0.0203,  0.0385, -0.0627, -0.0395, -0.0361,
        -0.0264, -0.1181, -0.0694, -0.1003, -0.0049,  0.0588, -0.2243, -0.0600,
        -0.1037,  0.0572,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7977, -0.1367, -0.0557, -0.1319,  0.0304, -0.0318, -0.2005, -0.0147,
        -0.0909, -0.0635, -0.0120, -0.0449, -0.1339, -0.0075, -0.0051, -0.7434,
        -0.1498,  0.0147, -0.0418,  0.0112,  0.0390, -0.0457, -0.0039, -0.0649,
        -0.1082, -0.0524, -0.0864, -0.1055,  0.0696,  0.0193,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.2789, -0.5707, -0.3264, -0.4405, -0.1229,  0.0731, -0.1773, -0.1177,
        -0.0888, -0.3738, -0.0202, -0.1746,  0.0411,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0606, -0.2480,  0.1797,  0.1130, -0.1059, -0.0608, -0.0464,  0.0715,
        -0.4110, -0.1140, -0.1549, -0.1529, -0.0612, -0.1004, -0.2803, -0.0615,
         0.1594, -0.1148,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0872, -0.6109, -0.1559, -0.0663, -0.2897, -0.0416, -0.1739, -0.1474,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1759, -0.0944, -0.0626, -0.0043, -0.0335, -0.0047, -0.0159, -0.0387,
         0.0136,  0.0536, -0.0134, -0.0358, -0.0408,  0.0771,  0.0018, -0.0370,
        -0.0073, -0.0071, -0.0331,  0.0108,  0.0735, -0.0401, -0.0170, -0.0997,
        -0.0125, -0.0729, -0.0336, -0.1136, -0.3706, -0.0774,  0.1468, -0.2025,
        -0.0052, -0.3084,  0.0109, -0.1100, -0.1298, -0.0521, -0.0426, -0.0092,
        -0.0052, -0.0295,  0.0005,  0.0815,  0.0450,  0.1333, -0.0013],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7365, -0.3943,  0.2097, -0.9189, -0.0994, -0.0221, -0.0541,  0.0130,
         0.1659, -0.4912,  0.1242,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4005, -0.5027, -0.1840, -0.2813, -0.3025, -0.0746, -0.0430, -0.4157,
        -0.2338, -0.2053, -0.0788, -0.0720, -0.2335, -0.0821,  0.3443,  0.0598,
        -0.0352, -0.0578,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3632,  0.6519,  0.2680, -0.1697,  0.0482, -0.0893,  0.0401,  0.0963,
         0.0799,  0.1017, -0.1216,  0.0578,  0.3009,  0.0091,  0.6141,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0361, -0.8772, -0.2371, -0.0642, -0.0533, -0.0456, -0.2671,  0.2473,
        -0.0446, -0.0836, -0.3009,  0.0058,  0.2724, -0.0652, -0.0677,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8479, -0.1554, -0.1118,  0.1595, -0.0172, -0.2363, -0.2763, -0.2006,
        -0.0832,  0.0189,  0.0236, -0.0963, -0.3541, -0.0541,  0.0512,  0.1272,
         0.0324,  0.0272,  0.0012,  0.0321, -0.0407, -0.3307,  0.0107,  0.1085,
         0.2685, -0.0482,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1016, -0.0505, -0.2298, -0.1457, -0.1119, -0.1246, -0.0376, -0.0666,
        -0.6508, -0.0193, -0.0687, -0.0152, -0.2119, -0.0263, -0.0284,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1085, -0.4799, -0.1060, -0.0659, -0.1760,  0.1049, -0.1824, -0.0114,
         0.0433, -0.5715,  0.0731,  0.0031, -0.1041, -0.1296, -0.0791,  0.0246,
        -0.0043,  0.0612, -0.0797,  0.0423, -0.0351,  0.0414, -0.0584,  0.0403,
         0.2820,  0.3077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0880,  0.0814,  0.0338, -0.0519, -0.1150, -0.1057, -0.0263, -0.1156,
        -0.0260, -0.0447, -0.5182, -0.0407, -0.0611,  0.0312, -0.2028, -0.1125,
        -0.1605, -0.0945, -0.0912, -0.1667,  0.0848, -0.0615, -0.0309, -0.0188,
         0.4539, -0.4384,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.0815, -0.5588, -0.1772, -0.0238, -0.0656,  0.0591, -0.1269, -0.1122,
        -0.0160, -0.0926, -0.0368, -0.0219, -0.1439,  0.0265, -0.0408, -0.0330,
        -0.0400,  0.0310, -0.1201, -0.0394, -0.0396, -0.0524, -0.0675, -0.1229,
        -0.1644,  0.0928, -0.1360,  0.0150, -0.0198, -0.0582,  0.0167, -0.0518,
        -0.0167,  0.0435, -0.1799,  0.1012,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1988, -0.3304, -0.1075, -0.0734, -0.0555, -0.0605, -0.1022, -0.1806,
        -0.0064,  0.0982, -0.0103,  0.0065,  0.0043, -0.0321, -0.0591, -0.0182,
         0.0073, -0.0276,  0.0665,  0.0285, -0.0237, -0.1247, -0.0101, -0.1062,
        -0.0982, -0.1843,  0.0261, -0.0247, -0.1880, -0.0191, -0.0323, -0.0701,
        -0.0876,  0.1265, -0.0029,  0.1586,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1067,  0.8645,  0.0805,  0.1282, -0.0317, -0.0580,  0.1250,  0.0425,
         0.0073,  0.0141,  0.0208,  0.0995,  0.1342, -0.0302, -0.1625, -0.0596,
         0.1628,  0.3568,  0.1517, -0.2585,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4720, -0.1253, -0.0224,  0.0392,  0.1366, -0.0254,  0.0088,  0.0216,
         0.0413,  0.0491, -0.0277,  0.0133,  0.0301,  0.0648, -0.0744, -0.0138,
         0.0285, -0.1138, -0.1192, -0.0084, -0.0683, -0.0468, -0.0014,  0.0573,
         0.0064, -0.0318,  0.0208,  0.0403,  0.0127, -0.0016, -0.1608, -0.0229,
        -0.1453, -0.4150,  0.0509,  0.0729,  0.1172, -0.1256,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0246, -0.1558, -0.1060, -0.2775, -0.0192, -0.0193,  0.0418,  0.0223,
        -0.0094, -0.0695, -0.1378, -0.2092,  0.0139,  0.0311,  0.0172, -0.1134,
        -0.0209, -0.0979,  0.0309,  0.0352,  0.0196,  0.0615,  0.0133, -0.0294,
        -0.0393,  0.0744, -0.0332, -0.0206, -0.2045,  0.0230, -0.0366, -0.0198,
         0.0573,  0.0338, -0.0574, -0.0461,  0.0087,  0.0726, -0.0163, -0.1004,
         0.0689], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3665e-02,  1.1214e+00, -2.4757e-01,  3.6270e-01, -1.3368e-04,
         2.1255e-01, -1.1436e-01,  4.1351e-02,  6.7263e-02,  6.4128e-03,
         5.5756e-02,  3.3321e-03,  3.3393e-02, -4.4454e-02, -6.1425e-02,
         7.4863e-02,  2.7278e-02,  6.2127e-02, -3.9131e-02,  8.2724e-03,
         7.2529e-02,  4.0937e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0893, -0.0254, -0.0231, -0.0105, -0.0139, -0.0512, -0.0105,  0.0674,
        -0.1208, -0.0279, -0.0180, -0.1902, -0.0238, -0.0565,  0.0598, -0.1598,
        -0.0591, -0.0862,  0.0542, -0.1203, -0.2546,  0.0986,  0.0447, -0.0689,
        -0.0631,  0.0062,  0.0230, -0.0968, -0.2172, -0.0410, -0.0413, -0.0731,
        -0.1170, -0.0528,  0.0030,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1675, -0.0435,  0.1973,  0.0538,  0.1702,  0.0581, -0.0360, -0.0901,
         0.0838, -0.0065,  0.1602,  0.0880, -0.0084,  0.0098,  0.0770,  0.1100,
        -0.1679,  0.0225,  0.1581, -0.0293,  0.1249,  0.1794, -0.1487,  0.0593,
        -0.0086,  0.0008,  0.1484, -0.0700,  0.0276,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0117, -0.5323, -0.1363, -0.0979, -0.0184, -0.1417, -0.0327,  0.0703,
        -0.0848, -0.0898, -0.1270, -0.1043, -0.0077, -0.0836, -0.0323, -0.1553,
        -0.1657, -0.0196, -0.1367,  0.0040,  0.1707, -0.2249, -0.3149, -0.0750,
        -0.3082,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2386, -0.3520,  0.3535,  0.2987,  0.5711, -0.0020,  0.1007,  0.0997,
         0.0691,  0.0934, -0.1410,  0.1415,  0.0343, -0.0509,  0.1786,  0.2544,
         0.0390,  0.0620, -0.0771, -0.0196,  0.1327, -0.2296,  0.0260,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3785, -0.2648,  0.0090,  0.0083, -0.0592,  0.0741, -0.0287, -0.0669,
        -0.0315, -0.0352,  0.0390, -0.0460, -0.1567, -0.4724,  0.0240, -0.0174,
         0.0196, -0.0567, -0.0657, -0.0316, -0.1498, -0.0146,  0.0265, -0.1492,
         0.0630, -0.0287, -0.1909, -0.0464, -0.1528, -0.0804, -0.2681, -0.0780,
        -0.0497, -0.0938, -0.0252, -0.1009,  0.1060, -0.2334, -0.0501,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0725, -0.3934,  0.0997, -0.3851, -0.4836, -0.1493, -0.4311, -0.2482,
        -0.3069,  0.0232, -0.0011, -0.0730,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.2363, -0.2283, -0.5139, -0.0526, -0.0741,  0.0988, -0.1486, -0.0556,
        -0.0823, -0.0826, -0.0300, -0.0055, -0.0146,  0.0519, -0.0903, -0.0907,
        -0.0254, -0.1162, -0.0322, -0.1243, -0.0719, -0.0700, -0.0624, -0.0296,
         0.0040,  0.0239,  0.0765,  0.0015, -0.0294,  0.0905, -0.0333,  0.1865,
        -0.0609,  0.0058, -0.1550, -0.1503, -0.0314, -0.0146, -0.0196, -0.0078,
        -0.0175,  0.0052,  0.0171, -0.0122,  0.0672,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0905, -0.9326, -0.1150, -0.2055, -0.1376, -0.2136, -0.4647, -0.0304,
        -0.2895, -0.3169,  0.5956, -0.3372,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0419,  0.8696,  0.3055,  0.3627,  0.0285, -0.1475,  0.2528, -0.0803,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7771e-01, -6.1295e-01, -7.4932e-03,  2.6759e-01, -1.5111e-01,
        -3.4848e-02,  2.7418e-02, -6.0795e-02, -1.2482e-01, -2.8136e-01,
        -1.4775e-01, -3.2770e-02,  4.9545e-03, -2.4125e-02, -2.0011e-02,
         1.9787e-02,  6.2634e-02, -1.3031e-01,  3.5153e-02, -2.1548e-03,
        -3.8190e-02, -2.1243e-02,  3.4082e-02,  2.3705e-02,  6.0805e-03,
        -8.3457e-02, -9.0115e-02, -2.6681e-02, -1.1617e-01,  5.7164e-02,
        -7.3082e-02,  1.5426e-03, -5.7003e-03, -1.4429e-01,  3.1113e-02,
         1.3857e-02, -1.2520e-01, -3.8479e-02, -3.4478e-02, -8.2505e-03,
        -7.4732e-02,  3.8054e-02, -4.2994e-02,  4.8279e-02, -1.4078e-02,
         7.3934e-01, -1.9324e-01,  2.2311e-02, -1.6997e-01, -1.9560e-01,
        -4.4146e-02, -3.9397e-02, -7.4098e-02,  2.0712e-02, -1.1558e-02,
         9.0870e-03, -5.2762e-02,  3.5335e-04, -1.5113e-02,  8.3073e-02,
         4.8091e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2903, -0.1855, -0.0101, -0.0921, -0.4475, -0.0597, -0.0546, -0.0183,
         0.0726, -0.0084, -0.0573, -0.0460, -0.0409, -0.0391, -0.2394, -0.0007,
        -0.0514, -0.0838, -0.0769,  0.0358, -0.1230, -0.0361, -0.0283, -0.0091,
        -0.0552, -0.0838, -0.0479,  0.0199, -0.0590, -0.1098, -0.0947, -0.1104,
        -0.0077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5862, -0.0724, -0.1588, -0.0379, -0.0093, -0.0291, -0.1338,  0.0310,
         0.0763, -0.0393,  0.1870,  0.0432, -0.0325, -0.0120, -0.0107,  0.0493,
         0.0336, -0.2493, -0.8573,  0.0105,  0.1936, -0.1069,  0.3976, -0.0803,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3507, -0.2197, -0.1229,  0.0172, -0.1807, -0.2485, -0.1275, -0.1224,
         0.0634, -0.2810, -0.0839, -0.1338, -0.4178, -0.1047,  0.0368, -0.0363,
        -0.0430, -0.1069, -0.0177,  0.0083, -0.0782,  0.1241,  0.1006,  0.3350,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3577, -0.2011, -0.1891, -0.0814,  0.0381,  0.0608, -0.0486, -0.0860,
         0.0373, -0.0717, -0.0147, -0.0175,  0.0530,  0.0220, -0.0880, -0.1290,
        -0.0713,  0.1105,  0.0634, -0.1363, -0.1182, -0.0710, -0.0498, -0.2644,
         0.0510,  0.0008, -0.1496, -0.0371,  0.0281,  0.0119, -0.1368, -0.0417,
        -0.0359,  0.0999,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1838, -0.0202, -0.0167, -0.0041, -0.0665, -0.0587, -0.0429,  0.0953,
        -0.1282, -0.0513,  0.0220, -0.1860,  0.0066, -0.7448, -0.0770, -0.2874,
        -0.1602, -0.0595,  0.0318, -0.0537,  0.0110, -0.1032, -0.1041,  0.0617,
        -0.0686,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0276,  0.0854,  0.2359,  0.1577,  0.2437, -0.0852,  0.1674,  0.0627,
         0.1068,  0.9368,  0.0546,  0.0697, -0.0931,  0.0699, -0.1917, -0.1389,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0336, -0.4378, -0.0737, -0.1702, -0.0899, -0.0601, -0.2396, -0.0596,
        -0.0479, -0.0826, -0.0425, -0.0729, -0.1075, -0.1334, -0.0570,  0.0251,
        -0.0184, -0.0409, -0.1536, -0.0565, -0.0072, -0.0439,  0.0517,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3359,  0.5912, -0.0841,  0.3674,  0.5025,  0.2101, -0.0084,  0.1524,
         0.1051, -0.0017,  0.0261,  0.1302,  0.0994,  0.0178,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.4351, -0.1247, -0.0455, -0.5187, -0.0894,  0.0996,  0.0149,  0.2195,
        -0.0144,  0.0089, -0.2067, -0.1306, -0.1606,  0.0129, -0.0654, -0.0345,
         0.0471, -0.4651, -0.1514, -0.0834, -0.1599, -0.0791,  0.2191,  0.4723,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4659, -0.1417, -0.3103,  0.0242, -0.2769, -0.0078, -0.4168, -0.0754,
        -0.1361, -0.2161, -0.3451,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3455, -0.0903, -0.0602,  0.0618, -0.1506, -0.1729, -0.2863,  0.0491,
        -0.1848, -0.0102, -0.1036,  0.0799, -0.0056,  0.0243, -0.0589,  0.0669,
        -0.1147, -0.4262,  0.1181,  0.0539,  0.2393,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4526, -0.3404,  0.0307, -0.0918, -0.0609, -0.1271,  0.0023, -0.2686,
        -0.0635, -0.0974, -0.0527, -0.0898, -0.2157, -0.1316,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4716, -0.0063, -0.0454,  0.2049, -0.4151,  0.0554, -0.2097,  0.0095,
         0.0868, -0.4459,  0.1235, -0.5071, -0.5037, -0.0563, -0.1632,  0.2594,
         0.2663,  0.0402,  0.1081,  0.5272,  0.1624,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5096, -0.4271,  0.0831, -0.1897, -0.2392, -0.2586, -0.1364, -0.2822,
        -0.1097, -0.2047, -0.1511, -0.0219, -0.2353,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2118, -0.0783, -0.0243, -0.0071, -0.0236, -0.0512, -0.0468, -0.1755,
        -0.0633, -0.0740, -0.0150,  0.0113, -0.0286,  0.0644, -0.0203, -0.0293,
        -0.0161,  0.0493, -0.0015,  0.0199, -0.0541, -0.0244, -0.1145, -0.0304,
        -0.5645,  0.0710, -0.1620, -0.1006, -0.0889, -0.1432, -0.0300,  0.0645,
         0.0375,  0.0428,  0.0984, -0.0609, -0.0608, -0.0180, -0.0288, -0.0197,
        -0.0182,  0.0591,  0.0097,  0.2097,  0.2908], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2093, -0.3975, -0.0989,  0.0037,  0.0051,  0.0488, -0.0558,  0.0063,
        -0.0226, -0.0879, -0.1340, -0.0435, -0.0567,  0.0666, -0.1658, -0.0280,
        -0.5115,  0.0562,  0.0100,  0.0447, -0.0372, -0.1374, -0.0898, -0.2869,
         0.0213,  0.0324,  0.0713, -0.0925, -0.3227, -0.2034,  0.0102, -0.0087,
        -0.1436, -0.1981,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7129,  1.4058,  0.2211,  0.0237,  0.1057,  0.0138, -0.0383,  0.2000,
         0.0615,  0.0750,  0.1222,  0.1779,  0.0565,  0.0095, -0.0940, -0.2346,
        -0.0215, -0.1500,  0.0295,  0.2477,  0.3031, -0.0351,  0.0928,  0.2046,
        -0.2406, -0.5521,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0659, -0.5683, -0.0710, -0.2363,  0.1140, -0.1473, -0.0402, -0.1917,
        -0.1465, -0.0720, -0.0534, -0.0426, -0.0330, -0.1034, -0.0754, -0.0615,
        -0.3368, -0.0298, -0.0585,  0.0035, -0.0177, -0.0497,  0.0009,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1874, -0.1618,  0.0681, -0.0211, -0.0327, -0.0237, -0.0617, -0.0273,
        -0.1177,  0.0056, -0.0321, -0.0341, -0.0305, -0.0354, -0.4751, -0.0201,
         0.0354, -0.1494, -0.0805, -0.1136, -0.0391, -0.0226,  0.0007,  0.0542,
        -0.0109,  0.0142, -0.0415, -0.1183,  0.0056,  0.0225, -0.0057, -0.0356,
         0.0217, -0.0090,  0.0190, -0.0045, -0.0047, -0.1372,  0.0238, -0.1051,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2194, -0.3491, -0.1402, -0.1086, -0.1473, -0.1261,  0.0977, -0.1269,
        -0.0269, -0.0602, -0.0494, -0.0112, -0.0369, -0.1147, -0.1033, -0.1024,
        -0.0512, -0.0743,  0.0903, -0.0613,  0.0882, -0.0874, -0.0203,  0.0300,
         0.0253,  0.0106, -0.0154, -0.1300, -0.0424,  0.1983, -0.1890, -0.0377,
        -0.0568, -0.2710,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.3830, -0.1296, -0.0106, -0.0420, -0.0543, -0.0585, -0.0820, -0.0942,
         0.0019,  0.0674, -0.0349, -0.0103, -0.0106,  0.0045, -0.1089, -0.0746,
         0.1224, -0.0194, -0.0502,  0.0565, -0.0409, -0.0046, -0.1061, -0.0231,
        -0.0874, -0.2760,  0.0881, -0.0362,  0.0376, -0.0489,  0.0320,  0.0326,
         0.0071,  0.0062, -0.1243,  0.0442, -0.0180, -0.0384, -0.0047, -0.0452,
        -0.0683, -0.0143, -0.0680, -0.0120, -0.0129,  0.0228,  0.0342,  0.0022,
        -0.0134, -0.0065, -0.0084, -0.1118], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2579, -0.0070, -0.2167, -0.2236,  0.2409,  0.0890, -0.0965, -0.0784,
        -0.1345,  0.1079,  0.0352, -0.0323, -0.0097,  0.1996,  0.0873,  0.0927,
         0.2646, -0.1501,  0.1125, -0.0820,  0.0551,  0.0219,  0.1693,  0.0120,
        -0.0332, -0.0747, -0.0139, -0.0196,  0.0641, -0.0736,  0.0757,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9319,  0.1911, -0.0201, -0.0154,  0.0777, -0.0172,  0.1870,  0.0722,
         0.1929, -0.1146,  0.0314,  0.2006, -0.0061,  0.0171, -0.1323,  0.0370,
        -0.2486,  0.0898,  0.1994,  0.2776,  0.0679,  0.1844,  0.2094, -0.3033,
         0.1816,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0638,  0.0983,  0.0871, -0.0277,  0.2025,  0.3893,  0.7504, -0.0212,
         0.1539,  0.1871, -0.1875, -0.0848,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6264e-01,  7.6943e-01,  4.2293e-03,  1.3650e-01,  1.1732e-01,
         1.8583e-01,  7.9028e-02,  4.3853e-02, -1.4012e-02,  1.1168e-01,
         9.4365e-02, -1.9165e-02,  5.6633e-02,  2.6806e-01,  1.3701e-01,
         2.1830e-01,  7.9236e-02,  8.5706e-02, -1.1698e-01,  1.7998e-04,
         5.9923e-02,  6.0387e-02,  2.6273e-03, -7.8393e-02, -1.1732e-01,
         1.8337e-01,  1.4556e-01, -9.2209e-02,  1.6591e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0629,  0.7465, -0.0861, -0.1012, -0.0525,  0.1697,  0.7009, -0.2369,
        -0.0098,  0.0978,  0.0447,  0.4239,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4078, -0.7086, -0.0215, -0.1364,  0.1862, -0.2158, -0.3023,  0.0128,
        -0.0665, -0.1051,  0.0070, -0.3256,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6937,  0.0510,  0.0271,  0.1620,  0.1972, -0.1048,  0.0793,  0.1369,
         0.3451, -0.0341,  0.0232,  0.2202,  0.0860, -0.0993, -0.0729,  0.2680,
         0.1348,  0.0131, -0.2055, -0.1596,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0489, -1.1535, -0.0232, -0.1528, -0.4169,  0.0904,  0.0446,  0.0160,
        -0.1075,  0.0599,  0.1415,  0.0535,  0.0372,  0.1081, -0.0695,  0.2589,
        -0.0138,  0.0098,  0.0895, -0.2074, -0.0264,  0.1607,  0.1142,  0.2604,
         0.0671,  0.1136,  0.2034, -0.1084, -0.1329, -0.1135, -0.0709, -0.0528,
        -0.0069, -0.0611, -0.2474,  0.3972, -0.2571,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5758, -1.6110,  0.1336,  0.3692, -0.1464, -0.1282,  0.0128,  0.3559,
        -0.7057,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0725e-01, -7.8487e-01,  2.3520e-01, -2.9810e-01,  1.5920e-02,
        -5.1305e-02, -1.0242e-01, -2.8514e-01,  1.0873e-01, -1.7031e-01,
        -6.7829e-02,  1.6834e-01, -9.1155e-02, -2.7117e-01,  2.1793e-02,
         1.3809e-02,  4.5741e-04,  6.4476e-02, -1.7208e-01, -2.2458e-01,
        -1.3196e-02,  3.0136e-02, -2.3286e-02,  1.0575e-02, -5.2947e-03,
         3.1830e-01, -1.4063e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0744,  0.5216,  0.1508,  0.0817,  0.2919,  0.1190,  0.0040, -0.0286,
        -0.0108,  0.0913,  0.0565,  0.1640,  0.0517, -0.0984,  0.0528,  0.0129,
         0.0560, -0.1708,  0.0801,  0.0408,  0.0641,  0.2492,  0.1665, -0.1211,
        -0.0029,  0.0937,  0.0319,  0.0891,  0.9139,  0.0129,  0.0614, -0.0038,
         0.0372,  0.1365,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 4.0343e-01,  7.4224e-02,  9.9660e-02,  1.0596e-02, -5.1388e-02,
         4.4754e-02,  5.8078e-03, -2.9873e-03, -1.0448e-01,  2.0151e-02,
        -1.2482e-01, -3.0235e-01, -7.0946e-02, -5.8810e-02, -6.4049e-02,
        -1.0915e-01, -1.3144e-01, -3.2881e-02, -6.1196e-02,  1.1435e-02,
         4.1384e-02,  3.7041e-02, -1.1141e-01,  9.0756e-03, -1.2663e-01,
        -8.4044e-02, -5.4807e-02,  1.2061e-03, -6.0770e-03, -2.3896e-02,
        -8.1214e-02, -1.4888e-01, -2.5409e-01, -6.1144e-02,  3.2333e-02,
        -6.1608e-02, -1.2948e-01, -3.6973e-02,  6.0195e-03, -4.3082e-02,
         4.5530e-05, -1.9307e-01,  4.6879e-02,  7.4881e-02,  2.4819e-01,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2452, -0.1381,  0.0145,  0.0222, -0.1280, -0.1639, -0.6035, -0.0098,
        -0.1279, -0.0994, -0.0428,  0.0591, -0.1191, -0.1344,  0.0128,  0.3383,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9136, -0.0160,  0.0926,  0.0053, -0.0790, -0.0054, -0.2273, -0.0177,
        -0.1136, -0.0910,  0.0066, -0.1464, -0.0417,  0.0332, -0.1508,  0.1344,
        -0.0889, -0.0358,  0.0459, -0.0309, -0.0194, -0.0511, -0.0649, -0.0401,
         0.0671, -0.0613, -0.1052,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2166, -0.0449, -0.0453,  0.0094, -0.0538, -0.0296, -0.0259, -0.0398,
        -0.1094, -0.0772, -0.0413, -0.0365, -0.0533, -0.0124, -0.0306, -0.0088,
        -0.0109,  0.0047,  0.1276, -0.0872,  0.0274, -0.0695, -0.0730, -0.3752,
         0.0171, -0.0634, -0.0326, -0.0234,  0.0339, -0.0330, -0.1067, -0.0053,
        -0.0502,  0.0687,  0.0006,  0.0498, -0.0768, -0.0993,  0.1239,  0.0943,
        -0.0447, -0.1332, -0.0252, -0.0966,  0.1184, -0.0644], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4229,  0.0336,  0.0924,  0.0827, -0.0659,  0.1818,  0.0277,  0.1016,
         0.0507,  0.1848,  0.0693, -0.0311,  0.0506,  0.0241, -0.0174,  0.0037,
         0.0863,  0.0315,  0.2177,  0.2284,  0.1419,  0.1091,  0.0186,  0.0670,
         0.1099,  0.2086,  0.0543,  0.1113,  0.0472, -0.1295,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4004, -0.0035, -0.0115, -0.0171, -0.0159, -0.0770, -0.1686,  0.0023,
         0.0481, -0.2146,  0.4573,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0570, -0.9940,  0.0302, -0.3358, -0.1638, -0.0435, -0.1632, -0.4220,
         0.0125, -0.3035,  0.0704, -0.0722, -0.0161,  0.0246,  0.0493, -0.1404,
        -0.1216, -0.0389, -0.2146, -0.2815,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2222, -0.4290, -0.1892, -0.1341,  0.3098, -0.1565, -0.3731,  0.0587,
         0.0228, -0.2809, -0.1956,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1285, -0.4794, -0.2248, -0.4166, -0.2402,  0.0710, -0.0482, -0.0315,
        -0.2133, -0.1200, -0.0575, -0.1131, -0.0020, -0.0822, -0.0233, -0.1962,
        -0.1067, -0.0475, -0.0242,  0.0238, -0.0830,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0043,  0.3482,  0.0939, -0.0590,  0.0402,  0.9057,  0.0496, -0.1103,
         0.1513,  0.0142,  0.1678,  0.1446, -0.0106,  0.0425,  0.0547,  0.1417,
        -0.1400, -0.0287,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1061, -0.2593, -0.3777,  0.0638, -0.0248,  0.0647,  0.0465,  0.0266,
        -0.0353, -0.0135,  0.0182,  0.0175, -0.0466, -0.0838, -0.1013, -0.0633,
        -0.1691, -0.1996,  0.0020,  0.0010, -0.0543, -0.1048, -0.2334, -0.0391,
        -0.1183,  0.0242, -0.0610, -0.0317, -0.0399, -0.2530, -0.0132,  0.0141,
        -0.0539, -0.0158, -0.0363, -0.0232, -0.0064,  0.0315,  0.0603,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2307,  1.2840,  0.2879,  0.0872,  0.0339,  0.0388,  0.0301, -0.1837,
        -0.0544,  0.0433,  0.0113,  0.1005,  0.0804,  0.0367, -0.0142, -0.0142,
         0.0617, -0.0122,  0.0523, -0.0201,  0.0149, -0.0080, -0.0139, -0.0105,
        -0.0731,  0.1269,  0.0379,  0.0455,  0.1883,  0.1927,  0.0228,  0.0462,
         0.2399,  0.2405,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.2248, -0.5354,  0.1665, -0.2613,  0.0245, -0.4641, -0.3909, -0.0908,
        -0.5951,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2262,  0.2946,  0.2000, -0.0501, -0.3066, -0.1324,  0.0301, -0.0542,
        -0.0574,  0.0291,  0.0461,  0.0189, -0.0142,  0.0405, -0.1069,  0.1384,
         0.1534,  0.7132, -0.0456,  0.0095,  0.0105, -0.0048, -0.0324, -0.0250,
        -0.0091, -0.1576,  0.0458, -0.0091, -0.1377,  0.0182,  0.0144,  0.1094,
         0.0449,  0.0192, -0.0165, -0.0100,  0.0221,  0.0217,  0.0345, -0.1218,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1997e-01, -4.2429e-01, -2.2805e-01,  3.3140e-02, -4.7465e-02,
         5.9877e-02,  3.9458e-02, -2.7235e-02, -9.0300e-03, -8.7950e-02,
        -3.0415e-02,  2.1377e-02,  1.2338e-03, -1.0727e-02, -4.8635e-02,
         3.0051e-02, -3.5268e-02, -6.2873e-03, -5.2663e-02,  5.9548e-03,
         2.1970e-02, -3.3974e-02,  1.1550e-01, -1.0166e-01,  3.8681e-02,
        -5.9231e-02, -2.5002e-02,  1.3562e-01, -1.3193e-01, -5.3474e-02,
        -2.0037e-02, -9.2154e-02, -1.0382e-04,  1.8697e-02,  1.0130e-02,
        -7.4923e-02, -3.4785e-02, -8.1179e-02,  7.2369e-02,  5.7640e-02,
         1.3818e-01, -9.8675e-03,  1.0480e-02, -3.2707e-02,  3.9368e-02,
         4.8625e-02, -5.5346e-03,  3.3784e-02,  4.5340e-02, -5.6617e-02,
         1.8927e-02, -2.5149e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0097, -0.4148, -0.1979,  0.0284, -0.1373, -0.0938, -0.1206, -0.0969,
        -0.0796, -0.0213,  0.1774, -0.1797, -0.1040, -0.0557, -0.0874, -0.2025,
        -0.1472, -0.0064, -0.2378, -0.1869, -0.1829,  0.2934,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0649, -1.1305, -0.1336, -0.1125, -0.0249, -0.1733, -0.1921, -0.0245,
         0.0542, -0.1737, -0.0712, -0.1103, -0.0227, -0.1835, -0.0901, -0.1776,
        -0.0583,  0.0044, -0.2067,  0.0445, -0.0633,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0895e-01, -1.1828e-01, -6.7747e-03, -4.7095e-02,  1.4432e-04,
        -1.0996e-02, -5.6842e-03,  1.5015e-02, -4.3600e-02, -2.7534e-02,
        -2.9681e-02, -5.3847e-03, -1.4363e-02,  2.9812e-02, -1.5977e-02,
        -1.6688e-03, -8.6833e-03, -1.6419e-03, -7.6085e-02, -3.3752e-02,
         5.5504e-03, -8.2423e-02,  3.2956e-02, -2.9387e-02, -2.9405e-02,
         2.4489e-02, -5.5872e-02, -5.1542e-02, -1.7490e-02, -2.9105e-02,
        -6.5454e-02, -2.7315e-02, -3.2283e-02, -1.3315e-02, -6.8660e-02,
        -1.8109e-02,  3.7886e-03,  7.7906e-03, -2.4180e-02, -2.3636e-02,
        -6.2202e-01, -8.2144e-02,  1.8022e-02, -9.2948e-02, -4.9594e-02,
        -9.1142e-03, -4.3341e-02, -6.3098e-02,  6.9161e-03,  1.5719e-02,
        -7.2489e-02, -5.9744e-02,  8.5466e-02, -1.2292e-01, -1.3291e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3864, -0.1626, -0.0238, -0.0639, -0.1120, -0.1012, -0.0683, -0.0964,
         0.0810,  0.0163,  0.1461, -0.0292, -0.1089, -0.0546, -0.0619, -0.0607,
         0.0136, -0.0622, -0.0203, -0.1538, -0.1452, -0.1402, -0.0853, -0.0371,
         0.1502,  0.0204, -0.0870, -0.3346, -0.0038, -0.0564,  0.0033, -0.1090,
        -0.1277, -0.3079,  0.0547, -0.1122, -0.0778, -0.0975, -0.1153, -0.0376,
        -0.1150,  0.0776, -0.0828,  0.2034, -0.0442,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6503e-01,  4.9466e-01,  2.0693e-01,  2.3516e-01,  1.5008e-02,
         7.0125e-04,  1.1572e-01,  4.7395e-02,  1.3649e-01, -6.4279e-02,
        -8.5831e-02,  1.5490e-01,  1.8676e-01,  1.5504e-01,  1.1579e-01,
        -5.3850e-02, -4.3600e-04,  2.7284e-01, -9.9896e-02,  1.1012e-01,
         3.4787e-01,  1.2714e-01,  2.5984e-02,  3.5731e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1205, -0.1669, -0.0169,  0.0046, -0.1966,  0.0486, -0.0483, -0.0865,
         0.0003,  0.0158, -0.0223, -0.0413, -0.0453, -0.1335, -0.1880, -0.0950,
        -0.2316, -0.0962, -0.0494, -0.1391, -0.1708,  0.0494, -0.1094, -0.2688,
        -0.0154, -0.1186, -0.1476, -0.1122, -0.0303, -0.2955,  0.0168, -0.0064,
        -0.1372,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1531, -0.8945, -0.1163, -0.1903, -0.0361, -0.0867,  0.0367,  0.0014,
        -0.6070, -0.0436,  0.0188,  0.0313,  0.5024, -0.1164,  0.0724, -0.1918,
         0.0267,  0.0535, -0.0581,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3243e-01, -1.9047e-01, -1.0197e-01, -1.2275e-01, -1.9819e-01,
         1.6376e-01, -1.9129e-01, -5.3560e-02, -3.8461e-01,  2.2062e-02,
         4.3548e-02,  7.2846e-02, -6.2828e-02, -1.0302e-02, -1.6104e-02,
        -1.6903e-04, -3.2428e-02, -1.3028e-01, -1.8879e-02,  2.9780e-02,
        -1.0867e-01, -1.3882e-01, -3.5674e-03,  1.2148e-02,  1.7737e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7575, -0.4802, -0.2829, -0.0750, -0.2212, -0.2035, -0.9606,  0.0163,
        -0.0311, -0.1133,  0.0497, -0.0451, -0.1363, -0.0807, -0.2135,  0.1997,
         0.1381,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.2221,  0.1285, -0.0974, -0.0350, -0.1213, -0.0142,  0.0620, -0.0268,
        -0.0394, -0.1692, -0.1256, -0.2032, -0.0613, -0.0090, -0.1793, -0.0876,
        -0.1232, -0.3322,  0.0481, -0.0586,  0.0033, -0.2118, -0.5164, -0.0052,
         0.1329, -0.1561, -0.0929, -0.0271, -0.0638, -0.0528, -0.1367, -0.2052,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2202,  0.3092,  0.0905,  0.0301,  0.0987,  0.0590,  0.0449,  0.2659,
        -0.0014,  0.0463, -0.0239,  0.0334, -0.0739,  0.1193,  0.0681, -0.0176,
         0.1260,  0.1067,  0.1281,  0.1654,  0.0922,  0.0201,  0.1868,  0.3179,
         0.0114,  0.0611,  0.3701, -0.0849,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1103, -0.6829, -0.0269,  0.0503, -0.0666, -0.1377, -0.3539, -0.0483,
        -0.0269, -0.0639, -0.0584, -0.1557, -0.0385, -0.1106, -0.0341, -0.0272,
        -0.0351, -0.1303, -0.0014,  0.0210, -0.0205, -0.0333,  0.0227, -0.0232,
        -0.0563,  0.0250,  0.0509, -0.0627,  0.2185,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1386,  0.0207, -0.0343, -0.0600, -0.2928, -0.1431,  0.0093, -0.0446,
         0.0251,  0.0961, -0.0726, -0.1386, -0.1241, -0.2176,  0.0196, -0.1150,
        -0.0097, -0.3324, -0.0238, -0.1613, -0.1110, -0.1013, -0.0313, -0.0472,
        -0.0631, -0.0061, -0.0437, -0.1638, -0.0545,  0.0599, -0.0095, -0.1793,
        -0.0227,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4048, -0.3181, -0.0284, -0.0620, -0.0899, -0.1479,  0.0258, -0.1382,
        -0.0370, -0.0886, -0.0912, -0.0893, -0.0458, -0.0420, -0.0979,  0.0079,
        -0.0621, -0.0738, -0.3445,  0.0145, -0.0886, -0.0681,  0.0300, -0.0507,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0445, -0.2933, -0.2322, -0.0522, -0.0402,  0.0219, -0.1023, -0.1615,
        -0.0261, -0.1095, -0.0470, -0.0262, -0.0570, -0.0153,  0.0360, -0.0092,
         0.0049, -0.1562, -0.0508, -0.4355, -0.0445,  0.0328,  0.0474,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1485, -0.4395, -0.0662,  0.0171, -0.0298, -0.2406,  0.0067,  0.0249,
        -0.0309, -0.1625, -0.0582, -0.0803, -0.0096,  0.0479, -0.0502,  0.0118,
        -0.1574, -0.0683, -0.0257, -0.1721,  0.0361, -0.1321,  0.0609, -0.0583,
        -0.0678, -0.0519, -0.0187,  0.0128, -0.0740, -0.1238, -0.1007, -0.0366,
        -0.0688, -0.0405, -0.0720, -0.0461, -0.0089,  0.1022, -0.1358,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0557e-02, -3.0860e-01, -9.6646e-02, -7.9515e-02, -2.8760e-01,
        -2.7770e-02,  9.7901e-06, -3.2920e-02, -6.1675e-02,  1.1085e-02,
        -9.6164e-03, -3.9055e-02, -7.5871e-03, -3.6272e-02,  1.2645e-01,
        -1.0993e-01,  4.3383e-02,  3.4313e-03,  4.6194e-03, -2.5892e-02,
        -1.0290e-01, -7.0621e-02,  1.3616e-02, -1.0202e-01, -1.6440e-01,
        -9.1501e-02, -5.9496e-02, -1.4184e-02, -4.4762e-02, -8.3088e-02,
         3.7218e-02,  7.9801e-03, -9.8877e-02, -3.1759e-02, -4.8105e-02,
         8.7363e-04, -8.4494e-02, -2.8263e-01, -3.2689e-02, -5.3401e-02,
        -4.9503e-02, -1.8380e-01, -1.5403e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1037, -0.8461,  0.0792,  0.0654,  0.0424, -0.1142, -0.0789, -0.0026,
        -0.1317,  0.1134,  0.0766, -0.0397, -0.0727, -0.0118,  0.0224, -0.0491,
        -0.0883, -0.0978,  0.0653, -0.0594,  0.0403, -0.0697, -0.0458, -0.0417,
        -0.0127, -0.0462, -0.0169, -0.0129,  0.0665, -0.0999,  0.0066, -0.0103,
        -0.2018, -0.3055, -0.0014, -0.0475, -0.1093, -0.0646, -0.1187, -0.0462,
        -0.0660, -0.1332, -0.0174,  0.0499,  0.0065, -0.1057, -0.0135, -0.0275,
         0.0087, -0.1406, -0.1044, -0.0503, -0.0709,  0.0172,  0.0351, -0.0190,
         0.0503], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2360,  1.8159,  0.4737,  0.1149,  0.3455,  0.1279,  0.1191,  0.3893,
         0.1572, -0.0413,  0.1716, -0.1809,  0.0678,  0.1153,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4166, -0.2319, -0.1682,  0.0098, -0.0712, -0.0872, -0.0867,  0.0172,
        -0.0613,  0.0007, -0.0469, -0.1671, -0.0061,  0.0864, -0.1397, -0.1349,
        -0.0645, -0.1288,  0.0510, -0.1300, -0.2041,  0.0083, -0.0556, -0.0010,
         0.0225, -0.0299, -0.0252,  0.0784,  0.1038,  0.0944,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.3191, 0.5254, 0.0138, 0.3954, 0.0885, 0.0043, 0.2392, 0.2382, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0102, -0.5253, -0.0254, -0.0626, -0.1210, -0.3345, -0.0624,  0.0120,
         0.1292,  0.0166,  0.0591, -0.1126, -0.0941, -0.1641, -0.0544, -0.0051,
        -0.0174, -0.0070, -0.0015, -0.0862,  0.0874, -0.1021, -0.0576,  0.0102,
        -0.0645, -0.0494, -0.1173, -0.0365, -0.1469, -0.2913,  0.1141,  0.1308,
         0.0828,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8526e-01, -4.5854e-01, -2.1299e-01, -1.4921e-02,  5.6799e-02,
        -5.8036e-02,  1.3452e-02, -4.2898e-02,  2.9269e-02, -7.0949e-02,
        -4.4196e-03,  2.3795e-04, -3.7201e-02, -1.4025e-02, -9.1920e-03,
         8.9784e-03, -1.0594e-02,  6.0670e-02, -2.0357e-03,  1.9802e-03,
        -3.3247e-03,  4.9411e-02,  1.4773e-02, -1.9276e-02,  3.9054e-03,
        -1.1317e-02, -5.2011e-02, -2.9667e-01, -1.0191e-02,  5.2621e-03,
         4.7010e-02, -6.5177e-02, -5.4502e-02, -3.2799e-01,  6.3183e-02,
        -4.9698e-02, -4.9064e-02,  4.9833e-03, -7.8256e-03,  4.0987e-02,
         3.3122e-02, -2.2573e-02, -2.8952e-02, -3.4522e-02,  1.6431e-02,
         3.4975e-02,  4.2775e-02,  2.2214e-02,  5.2331e-02,  3.0049e-02,
        -4.9282e-02, -8.8638e-02, -2.6557e-02, -4.4625e-02,  1.2813e-02,
         6.0530e-02,  6.2620e-03, -5.1064e-02, -5.9477e-02, -5.8895e-02,
        -3.7312e-02, -3.6079e-02, -1.2448e-02, -3.8695e-02,  1.0342e-02,
        -8.6525e-03,  6.6830e-03,  6.1173e-02, -4.4146e-02, -2.0234e-02,
        -1.3166e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4077, -0.1136, -0.0271, -0.0332, -0.1172, -0.0371, -0.2327, -0.0369,
        -0.0563, -0.0149, -0.0449, -0.0327, -0.0230, -0.1220, -0.1185, -0.0067,
        -0.0173,  0.0214, -0.1347, -0.2773,  0.0631,  0.0087,  0.0609, -0.2305,
        -0.0163, -0.1027, -0.0821, -0.0170,  0.0935, -0.0213, -0.0519, -0.0555,
        -0.0166, -0.1435,  0.0078, -0.0330, -0.0669, -0.0207, -0.0403, -0.0400,
        -0.0508,  0.1222,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0413, -0.4929,  0.0342, -0.0714, -0.0129,  0.0333,  0.0051, -0.0199,
        -0.1140, -0.1682, -0.1000, -0.0501, -0.1532, -0.0325,  0.0589,  0.0420,
        -0.0034, -0.1157, -0.1609, -0.0391, -0.0495, -0.0153, -0.0230, -0.0163,
        -0.0891, -0.2090, -0.0149, -0.0415, -0.0991,  0.0423, -0.0594, -0.0260,
        -0.0103, -0.1590,  0.0636,  0.1136, -0.0153,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0075, -0.6308,  0.1271, -0.1339, -0.0650, -0.1437, -0.0580, -0.1204,
        -0.0469, -0.0326, -0.1013,  0.0134, -0.0770, -0.1715,  0.1441,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9288,  0.2292,  0.0360,  0.0035,  0.0669,  0.1526,  0.1881,  0.0910,
         0.2670,  0.0266, -0.0090,  0.0590,  0.1512,  0.0445,  0.0534, -0.0925,
         0.2878,  0.4106,  0.1583,  0.0971,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5207,  0.0829, -0.0293,  0.0080, -0.4323, -0.0597, -0.0301, -0.0096,
        -0.0631, -0.0361,  0.1416, -0.1227, -0.0409, -0.0133, -0.1183, -0.0814,
        -0.0079, -0.0631,  0.0521, -0.0546,  0.0606, -0.0448, -0.0182, -0.0223,
         0.0390, -0.0500, -0.2675, -0.0144, -0.0892,  0.0176, -0.1046, -0.0610,
        -0.0076, -0.0459, -0.0007, -0.2357, -0.0071, -0.0347, -0.0081, -0.0119,
         0.1546,  0.0795,  0.1734,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0280,  0.0908,  0.0471, -0.0975, -0.1003, -0.1289, -0.0282, -0.0980,
        -0.0073, -0.0624, -0.1468, -0.1210, -0.1342, -0.0931, -0.3009, -0.0440,
        -0.0377, -0.0541, -0.0350,  0.0213, -0.0965, -0.0706, -0.1809, -0.0287,
        -0.0889, -0.0701,  0.1066,  0.0295, -0.2735, -0.0905, -0.0522, -0.1230,
        -0.0264, -0.0226, -0.0468, -0.0413, -0.0459,  0.1282,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0544, -0.4737, -0.2360, -0.4516, -0.0566, -0.1136,  0.1754,  0.0369,
        -0.0629, -0.4623,  0.0543, -0.0063, -0.0721,  0.0514,  0.1076,  0.0516,
        -0.0511, -0.1276,  0.2394,  0.0172,  0.2144,  0.0460,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1755, -0.1998, -0.0031,  0.1212,  0.0640, -0.1677, -0.0178,  0.0926,
        -0.0471, -0.0607, -0.0845,  0.0449, -0.0317, -0.0013, -0.0734, -0.0651,
        -0.3508, -0.0708, -0.1161, -0.1535, -0.0431, -0.0848, -0.1120, -0.1489,
        -0.0340,  0.0428, -0.1171, -0.0091,  0.0255, -0.1421, -0.1994,  0.0216,
         0.0097, -0.0065, -0.1112, -0.3277,  0.0268, -0.0160, -0.0381, -0.1745,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2741, -0.0642, -0.0312, -0.0712, -0.1282, -0.0796,  0.0295, -0.0574,
        -0.1343, -0.1186, -0.0631, -0.0089,  0.0135, -0.2090, -0.0862, -0.1708,
        -0.0491, -0.0492, -0.1487, -0.0511, -0.0265, -0.0326, -0.0463, -0.0979,
        -0.0210, -0.4410,  0.0182, -0.1531, -0.1685, -0.0604,  0.0809,  0.0245,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0124, -0.9010,  0.0562,  0.0035, -0.1595, -0.0338, -0.0959, -0.0930,
        -0.2277, -0.0690, -0.0521, -0.0361,  0.0208, -0.0493,  0.0152, -0.1073,
        -0.0313,  0.0038,  0.0294, -0.0455,  0.0315, -0.0991, -0.0872, -0.1332,
        -0.0197,  0.0520, -0.0128, -0.0488,  0.0199, -0.0213,  0.0312,  0.0069,
         0.0444,  0.0555, -0.5112,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.2515, -0.1460, -0.2118, -0.0528, -0.1228, -0.1610, -0.3213, -0.2294,
        -0.0027, -0.3416, -0.1195, -0.0590,  0.0326, -0.0935, -0.0723, -0.0307,
        -0.1058, -0.1024, -0.0411, -0.1014, -0.0484,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7260, -1.3740,  0.0396, -0.1969, -0.2325, -0.2452, -0.1003, -0.2407,
        -0.5119,  0.1275,  0.0581,  0.0884,  0.0377, -0.0539, -0.0040, -0.0226,
        -0.0588,  0.0384,  0.0835,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0598, -0.3269, -0.0735, -0.1245, -0.0535, -0.0393,  0.0346, -0.3356,
         0.0592, -0.1193, -0.1030,  0.0653, -0.1310, -0.4282, -0.0265, -0.0892,
        -0.1342, -0.1556,  0.1465, -0.2916,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0954, -0.6515, -0.0544, -0.0445,  0.0260, -0.0493, -0.1193, -0.1495,
        -0.0477, -0.1554, -0.0265, -0.1111,  0.0270, -0.1501, -0.0601, -0.0861,
        -0.3083, -0.1077, -0.0686, -0.1492,  0.0348,  0.2425,  0.2913,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2394, -0.3528, -0.1544, -0.1120,  0.1204, -0.0321, -0.0601, -0.0311,
        -0.0578, -0.4350, -0.0159, -0.0516, -0.0104, -0.2699,  0.2229,  0.0946,
        -0.0270, -0.0268, -0.2324, -0.0530, -0.0347,  0.0009, -0.0734, -0.2010,
         0.0148,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5962e-01, -9.4503e-01,  1.7656e-02,  1.6658e-01,  1.1105e-02,
        -9.1602e-02,  1.2682e-01,  7.5347e-03,  8.8795e-02, -1.8730e-01,
        -9.6427e-02, -1.9327e-03, -1.1190e-03, -4.6473e-02,  8.8104e-02,
         4.8776e-02, -8.6371e-03, -2.0335e-02,  3.3945e-03, -5.4906e-03,
        -1.5708e-01, -7.3206e-02, -8.6251e-02, -1.4696e-01, -1.0628e-01,
        -3.4053e-02,  2.8104e-02,  9.2098e-03,  2.0959e-02, -9.2812e-04,
         5.3489e-02,  9.6071e-04, -1.1775e-02,  1.0430e-02,  1.0799e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2629,  0.0106,  0.0578, -0.1805, -0.0840,  0.0434, -0.0600,  0.0300,
        -0.3288, -0.0736, -0.0374,  0.0713,  0.2673, -0.0700, -0.0622, -0.2220,
        -0.0496, -0.1764, -0.1927, -0.0482, -0.1030, -0.1505,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3714, -0.1415,  0.0766,  0.0006,  0.0057, -0.1717, -0.1123,  0.0393,
        -0.0268, -0.3286, -0.3339,  0.1564,  0.1120,  0.0053, -0.2624, -0.1805,
        -0.0202,  0.0358,  0.1404, -0.0769, -0.0587,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0772, -0.5757,  0.0255,  0.0209, -0.0375,  0.0650, -0.0867, -0.2150,
        -0.0460, -0.0262, -0.0123, -0.0141,  0.0066, -0.1493, -0.0024, -0.2062,
         0.0258, -0.0469, -0.0067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2202, -0.0130, -0.0484, -0.0322, -0.0430, -0.1419,  0.0104, -0.0573,
        -0.0008, -0.0749,  0.0152, -0.0014, -0.0282, -0.0668, -0.0171,  0.0095,
        -0.0498, -0.1223, -0.0322, -0.0713,  0.0282, -0.0686,  0.1094, -0.0144,
         0.1008,  0.0652, -0.0774, -0.3046, -0.1073, -0.2939, -0.0720,  0.0060,
        -0.0841, -0.1814, -0.0047, -0.0008,  0.1153,  0.0043,  0.0840, -0.0176,
        -0.0596, -0.0281,  0.0160,  0.0333, -0.0354, -0.0519,  0.2249],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0109, -0.4641, -0.0999, -0.0140,  0.0283, -0.0278,  0.0224,  0.1351,
        -0.0230,  0.0107,  0.0848,  0.0638,  0.0432,  0.0178,  0.0628, -0.0790,
        -0.3957, -0.0218, -0.1411,  0.0106, -0.0418, -0.0876, -0.0652, -0.3031,
        -0.0545,  0.1184,  0.0706, -0.0179, -0.0239,  0.0090, -0.1340,  0.0602,
        -0.0250,  0.3056,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4121, -1.1909,  0.0102,  0.0033, -0.1642, -0.0891,  0.0101, -0.0040,
        -0.0369, -0.0509,  0.0945,  0.0223, -0.0597, -0.1069,  0.0117,  0.1160,
        -0.1718,  0.0972, -0.1337, -0.1671, -0.0295,  0.0744,  0.0158,  0.0571,
        -0.0038, -0.0096, -0.0112, -0.0206,  0.0081, -0.0668,  0.0020, -0.1419,
         0.1606, -0.1778,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.1183, -0.7545,  0.1598, -0.0432, -0.1239, -0.1501, -0.0614,  0.0257,
        -0.0315, -0.2383, -0.2613,  0.0585, -0.1980, -0.2464, -0.0841, -0.0981,
         0.0410,  0.2863,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1314, -0.2995, -0.2208, -0.0379, -0.0478,  0.0111, -0.0473, -0.0418,
        -0.1578, -0.1286,  0.0074, -0.0725, -0.0551, -0.0997, -0.0393,  0.0661,
         0.0859,  0.2506,  0.0101, -0.2119,  0.0597,  0.0698, -0.0274,  0.0363,
        -0.0455, -0.2252, -0.0382,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3002, -0.3553, -0.3231, -0.1393, -0.0891,  0.0523, -0.1562, -0.2856,
        -0.3310, -0.0409, -0.1227, -0.1105, -0.2278, -0.2258,  0.0286, -0.0871,
        -0.1609, -0.0929, -0.0151, -0.3126, -0.0519, -0.0679, -0.1118,  0.0850,
        -0.1859, -0.0619,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0821,  0.9567,  0.0919,  0.1600,  0.2231,  0.0754,  0.0019,  0.2729,
         0.0512,  0.1000,  0.1987,  0.0107, -0.0198, -0.0740,  0.1676,  0.0813,
         0.0756, -0.0278, -0.1191,  0.3189, -0.1806,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1070, -0.5930, -0.3474, -0.4307, -0.0275,  0.0703, -0.1604, -0.0278,
        -0.0448, -0.0337, -0.1194, -0.1333,  0.0928,  0.3379, -0.2497,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2360,  0.0403,  0.1727,  0.0698, -0.0277,  0.3870, -0.0033,  0.0702,
         0.1074, -0.0123,  0.0994, -0.0754,  0.5923,  0.1459,  0.3097, -0.0225,
         0.0410,  0.0212,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8287, -0.3883, -0.1989, -0.6257, -0.0842,  0.0154, -0.0226, -0.0318,
         0.0029, -0.1500, -0.1428, -0.0708,  0.1570, -0.1112, -0.1144,  0.0514,
         0.0015, -0.1486,  0.0882,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0020, -0.1527, -0.0535,  0.0067, -0.0531, -0.0420, -0.0088, -0.0239,
        -0.0518, -0.0067, -0.0295, -0.0156, -0.0475, -0.0400,  0.0063, -0.0722,
        -0.0306, -0.0309,  0.0262,  0.1836, -0.0461,  0.0117, -0.1370, -0.0303,
        -0.0797, -0.0706, -0.1188, -0.0111, -0.0208, -0.0250,  0.0247, -0.0755,
        -0.1613, -0.0453, -0.0785, -0.0636, -0.1924, -0.0318, -0.0566, -0.0185,
         0.0118,  0.0847, -0.1048, -0.1398], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2738, -0.1957, -0.1167, -0.0923, -0.0923, -0.1157, -0.1243,  0.0126,
        -0.2571, -0.0603, -0.0486, -0.0717,  0.0222, -0.0159,  0.0856, -0.0921,
        -0.1277, -0.3485, -0.0015, -0.0327, -0.0726, -0.0398, -0.0136, -0.0072,
        -0.0312, -0.0481, -0.0012, -0.1749,  0.0506,  0.0043, -0.1448, -0.0749,
         0.0282, -0.0027,  0.1167,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0214, -1.0409, -0.0169, -0.0911,  0.0539,  0.0689, -0.1762, -0.0964,
         0.0904, -0.0141, -0.0360, -0.0686, -0.0702, -0.0139, -0.0013, -0.2861,
        -0.1112,  0.0425,  0.0437, -0.0122, -0.0559, -0.0662, -0.0676, -0.1088,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0240, -0.4758, -0.0965, -0.0524, -0.0835, -0.0265, -0.0407, -0.1077,
        -0.2420,  0.0061,  0.0074, -0.0041,  0.1162, -0.0742, -0.1616,  0.1175,
         0.1551,  0.0293, -0.3042,  0.0240, -0.0648, -0.1106, -0.0413, -0.0328,
         0.0130, -0.1312, -0.1068, -0.0700,  0.0018,  0.0893, -0.1045, -0.0602,
        -0.2649, -0.0918,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0616, -0.1891, -0.1168, -0.0707,  0.1296, -0.0204, -0.2491, -0.1161,
        -0.0268, -0.1604, -0.1603, -0.0041, -0.1247, -0.1019, -0.1566, -0.4387,
        -0.0096, -0.0517, -0.0074,  0.1293, -0.0161, -0.0076, -0.0338, -0.0699,
        -0.0044,  0.0043, -0.0664,  0.0105,  0.0964, -0.2185,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-3.3354e-01, -8.6520e-01, -2.9060e-01, -5.2865e-01, -6.4077e-02,
        -1.0544e-01, -1.7288e-01, -8.2087e-02, -1.5399e-01, -6.3498e-01,
        -2.8125e-01,  8.4864e-04, -3.8435e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0972, -0.1737,  0.2210, -0.1007, -0.1382, -0.2089,  0.2877, -0.1268,
        -0.4470, -0.2625, -0.0634, -0.3053,  0.0866, -0.1810, -0.3452,  0.0711,
        -0.0726, -0.0243,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3916, -0.6907, -0.3434, -0.2864, -0.3696,  0.2807, -0.0597, -0.5213,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8212e-01, -2.3137e-01, -5.8026e-02,  2.6023e-02,  1.5609e-02,
        -1.0095e-02, -2.2876e-02,  3.5888e-02,  5.2531e-02,  1.3232e-02,
         2.5123e-05, -6.1278e-02, -9.0659e-02, -2.6054e-03, -4.8201e-02,
        -2.1912e-02,  2.3326e-02,  5.6015e-02, -4.3374e-02, -4.2753e-02,
        -2.5067e-02, -2.2487e-03, -5.6948e-02, -1.3452e-01, -8.4300e-02,
        -1.3203e-02, -3.9286e-01, -1.9997e-01, -5.6832e-01, -1.2570e-01,
         7.6302e-02, -2.0854e-01,  3.6049e-02, -2.7926e-01,  2.5099e-02,
        -1.6100e-01, -3.3200e-01, -6.1517e-02,  7.0212e-02, -4.6793e-02,
        -4.2082e-02, -6.7199e-02, -2.1191e-02,  2.3757e-02, -7.9101e-02,
         1.0363e-01,  1.5393e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2887, -0.2950, -0.7594, -0.9698, -0.0077,  0.0622, -0.1349,  0.2772,
        -0.1998,  0.3429, -0.2110,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0739, -0.6440,  0.1101,  0.0062, -0.1924, -0.2057, -0.2367, -0.8220,
        -0.0635, -0.3280, -0.2317, -0.1864, -0.2677, -0.0394, -0.0049, -0.1529,
        -0.0912, -0.2792,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1554, -0.3330, -0.5308, -0.1344, -0.0705, -0.0635, -0.3894, -0.1039,
        -0.1084, -0.0395,  0.1237,  0.0352, -0.0768,  0.2074, -0.0233,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1165,  0.7766, -0.0166,  0.0091, -0.2904,  0.1570,  0.3159, -0.2541,
         0.2838,  0.1532,  0.4045,  0.0331, -0.0021, -0.0861, -0.0771,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0419, -0.0742, -0.0953,  0.0157,  0.0023, -0.1226, -0.1747, -0.1430,
        -0.2098, -0.1336, -0.0934, -0.2403, -0.2116, -0.0444, -0.0276, -0.0326,
        -0.1126,  0.0275,  0.0076,  0.0684, -0.0989, -0.1538, -0.0511,  0.1152,
        -0.0688, -0.0733,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2791, -0.3569, -0.2142, -0.1455, -0.0796, -0.0732, -0.1127, -0.0957,
        -1.0403, -0.0052, -0.1203,  0.1316,  0.0695,  0.0958, -0.1828,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1292, -0.6154, -0.2750, -0.0512, -0.2122, -0.0211, -0.1170,  0.0942,
        -0.0773, -0.3193, -0.0166,  0.1736, -0.0988, -0.0316, -0.0127, -0.0723,
        -0.0471,  0.1742, -0.1193,  0.1105, -0.1229,  0.0330,  0.1644,  0.0422,
        -0.0875,  0.0557,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1545,  0.0155,  0.0752,  0.0515, -0.0848, -0.0692, -0.0947, -0.0152,
        -0.0630, -0.1286, -0.3009, -0.0717, -0.0459,  0.1466, -0.1953, -0.0647,
         0.1731, -0.1437, -0.0489, -0.1158,  0.0197, -0.0892,  0.0111,  0.0074,
         0.0244, -0.3643,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.0518, -0.7122,  0.0369, -0.0802, -0.0964, -0.0331, -0.0442, -0.1004,
         0.0330, -0.1173, -0.0558, -0.0697, -0.1685, -0.0411, -0.1112, -0.0985,
         0.0146, -0.1351, -0.0286,  0.0287,  0.0961, -0.0824,  0.0558, -0.0646,
        -0.1693, -0.1018, -0.0681,  0.0155, -0.0229,  0.0361,  0.0081, -0.0553,
        -0.0209, -0.0257, -0.0363,  0.0446,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0017, -0.3859, -0.1847, -0.0566, -0.1586, -0.0860, -0.0568, -0.0222,
         0.0765,  0.0483, -0.0385,  0.0156,  0.0844,  0.1321, -0.0460, -0.0105,
         0.0577, -0.0202,  0.0752, -0.0148, -0.0495, -0.1557, -0.0530, -0.0512,
        -0.0947, -0.3216,  0.0141, -0.0295, -0.1128, -0.0595, -0.0381, -0.0859,
        -0.2592,  0.0127, -0.0208,  0.0872,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1938, -0.7704, -0.2304, -0.0556,  0.0466,  0.0283, -0.1025,  0.0241,
        -0.0113,  0.0074,  0.0124, -0.0690, -0.1717, -0.0819, -0.0271, -0.0672,
        -0.2249,  0.0103,  0.0999,  0.0991,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0286, -0.3072, -0.0092, -0.0347,  0.0218, -0.0365, -0.1830, -0.0848,
         0.0812, -0.0430, -0.0116, -0.0085, -0.0361, -0.0321, -0.2788, -0.0454,
        -0.0015, -0.0460,  0.0007, -0.0848,  0.0272,  0.0049,  0.0215, -0.0310,
        -0.1143,  0.0138,  0.0016, -0.0914, -0.0628, -0.0725, -0.0783, -0.1026,
         0.0054, -0.1776,  0.2061, -0.1806, -0.1450,  0.1828,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0522e-02, -1.0348e-01, -1.1613e-02, -1.7529e-01, -7.6096e-03,
        -3.6188e-02,  3.2698e-02,  3.3348e-02, -6.1121e-02, -1.2870e-01,
        -1.3410e-01, -2.3144e-01, -3.3291e-03, -1.0990e-02, -1.9706e-03,
        -1.0494e-01,  9.2756e-03, -1.5456e-01, -4.4653e-02,  2.3240e-04,
        -3.0773e-02,  1.3555e-02,  1.3869e-02, -3.5090e-02,  8.9382e-02,
        -1.8839e-02,  7.4926e-03, -6.7527e-02, -1.9147e-02, -2.2232e-03,
        -2.8320e-03, -2.3906e-02,  7.0342e-03,  2.7491e-02,  8.8531e-03,
        -1.1963e-02, -2.5483e-03,  1.8598e-01,  3.8240e-03,  3.1162e-01,
        -1.3553e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1168, -0.9998,  0.0460, -0.0019,  0.0838, -0.1165, -0.2949, -0.0651,
        -0.0564, -0.0399,  0.0056, -0.0607, -0.1099,  0.0432, -0.0776, -0.1430,
        -0.0325, -0.0962,  0.0136, -0.0621, -0.0117, -0.1321,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3347, -0.0437, -0.0305, -0.0333, -0.0874, -0.0742, -0.0604, -0.0076,
        -0.0790, -0.0589,  0.1005, -0.1985, -0.0071, -0.1123,  0.0598, -0.1072,
        -0.0294,  0.0073, -0.0067, -0.1304, -0.3488,  0.0331, -0.0118, -0.0930,
        -0.0491, -0.0144,  0.0078, -0.0944, -0.2303, -0.0126,  0.0056, -0.0126,
         0.0031, -0.1564,  0.0281,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4478e-01,  4.4287e-03, -2.6456e-01, -1.9236e-01, -2.3179e-01,
        -5.1957e-03,  7.8574e-02,  5.5090e-02,  7.3958e-02, -9.5523e-02,
        -8.7390e-02,  6.2177e-02, -4.2890e-02, -5.3166e-02,  4.0024e-02,
        -8.9855e-02,  1.5866e-03, -1.3772e-02, -1.3474e-01,  2.9708e-02,
        -1.2445e-01, -7.3738e-05, -8.7733e-02, -1.0694e-01,  1.3735e-02,
         6.5745e-02, -4.9598e-02, -6.6490e-02,  7.4498e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5257, -0.7506, -0.1678,  0.0370, -0.1597, -0.1770,  0.0150, -0.0098,
        -0.1045, -0.0245, -0.1822, -0.2289, -0.0241, -0.0948, -0.0785, -0.1056,
        -0.0791,  0.0190, -0.1038, -0.0036, -0.0896, -0.3419, -0.0915, -0.0647,
         0.6939,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0324, -0.0679, -0.1842, -0.1306, -0.3133, -0.2012, -0.1554, -0.2409,
        -0.1070,  0.0263,  0.0170,  0.1678,  0.0800,  0.0459, -0.2010, -0.1642,
        -0.0823, -0.0478, -0.0701, -0.0780, -0.1225,  0.0098, -0.0273,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3678,  0.0110,  0.0066, -0.0981, -0.0054,  0.1276, -0.1138, -0.0859,
        -0.0149,  0.0180,  0.0054,  0.0122, -0.1002, -0.2490,  0.0757, -0.1661,
        -0.0290, -0.0400, -0.0765, -0.1514,  0.1126, -0.1977, -0.0068,  0.1183,
        -0.0118, -0.1467, -0.3579, -0.0226, -0.1383, -0.1034, -0.0479, -0.0987,
        -0.1248, -0.0405, -0.0310, -0.0156, -0.0475, -0.0737,  0.2105,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1802,  0.1618, -0.1078, -0.3118, -0.6888, -0.1200, -0.2142, -0.4219,
        -0.2296,  0.2019, -0.1846, -0.2368,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.0133, -0.2291, -0.3863,  0.0074, -0.0685, -0.1709, -0.2370, -0.0342,
        -0.0457, -0.0420, -0.0411, -0.0051, -0.0051,  0.0278, -0.1018, -0.0101,
         0.0079, -0.1343,  0.0109, -0.0731, -0.0073, -0.0711, -0.0295,  0.0154,
         0.0233,  0.0092,  0.0183,  0.1145, -0.2372,  0.1969,  0.0503, -0.0031,
        -0.1032,  0.0285, -0.1155, -0.1198, -0.0106, -0.0953,  0.0295, -0.0414,
         0.0304,  0.0589,  0.0186,  0.1821, -0.4603,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2522,  0.6820, -0.1631,  0.2180,  0.2796,  0.2470,  0.5488,  0.3356,
         0.5057,  0.5381, -0.2024,  0.1414,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5427, -1.1171, -0.4725, -0.0910, -0.2061,  0.2524,  0.2039,  0.3661,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9955e-01, -4.0515e-01, -4.5695e-03,  1.0803e-02, -4.2822e-03,
         1.0724e-02,  9.2058e-02,  9.9496e-03, -2.1901e-02, -3.9505e-01,
        -3.0538e-04,  4.8633e-02, -4.4437e-02, -8.0637e-03, -4.0653e-02,
         4.7521e-02,  9.8109e-02, -6.2238e-02, -2.9945e-03, -9.9640e-03,
         8.4417e-03,  4.2075e-02, -2.0593e-02, -3.3327e-02, -3.0167e-02,
        -2.6401e-02, -1.5770e-02, -7.5808e-02, -1.2267e-01,  3.5673e-03,
        -1.3824e-02, -4.1362e-02, -2.2980e-02, -6.1328e-02, -2.7390e-02,
        -1.4614e-02,  5.1008e-03, -2.3173e-02, -9.8027e-03,  1.4850e-02,
         2.1175e-03, -8.6206e-03,  1.3125e-02, -4.9683e-02, -5.8739e-02,
        -2.8430e-02, -4.1098e-01,  1.9221e-02, -1.3557e-01, -2.0843e-01,
        -3.0530e-02,  1.9538e-02, -5.7082e-03,  2.4239e-02, -8.5715e-02,
        -4.1349e-03, -9.8193e-02,  2.5178e-02, -2.5533e-02,  1.5169e-03,
         1.3953e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2745, -0.2094,  0.0213, -0.0951, -0.3313,  0.0752, -0.1233, -0.0568,
        -0.0413, -0.0970, -0.2347, -0.1270, -0.0188, -0.0421, -0.2841, -0.0128,
         0.0963, -0.0149, -0.0787, -0.0688, -0.2187, -0.0890, -0.1502, -0.0402,
        -0.0857, -0.0849, -0.0551, -0.0532, -0.0724, -0.0850, -0.0639, -0.2357,
        -0.0507,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1592e-01, -2.2062e-01, -3.0560e-01, -1.0055e-01, -5.8972e-02,
        -5.7620e-02, -1.6674e-01,  7.2860e-02, -1.5405e-02,  3.4807e-02,
        -6.1247e-03, -7.5071e-02, -1.4691e-01, -3.2975e-03, -6.0988e-02,
         2.8047e-02,  1.3683e-05, -1.3535e-01, -6.4922e-01, -8.1200e-02,
         1.2698e-01, -1.0007e-01, -2.3737e-01, -5.6087e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0875, -0.2986, -0.1878, -0.1060, -0.1676, -0.2864, -0.0881, -0.1811,
        -0.0418, -0.3111, -0.2062, -0.1166, -0.5617, -0.0850, -0.0243, -0.0505,
         0.0084, -0.0439,  0.1873,  0.0996, -0.0318,  0.0387,  0.2549, -0.1547,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3589,  0.0068, -0.2526, -0.0147,  0.0360,  0.0710, -0.0561, -0.1095,
         0.0713, -0.0399, -0.0928, -0.0099,  0.0739,  0.0427, -0.1137, -0.2325,
        -0.1350, -0.0831, -0.0619, -0.0619, -0.0794, -0.0404, -0.0692, -0.4068,
         0.0758,  0.0594, -0.1932, -0.0519,  0.0579, -0.0413, -0.1976, -0.0788,
        -0.0263,  0.2416,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1359, -0.1547,  0.0759,  0.0339, -0.1447, -0.0491, -0.0248,  0.0090,
        -0.0390, -0.0318,  0.1021, -0.1445,  0.1098, -0.2109, -0.0811, -0.0178,
        -0.2797, -0.0650,  0.0292, -0.1083, -0.1094, -0.1761,  0.0108, -0.0350,
        -0.0166,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0188,  0.1209, -0.1252, -0.0376, -0.1645,  0.0017, -0.2383, -0.0222,
        -0.1225, -0.8019, -0.2401, -0.0324, -0.0133,  0.0286, -0.0267, -0.0981,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2655,  0.3824,  0.0200,  0.1658,  0.0795,  0.1093,  0.6107, -0.0499,
        -0.1112,  0.0023, -0.0816, -0.2453,  0.0772,  0.1077,  0.0088, -0.0059,
         0.0315,  0.1126,  0.0921,  0.0189,  0.0335, -0.0547,  0.1159,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0846,  0.4702,  0.0662,  0.0663,  0.7006,  0.2236, -0.1149,  0.0029,
        -0.0264, -0.0182, -0.0718,  0.0354,  0.1034,  0.0897,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.6183, -0.2239, -0.1176, -0.4792, -0.0219,  0.0461,  0.0055,  0.1237,
        -0.0600, -0.0351, -0.2066, -0.0678, -0.1126,  0.0382, -0.0259, -0.0964,
         0.0611, -0.4962, -0.1465, -0.0798, -0.1425, -0.1842,  0.3497,  0.0575,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2416, -0.1524, -0.4858, -0.1139, -0.1202, -0.0077, -0.5589, -0.3055,
        -0.0688, -0.0817, -0.0330,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0723, -0.2882, -0.0400, -0.0921, -0.0156, -0.2450, -0.3913,  0.0340,
        -0.4705, -0.0397, -0.1368, -0.0672, -0.1011,  0.1276, -0.1328,  0.0098,
        -0.0734, -0.2649, -0.2177, -0.1608, -0.2562,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5299, -0.1477, -0.1810,  0.0870, -0.1854, -0.2009, -0.0516, -0.4499,
        -0.0905, -0.2490,  0.0677, -0.1567, -0.1160, -0.6347,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0871,  0.3632, -0.0738, -0.0781,  0.0324,  0.0231, -0.0474, -0.1795,
        -0.1316,  0.0860,  0.1344,  0.2395,  0.4557,  0.0324,  0.0097, -0.0880,
        -0.1758,  0.0746, -0.0354,  0.0047,  0.2295,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6182, -1.0119, -0.1241,  0.0627, -0.0678, -0.1318, -0.0261, -0.1309,
        -0.1600,  0.0362, -0.1379, -0.0596, -0.1376,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0324, -0.1270,  0.0043,  0.0127,  0.0197, -0.0639, -0.0555, -0.1876,
        -0.0350, -0.1033, -0.0362,  0.0129,  0.0511,  0.0379, -0.0306, -0.0393,
        -0.0103,  0.0468,  0.0065, -0.0237, -0.0575, -0.0086, -0.0567, -0.0553,
        -0.4136, -0.0832, -0.2679, -0.2667, -0.1980,  0.0693, -0.0317,  0.0154,
         0.0523,  0.0073,  0.1065,  0.0661, -0.0505, -0.0009, -0.0125, -0.0276,
        -0.0498,  0.0293, -0.0511,  0.1425,  0.0199], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3424, -0.1104, -0.2131, -0.1026, -0.0610,  0.0501, -0.0153, -0.0442,
         0.0197, -0.0322, -0.1468,  0.0204,  0.0279,  0.1243, -0.0386, -0.0154,
        -0.5586, -0.0222,  0.0136,  0.0665, -0.0313, -0.2928, -0.1011, -0.3304,
         0.0499, -0.1734,  0.0035, -0.0794, -0.2187, -0.0920,  0.1369,  0.0930,
         0.0441,  0.0921,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1830, -0.0599, -0.1397, -0.1482, -0.1400, -0.0259, -0.0709, -0.1288,
         0.1447, -0.0739, -0.0213, -0.0445, -0.0448,  0.0824, -0.0327,  0.1181,
         0.0569,  0.0167,  0.0025, -0.2267, -0.5806, -0.1418, -0.1049,  0.1140,
         0.1484,  0.0067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2443, -0.8209, -0.0412, -0.0368,  0.0375, -0.2001, -0.0416, -0.2368,
        -0.1446, -0.0499, -0.0910, -0.0274,  0.0144, -0.0463, -0.0509, -0.1038,
        -0.3254,  0.1073, -0.0558,  0.0521, -0.0752,  0.0840,  0.0952,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5737, -0.2096, -0.0143, -0.0620,  0.0027, -0.1513, -0.0239, -0.0114,
        -0.1011, -0.0110, -0.0006, -0.1854, -0.0366, -0.0497, -0.4295,  0.2066,
         0.0568, -0.1080, -0.1692, -0.0924,  0.0061, -0.1283, -0.0226, -0.0388,
        -0.0446, -0.0184, -0.0428, -0.1165,  0.0841,  0.1414,  0.1988, -0.0553,
         0.0836,  0.0302,  0.0295, -0.0097,  0.0008,  0.0850,  0.1314,  0.0766,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3222, -0.2835, -0.0379, -0.1228, -0.1976, -0.0341,  0.0499, -0.1449,
        -0.0444, -0.2022, -0.0646, -0.0987, -0.1691, -0.1007, -0.0836, -0.1527,
        -0.0856,  0.0610,  0.1014, -0.0370, -0.0539, -0.1187, -0.0201,  0.0444,
        -0.0207, -0.1290,  0.0193, -0.1599, -0.0199,  0.0499, -0.3261, -0.0021,
         0.0351, -0.1730,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0753,  0.0171,  0.0054, -0.0151, -0.1638, -0.0728, -0.0473,  0.0138,
        -0.0197,  0.0558, -0.0412, -0.0270,  0.0861, -0.0254, -0.3243, -0.0342,
        -0.0261,  0.0338, -0.1087,  0.0092, -0.0383, -0.0488, -0.0274,  0.0257,
        -0.0950, -0.2572, -0.0616, -0.1039, -0.0669, -0.0989,  0.0098, -0.0196,
         0.0438, -0.0666, -0.0599, -0.0084, -0.0264, -0.0593, -0.0522, -0.0313,
        -0.0363,  0.0221, -0.0322, -0.0258, -0.0273,  0.0076,  0.0291,  0.0109,
        -0.0562, -0.0395, -0.0088,  0.1968], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2650, -0.0253,  0.1692,  0.0511, -0.2080, -0.0435, -0.0030,  0.0966,
        -0.0150,  0.0296, -0.0031,  0.0284, -0.0267, -0.1394, -0.0686, -0.0974,
         0.0305,  0.0631, -0.1259,  0.1249, -0.0173, -0.0629, -0.0473, -0.3906,
         0.0395,  0.0496,  0.0234,  0.0272,  0.0209,  0.0102, -0.1524,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2672, -0.1493,  0.1056,  0.0200, -0.0415,  0.0641,  0.0305,  0.0329,
        -0.0948,  0.0014, -0.0163, -0.1108, -0.0279,  0.1295, -0.0329, -0.1096,
         0.3258, -0.0404, -0.1370, -0.1920, -0.0873, -0.0318, -0.1055,  0.0502,
         0.1369,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3668, -0.3266, -0.1387,  0.1732, -0.3078, -0.3499, -1.3245, -0.0191,
        -0.1582, -0.1023,  0.3100,  0.5164,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1459, -0.4997, -0.0801, -0.1137, -0.1622, -0.1577,  0.0971, -0.0294,
         0.0318, -0.0251, -0.0805,  0.1039, -0.0590, -0.4745,  0.0170, -0.2299,
        -0.0264,  0.0498,  0.0501, -0.0298, -0.0884, -0.0672,  0.1328,  0.1053,
         0.0916, -0.2099, -0.0406,  0.0446, -0.1049,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1234, -0.7301,  0.3164,  0.1188,  0.2193, -0.2136, -0.3992,  0.3716,
         0.4098,  0.0014, -0.0184, -0.1300,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2292, -0.9686,  0.3626, -0.0940,  0.1796, -0.3217, -0.2767, -0.0456,
        -0.1164, -0.0851, -0.1983, -0.0432,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4736,  0.2568, -0.3355, -0.0393, -0.5423, -0.0570,  0.0384, -0.0918,
        -0.3290, -0.0532, -0.0542, -0.5061, -0.0910,  0.3872, -0.3137, -0.1877,
         0.1751, -0.0661,  0.1180, -0.0844,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3706, -1.2564, -0.0443, -0.0187, -0.3500,  0.0304,  0.0181, -0.0287,
         0.1052,  0.1249,  0.0309, -0.0464,  0.0086, -0.1049, -0.0887,  0.0410,
        -0.1003,  0.0539,  0.0484, -0.1471, -0.0246,  0.0250,  0.0329,  0.1400,
         0.0466, -0.0264,  0.0445,  0.0054, -0.1158, -0.0387, -0.0252, -0.0566,
        -0.0511, -0.1706, -0.1082,  0.2797, -0.2230,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3903,  1.8619,  0.1495,  0.2653, -0.1492, -0.1161,  0.2076, -0.4522,
        -0.1248,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2826, -0.0854, -0.1692, -0.0098, -0.0268,  0.0295, -0.1672, -0.2911,
         0.0174, -0.0936, -0.0242, -0.0162, -0.1783, -0.4620,  0.0607,  0.1213,
        -0.0358,  0.1532, -0.1145, -0.2041, -0.1249,  0.0174, -0.0389, -0.0090,
         0.0666, -0.0471, -0.0541,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2670,  1.0573,  0.0276, -0.0193, -0.0296,  0.1458,  0.0307,  0.0311,
         0.0121,  0.0089, -0.1042,  0.1812,  0.0686, -0.0909,  0.0613, -0.1933,
         0.1892, -0.0942, -0.1853, -0.0504, -0.2031,  0.1054, -0.0130, -0.1087,
         0.0635, -0.1224,  0.0356,  0.0635,  0.5267, -0.0023, -0.0185, -0.2043,
         0.0299, -0.0676,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.1509, -0.0250,  0.0037, -0.0014, -0.0848, -0.0416,  0.0264,  0.0848,
        -0.0525, -0.0103, -0.0289, -0.4120, -0.0774, -0.0939, -0.0006, -0.0884,
        -0.3266,  0.0674,  0.0165,  0.0301,  0.0791, -0.0545, -0.1462,  0.0223,
         0.0176, -0.0855, -0.0245, -0.1218,  0.1290, -0.0835, -0.1167, -0.2413,
        -0.2913,  0.0293,  0.0128, -0.1265, -0.0679,  0.0897,  0.0603, -0.0394,
        -0.0639,  0.1059,  0.0906, -0.1732,  0.2864,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0093, -0.2363, -0.2832, -0.1950, -0.1877, -0.1373, -0.3880, -0.0133,
        -0.1475, -0.1766, -0.1032, -0.1174, -0.1714,  0.3239, -0.2053,  0.0197,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8528,  0.1624, -0.0762, -0.1230, -0.0187, -0.0705, -0.0597,  0.0978,
        -0.1701, -0.1755, -0.0834, -0.1803, -0.0633, -0.0539, -0.1161,  0.0904,
        -0.1508, -0.1132,  0.0400,  0.0270, -0.0861,  0.1439, -0.0462,  0.0295,
         0.0212,  0.1617, -0.1079,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1639, -0.1813, -0.0362, -0.0194, -0.0367, -0.0306, -0.0583, -0.0259,
        -0.0413, -0.0207,  0.0107, -0.0212, -0.0125, -0.0256, -0.0092,  0.0270,
        -0.0244,  0.0349, -0.0226, -0.0703,  0.0328, -0.0797, -0.0641, -0.1857,
         0.0267,  0.0323, -0.0392, -0.1126,  0.0318, -0.1580, -0.0419, -0.0903,
        -0.0387,  0.0323, -0.0917, -0.0139,  0.0137, -0.1311, -0.0007,  0.0688,
        -0.0405, -0.0890, -0.0107,  0.0070, -0.0391,  0.0783], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1707,  0.0821, -0.0493, -0.1070, -0.0338, -0.3131, -0.0336, -0.1349,
        -0.0488, -0.0942, -0.0759, -0.0009, -0.0079,  0.0188, -0.0192,  0.0782,
        -0.0514, -0.1648, -0.3587, -0.7469, -0.1538, -0.0836, -0.0415, -0.0625,
        -0.0736, -0.0742, -0.1852, -0.3001,  0.0528,  0.1693,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3817,  0.0527,  0.1488,  0.0946, -0.0383,  0.2070,  0.3025,  0.1354,
        -0.3321, -0.0184,  0.0029,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3937, -0.5647,  0.0417,  0.1455, -0.0786, -0.1279, -0.1551, -0.3712,
        -0.0776, -0.0531,  0.0090,  0.0299, -0.3173, -0.1078,  0.1412, -0.2003,
        -0.0435, -0.0385, -0.1157, -0.1436,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9948,  0.6577,  0.5455,  0.3386,  0.3298,  0.4965,  0.5510, -0.2419,
        -0.0348,  0.2748,  0.2560,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1180, -0.5718, -0.1781, -0.4347, -0.3654,  0.1213, -0.0817, -0.0739,
        -0.2098, -0.0970,  0.0177, -0.0225, -0.0847, -0.0207, -0.0565, -0.1286,
         0.1016,  0.0361, -0.0974, -0.0751, -0.1540,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6635e-01,  3.9498e-01,  9.6214e-02, -1.9350e-01,  7.7974e-02,
         2.0361e-01, -3.5362e-01,  1.2869e-01,  9.8869e-02,  3.2189e-01,
         4.7095e-02,  8.2875e-03,  9.9695e-02,  7.6966e-02,  2.3296e-02,
         1.7167e-01, -5.6615e-05, -3.1776e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1245, -0.1627, -0.2235,  0.1084, -0.0099, -0.0044,  0.1991,  0.0187,
        -0.0239, -0.0092,  0.0826,  0.0104, -0.0517, -0.0637, -0.1536,  0.0677,
        -0.0207, -0.1561, -0.0104, -0.1245, -0.1081, -0.1392, -0.2496, -0.0288,
        -0.0900, -0.0068, -0.0681, -0.0531, -0.0484, -0.2717, -0.0666, -0.0428,
        -0.0621,  0.0202, -0.0766, -0.0424,  0.0018, -0.2476, -0.0976,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0957, -0.9043, -0.0572, -0.0776, -0.0542,  0.0313, -0.1159,  0.0876,
        -0.0575, -0.0654, -0.0569, -0.0272,  0.0548,  0.0150,  0.0330, -0.0482,
        -0.1349, -0.0904, -0.0147, -0.0080, -0.0423, -0.0471, -0.0104,  0.0087,
        -0.0455, -0.0398, -0.0809,  0.0151, -0.1097, -0.0035, -0.0147,  0.0100,
        -0.0159,  0.1813,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.0731, -1.0006,  0.4690, -0.2691,  0.2002, -0.1750,  0.0091, -0.6158,
        -0.0582,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3888,  0.3859, -0.1224, -0.0126,  0.0373,  0.1276,  0.0107,  0.0153,
        -0.0636,  0.0122,  0.1138,  0.0351,  0.0099, -0.0040, -0.0197,  0.2740,
         0.0362,  0.5954,  0.0030, -0.0303, -0.0541,  0.0303,  0.0923, -0.0353,
         0.0722,  0.0182, -0.0184,  0.0454, -0.0204, -0.0064, -0.0269,  0.1055,
         0.0607,  0.0358,  0.0312,  0.0324, -0.0173, -0.0312, -0.0643, -0.2095,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0121, -0.1490, -0.4806,  0.0160, -0.0101,  0.0161, -0.0268,  0.0079,
        -0.0375, -0.1951, -0.0182,  0.0136,  0.0033,  0.0046, -0.0361,  0.0222,
         0.0383, -0.0458, -0.1096,  0.0097, -0.0397, -0.0443,  0.0650, -0.0344,
         0.0647, -0.0414, -0.0893, -0.0112, -0.0630, -0.1732, -0.0557, -0.0392,
        -0.0083,  0.0036,  0.0085, -0.0968, -0.0565, -0.1198, -0.0064,  0.0358,
        -0.0146,  0.0507, -0.0041, -0.0233,  0.0090, -0.0049,  0.0884, -0.0171,
         0.0900, -0.0322,  0.0479, -0.0693,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0919, -0.8033, -0.2218, -0.1404, -0.0363, -0.5476, -0.1814, -0.0022,
        -0.0975, -0.0224, -0.1451, -0.3169, -0.1312, -0.1006, -0.0132, -0.3662,
        -0.1627, -0.0481, -0.3516, -0.2453, -0.4593, -0.1664,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0802,  1.1635,  0.2906,  0.1292,  0.1066, -0.1162,  0.1258,  0.0928,
        -0.0816,  0.0945,  0.0151,  0.1015,  0.0060,  0.0601, -0.0975,  0.2238,
         0.0391, -0.0860, -0.0923,  0.0461,  0.1898,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0796, -0.1038,  0.0265, -0.0503,  0.0047,  0.0290, -0.0188,  0.0271,
        -0.0691, -0.0420, -0.0170, -0.0156, -0.0075,  0.0110, -0.0468,  0.0194,
        -0.0046,  0.0064, -0.0503,  0.0084, -0.0287,  0.0054, -0.0283,  0.0063,
         0.0309,  0.0005, -0.0718, -0.0760, -0.0063,  0.0013, -0.1251, -0.0726,
        -0.0361, -0.0342, -0.0466, -0.0110, -0.0451,  0.1942, -0.0841, -0.0822,
        -0.1344, -0.0785, -0.0280, -0.1262, -0.1062, -0.0198, -0.0105, -0.0733,
         0.0845,  0.0756, -0.0849,  0.0029, -0.0605, -0.1555, -0.0535],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0883, -0.0251,  0.0535, -0.0024,  0.0487,  0.0008, -0.1015,  0.0440,
        -0.0089,  0.0754,  0.1446, -0.0279, -0.0064, -0.0154, -0.0359, -0.0434,
        -0.0606, -0.0797, -0.0833,  0.0606,  0.0083, -0.0429, -0.0910, -0.0370,
         0.0854, -0.0547, -0.1371, -0.1086, -0.1198, -0.0613, -0.2184,  0.0117,
        -0.0802, -0.2578, -0.0351, -0.1032,  0.0146, -0.1395, -0.0554, -0.0971,
        -0.0601, -0.0073,  0.0678, -0.0912, -0.0499,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4851,  0.3432,  0.2417,  0.2690,  0.0811,  0.0660,  0.1112,  0.0400,
         0.2264, -0.0916, -0.0320,  0.1729,  0.1820,  0.0287,  0.1390, -0.0038,
        -0.0240,  0.4251,  0.0211,  0.0962,  0.4339,  0.1395,  0.1011,  0.0999,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4063, -0.2900,  0.0105,  0.0571, -0.0925, -0.0598,  0.0429, -0.0452,
         0.0678,  0.0748, -0.0202,  0.0248,  0.0023, -0.2330, -0.1564, -0.1448,
        -0.1945, -0.0785, -0.1097, -0.1925,  0.0090,  0.0076, -0.0942, -0.2514,
         0.1884, -0.0859, -0.2120, -0.0248, -0.0227, -0.2078,  0.0365,  0.0422,
        -0.0103,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5585, -1.6077, -0.0797, -0.1040, -0.1664, -0.0537,  0.2762,  0.2375,
        -0.9328,  0.0200,  0.0347,  0.1693,  0.2110,  0.0203,  0.1719,  0.0979,
         0.0958,  0.4147,  0.1097,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4329e-01, -6.1718e-02,  3.0516e-02, -1.2288e-01, -2.5259e-01,
        -1.2702e-01, -1.1929e-01, -5.5229e-02, -2.4801e-01,  3.6917e-02,
        -3.5602e-02, -3.5521e-02,  2.8680e-04, -1.4241e-02,  3.3375e-02,
         7.6345e-02,  1.2612e-01, -9.3039e-02, -2.2229e-02,  3.5046e-02,
        -4.0714e-02, -2.1117e-01, -6.8627e-02, -1.0014e-01,  2.9893e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4077,  0.5790,  0.3102,  0.0913,  0.1060,  0.2544,  0.8252,  0.1495,
         0.1695,  0.0992,  0.0253,  0.0296,  0.1016,  0.0635,  0.1401, -0.3152,
        -0.0512,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.3138,  0.0969, -0.1555, -0.0744,  0.0097, -0.1531,  0.1597, -0.1024,
         0.0027, -0.0530, -0.0348, -0.0900, -0.0070, -0.0079, -0.0996, -0.0672,
        -0.1699, -0.3122, -0.0306, -0.1405, -0.0356, -0.3379, -0.2639, -0.0900,
        -0.1588, -0.2166,  0.0384,  0.1324, -0.1476,  0.0236, -0.2248,  0.0830,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0893, -0.3176, -0.1797, -0.1189, -0.0631, -0.0647, -0.1678, -0.2306,
        -0.0742, -0.0471,  0.0525,  0.0173, -0.1049, -0.0668, -0.0296,  0.1083,
        -0.0960, -0.0625, -0.0837, -0.0025,  0.0024, -0.0675, -0.0762, -0.3129,
         0.0377, -0.1577, -0.4690,  0.0044,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1663, -0.6557, -0.0754, -0.0582,  0.0030, -0.1136, -0.4578,  0.0106,
        -0.0865, -0.1598,  0.0187, -0.1107,  0.0320, -0.2403, -0.0085, -0.1335,
        -0.0437, -0.1186, -0.0440, -0.0663,  0.0085,  0.0055, -0.0564, -0.0187,
        -0.0736,  0.0726, -0.0089,  0.1881, -0.1115,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0912,  0.0862, -0.0167, -0.0428, -0.3294, -0.0239, -0.0757, -0.0410,
        -0.0100, -0.0985,  0.0713, -0.0563, -0.0679, -0.1096, -0.0714, -0.1074,
        -0.0836, -0.1278, -0.0406, -0.0403, -0.1046, -0.0450, -0.1071, -0.0164,
        -0.0994, -0.0789, -0.0777, -0.0112, -0.0988,  0.0517, -0.0356, -0.1099,
        -0.2212,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0466, -0.4127, -0.0619,  0.0056,  0.0036, -0.1108,  0.0614, -0.0870,
        -0.0642, -0.0576, -0.1140, -0.0556, -0.0067, -0.0551, -0.0597, -0.0119,
        -0.0872, -0.0842, -0.3726, -0.1778,  0.0765, -0.0400, -0.0196, -0.1553,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0786, -0.2123, -0.1127, -0.0047, -0.0447, -0.1453, -0.1582, -0.1059,
         0.0213, -0.0990, -0.0762, -0.1127, -0.1029, -0.0830, -0.0495,  0.0249,
        -0.0115, -0.0938, -0.0319, -0.5926, -0.0388,  0.1644,  0.1105,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1237, -0.1694, -0.0153,  0.0013, -0.0032, -0.1580, -0.0030, -0.1399,
        -0.0232, -0.0114, -0.0401, -0.0386, -0.0379,  0.0545,  0.0343, -0.0295,
        -0.1900, -0.0602, -0.0647,  0.0297,  0.0711, -0.2036,  0.0693, -0.0332,
        -0.1320, -0.0520,  0.0123, -0.0612, -0.0786, -0.1760, -0.0977, -0.1185,
        -0.0623, -0.0567, -0.0822, -0.1136, -0.0223, -0.1071, -0.0098,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1462, -0.2335, -0.0621,  0.0007, -0.0904,  0.0134, -0.0200,  0.0820,
        -0.1555,  0.0100,  0.0238, -0.3089, -0.0328, -0.0462,  0.0107, -0.4406,
        -0.0414, -0.0341, -0.0152, -0.1083, -0.2182, -0.0293, -0.0247, -0.3514,
        -0.1294, -0.1083, -0.0079, -0.0090, -0.0320, -0.0958, -0.0174, -0.0518,
        -0.1809, -0.1001, -0.0381, -0.0709, -0.0453, -0.0203, -0.0621, -0.0249,
        -0.0169,  0.2891, -0.1254,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5669e-02, -6.6744e-01, -5.0498e-02,  6.9628e-03,  3.1547e-02,
        -1.8357e-02, -5.9430e-02,  1.6533e-04, -2.1538e-01,  3.8261e-02,
         3.7399e-02,  1.8451e-02, -4.0889e-02,  2.5899e-02, -1.4391e-02,
        -6.9345e-02, -5.5399e-02, -2.3285e-01,  4.6147e-02, -6.5396e-02,
         5.2318e-02, -5.0467e-03, -7.6673e-02, -4.8244e-02, -3.7901e-02,
        -1.8468e-02,  9.9248e-02,  5.1196e-02, -1.1709e-01, -4.4352e-02,
        -4.5402e-02, -6.4105e-03, -1.2982e-01,  6.0531e-03, -2.8550e-02,
         4.4876e-02, -5.7793e-02,  3.1852e-02, -1.4244e-01, -1.6681e-02,
        -6.5900e-02, -1.8006e-01, -1.0365e-02, -6.0770e-04, -2.3761e-02,
        -5.7622e-02, -2.7706e-02,  1.4363e-02, -3.7634e-03, -4.4261e-02,
        -9.1216e-02,  8.0824e-03, -5.3251e-02, -7.2116e-03,  3.3996e-02,
        -9.2515e-02, -1.5108e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0855, -1.3029, -0.0608,  0.0274, -0.1525,  0.1027, -0.2118, -0.3502,
        -0.0481,  0.1131, -0.1333,  0.0820, -0.1170, -0.2057,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1277, -0.4460, -0.0873, -0.0078,  0.0197, -0.0785, -0.0220, -0.0631,
        -0.0549,  0.0007, -0.0615, -0.0275,  0.1645,  0.0978, -0.2005, -0.1485,
        -0.0644, -0.1196,  0.0822, -0.0609, -0.6864, -0.0398,  0.0288, -0.0259,
         0.0554, -0.1304,  0.0367,  0.0643,  0.3879, -0.0382,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5033,  0.3812, -0.0304,  0.1275, -0.0728,  0.0280,  0.3293, -1.2097,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2294, -0.3740,  0.0226, -0.0700, -0.0921, -0.3128,  0.0460, -0.0236,
         0.0584, -0.0331, -0.0667, -0.1958, -0.1658, -0.2303, -0.0397, -0.0293,
         0.0091, -0.0284,  0.0045, -0.0292,  0.1010, -0.0042, -0.0234, -0.3518,
        -0.1154, -0.0890, -0.1060,  0.0564, -0.1469, -0.1656,  0.0547,  0.1183,
         0.0233,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0094, -0.5514, -0.2205,  0.1639, -0.0215, -0.1361, -0.0053, -0.0306,
        -0.1122, -0.0653,  0.0085,  0.0343, -0.0642, -0.0281, -0.0170, -0.0669,
        -0.0079,  0.0050, -0.0437, -0.0142, -0.0060,  0.0277, -0.0172, -0.0275,
         0.0220, -0.1132, -0.0358, -0.2505, -0.0309,  0.0006, -0.0239, -0.0019,
        -0.0954, -0.1886,  0.0308, -0.0973, -0.0759,  0.0200, -0.0366,  0.0434,
         0.0194, -0.0298, -0.0666, -0.0241,  0.1428,  0.0666, -0.0381, -0.0031,
        -0.0344, -0.0038, -0.0262, -0.0363, -0.0228, -0.0045,  0.0232,  0.0008,
        -0.0110, -0.0630,  0.0089, -0.0479, -0.0227, -0.0361, -0.1809, -0.1233,
        -0.0219, -0.0045,  0.0365,  0.1871, -0.0102, -0.1701, -0.0962],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1534, -0.0091, -0.0376,  0.0168, -0.1039, -0.0975, -0.2872, -0.0332,
        -0.0498, -0.0134,  0.0108, -0.0617,  0.0749,  0.0498, -0.0368, -0.0109,
        -0.0034,  0.0365, -0.1127, -0.2098,  0.0829,  0.0345,  0.2295, -0.2333,
        -0.0094, -0.0824, -0.1000,  0.0559,  0.0449, -0.0636,  0.0786, -0.0157,
        -0.0637, -0.0583, -0.0981, -0.0433, -0.1265,  0.0019, -0.0160,  0.0362,
        -0.0491, -0.1948,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0099e-01, -4.2057e-01, -7.3017e-02,  3.3434e-02, -4.2691e-03,
         1.6748e-02,  1.6227e-02,  3.7800e-02,  8.5961e-02, -1.5074e-01,
        -5.8700e-02, -8.6627e-02, -2.5141e-01, -6.7867e-02,  2.7934e-04,
         3.6494e-02,  1.8036e-02, -1.2444e-01,  1.8185e-02, -5.4141e-02,
        -9.4226e-02, -6.9374e-02, -3.4246e-02, -4.8531e-02, -5.8626e-03,
        -1.4552e-01, -3.1611e-02, -6.2413e-02, -3.1037e-01, -4.8898e-02,
        -6.8716e-02,  1.7449e-02,  9.3380e-02, -1.2774e-01, -6.2263e-02,
         9.3446e-02, -2.4454e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7878, -1.1938,  0.2223, -0.0969,  0.0434,  0.1243, -0.1829, -0.2196,
        -0.0477,  0.0400,  0.0474, -0.0497, -0.1351, -0.1040,  0.0676,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1037,  0.2260, -0.1175,  0.0330,  0.1268,  0.0544,  0.0755,  0.0704,
         0.2941,  0.0521, -0.0412, -0.2534,  0.1527, -0.0397,  0.0223, -0.0980,
         0.0127,  0.1032, -0.0822, -0.1815,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1137,  0.0595, -0.0146,  0.0785, -0.3917, -0.0168, -0.0151, -0.0362,
         0.0050, -0.0440, -0.0287, -0.0874, -0.0638,  0.0062, -0.0616, -0.0490,
         0.0189, -0.0425,  0.0197, -0.0574,  0.0194,  0.0336,  0.0138, -0.0065,
         0.0232, -0.0518, -0.1392, -0.0044,  0.0126, -0.0352, -0.0977, -0.0419,
        -0.0026, -0.0355, -0.0348, -0.1054, -0.0149, -0.0150, -0.0381, -0.0376,
         0.0411,  0.1685, -0.0346,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0390, -0.1055,  0.1649, -0.0927, -0.0332, -0.1377, -0.0647, -0.5195,
        -0.1081,  0.0148, -0.0036,  0.1406, -0.0584, -0.0497, -0.2119, -0.0014,
        -0.0205,  0.0948,  0.0164, -0.1045, -0.0648, -0.0479,  0.0053,  0.0181,
        -0.0020,  0.0763,  0.1477,  0.1264, -0.2625,  0.0279, -0.0071, -0.1062,
        -0.0267, -0.0779, -0.1105,  0.1067, -0.1417,  0.0499,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1771, -0.4020, -0.1530, -0.2579, -0.0465, -0.3430, -0.0618,  0.1235,
        -0.0146, -0.1958,  0.0144,  0.0018, -0.1388, -0.0416,  0.1674, -0.0885,
         0.0511, -0.1664,  0.0847, -0.0926,  0.0397,  0.0561,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2387, -0.3411, -0.0060, -0.0906,  0.0774, -0.0409,  0.0148,  0.0583,
         0.1413, -0.1610, -0.0438,  0.0021, -0.0285, -0.0340, -0.1029, -0.0602,
        -0.6521, -0.1322, -0.1445, -0.0643,  0.0531,  0.0880, -0.0700, -0.0950,
         0.0177, -0.0472,  0.0867, -0.0440, -0.0318, -0.1398, -0.0534, -0.0133,
         0.0287, -0.0441, -0.1030, -0.0159,  0.0599, -0.0611, -0.0280, -0.0745,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1763, -0.0263, -0.0824, -0.3468, -0.0266, -0.0675,  0.0082,  0.0015,
        -0.1662, -0.0628, -0.0041, -0.0157, -0.0395, -0.2121, -0.1339, -0.0243,
        -0.1151,  0.1272, -0.2080,  0.0142, -0.0340, -0.0485, -0.0898, -0.0809,
        -0.0811, -0.1402,  0.0493, -0.1168, -0.1052,  0.0071, -0.0441,  0.0782,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2383, -0.7759, -0.0022,  0.0898, -0.0188, -0.0298, -0.0750, -0.1384,
        -0.2311, -0.0662,  0.0312, -0.0908, -0.0399, -0.0704, -0.0635, -0.1424,
        -0.0684,  0.0010,  0.0344,  0.0710,  0.0043, -0.0218, -0.0314, -0.1696,
         0.0098,  0.0639, -0.0312, -0.0041,  0.0659, -0.0483,  0.0292, -0.0034,
        -0.0771,  0.1357, -0.0756,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([0.1286, 0.1109, 0.2106, 0.1821, 0.0961, 0.5126, 0.3678, 0.3362, 0.1905,
        0.2650, 0.0411, 0.1063, 0.0788, 0.1781, 0.0308, 0.0391, 0.1351, 0.0577,
        0.0877, 0.2125, 0.1109, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2538, -1.4363,  0.2067, -0.2010, -0.2423, -0.1665, -0.0916, -0.1996,
        -0.3179, -0.0616,  0.0210, -0.0578,  0.0786,  0.1850, -0.1282,  0.1219,
        -0.1630,  0.1147, -0.1963,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0568, -1.0052, -0.2392, -0.0211, -0.0959, -0.0697, -0.0441, -0.0931,
        -0.1251, -0.2826, -0.2038, -0.1434, -0.0226, -0.3431, -0.0501,  0.0141,
        -0.0628, -0.0545,  0.2015, -0.3837,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5236, -0.7693, -0.0186, -0.2701, -0.1002, -0.1027, -0.0562, -0.0907,
        -0.0319, -0.0535, -0.0144, -0.0849, -0.0166, -0.1127, -0.1102, -0.0724,
        -0.1892, -0.0147,  0.0048, -0.2234,  0.0663,  0.3225,  0.0800,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0379, -0.4843, -0.0844,  0.0271, -0.0106, -0.0631,  0.0522, -0.0365,
        -0.0572, -0.3697,  0.0069, -0.0367, -0.0137, -0.3994,  0.0584,  0.0792,
         0.0334, -0.0647, -0.1090, -0.0833, -0.1032,  0.0400,  0.0356,  0.0160,
        -0.0911,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1984,  0.8920,  0.0438,  0.0439,  0.0148,  0.0227, -0.0953,  0.1482,
         0.0080, -0.1345,  0.2167,  0.0474,  0.0229, -0.0042, -0.0158, -0.2030,
         0.0261, -0.0498, -0.0992,  0.0339,  0.0607,  0.1699,  0.1286,  0.0797,
         0.0959,  0.0709, -0.0532, -0.0093, -0.1140, -0.0888, -0.0761, -0.1061,
         0.0745,  0.2385, -0.0771,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3657,  0.0073,  0.0852, -0.3062,  0.0183,  0.0503, -0.1236, -0.0770,
        -0.5203, -0.0851,  0.0558, -0.0114, -0.1009, -0.1458, -0.0572, -0.3025,
        -0.0968, -0.2040, -0.2197, -0.0660,  0.1950, -0.2343,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5749,  0.2370,  0.0797,  0.0574,  0.3013,  0.1392,  0.0956,  0.0492,
         0.0784,  0.3002,  0.0495,  0.1613,  0.1596,  0.1737,  0.1050,  0.0365,
        -0.0851,  0.2925, -0.0228,  0.1543,  0.1298,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2884, -0.8710,  0.0346,  0.0106,  0.0535, -0.1487, -0.1538, -0.3853,
        -0.0765,  0.0869,  0.1377,  0.1452, -0.0490, -0.2471,  0.1606, -0.1433,
         0.0134,  0.0657,  0.0574,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0694, -0.0725, -0.1369, -0.0031, -0.0430, -0.1543, -0.0369,  0.0194,
         0.0088, -0.0042, -0.0231, -0.0102,  0.0256, -0.0812, -0.0277,  0.0199,
        -0.0218, -0.0904,  0.0092, -0.0510,  0.0337, -0.0814, -0.0329, -0.0107,
        -0.0549,  0.0238,  0.0204, -0.1586, -0.1247, -0.1687, -0.0038, -0.0401,
        -0.0908, -0.1875, -0.0147, -0.0113,  0.0043,  0.0439,  0.1088, -0.0021,
         0.0061, -0.0163,  0.0134,  0.0050, -0.0421, -0.0561,  0.1195],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4538, -0.1847, -0.1901, -0.0350, -0.0013, -0.0688,  0.0118,  0.0353,
         0.0085, -0.0421, -0.0311, -0.0154,  0.0379, -0.0667,  0.1353, -0.0341,
        -0.1134,  0.0015,  0.0939, -0.0252, -0.0042, -0.0543, -0.2120, -0.4858,
        -0.0187,  0.0266, -0.5139,  0.0359,  0.0185,  0.0590,  0.0260,  0.0413,
         0.1486,  0.2352,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4941,  1.7030,  0.0887, -0.2364, -0.2437,  0.1022,  0.0347,  0.0307,
         0.0872,  0.0399, -0.1053, -0.0366,  0.0881,  0.3451, -0.0291, -0.1548,
        -0.0409,  0.1628,  0.1728,  0.1663,  0.0419,  0.1586,  0.0174,  0.1054,
         0.0250, -0.0052,  0.0473,  0.0721,  0.0168,  0.0992, -0.0141, -0.0215,
         0.1119,  0.0577,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.4353, -0.8432, -0.0131, -0.1231, -0.0466, -0.1661, -0.0040,  0.0672,
        -0.1700, -0.1442, -0.1443, -0.1228, -0.1099, -0.2081, -0.1553,  0.0219,
        -0.0869,  0.0742,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3247, -0.2498, -0.1864, -0.0771, -0.1130,  0.0279, -0.0596, -0.0018,
        -0.0262, -0.4961,  0.0872, -0.0600, -0.0390, -0.1381, -0.1474, -0.1038,
        -0.0826, -0.0105,  0.0697, -0.3564,  0.0454,  0.0349, -0.0318,  0.0011,
        -0.0201, -0.1661, -0.1743,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1905,  0.0073, -0.0689, -0.0452, -0.1302, -0.2201, -0.0246,  0.0562,
        -0.2278, -0.0412, -0.0094, -0.2167, -0.2359, -0.2181, -0.0262,  0.0168,
        -0.1032, -0.1703,  0.0220, -0.7459, -0.1513, -0.0369,  0.1500,  0.0582,
        -0.0045,  0.2291,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1222, -0.6779, -0.0858, -0.1220, -0.2197,  0.0143, -0.0262,  0.0385,
        -0.1311, -0.1079, -0.1155, -0.1309, -0.0986, -0.0639, -0.2122, -0.1288,
        -0.0895, -0.0645,  0.0633, -0.1192,  0.0603,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3614,  0.6017,  0.1902,  0.2817,  0.0241,  0.2685,  0.2349,  0.1603,
         0.0269, -0.0413,  0.1796,  0.0806, -0.1637,  0.3989, -0.0392,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3608e-01,  4.6377e-02, -1.0975e-01, -1.1471e-01, -2.4108e-02,
         1.8661e-01, -5.2875e-02, -4.4503e-02,  1.8515e-02, -1.0363e-02,
        -3.7007e-02,  1.6830e-01,  3.2877e-01,  1.9851e-01,  2.8150e-01,
        -2.0009e-04,  1.2362e-02,  4.6704e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1885, -0.5650, -0.2510, -0.6013, -0.0828, -0.1611, -0.2811, -0.0961,
        -0.1557, -0.4295, -0.2762, -0.1041,  0.1179, -0.2641, -0.1351,  0.0159,
        -0.0393,  0.1232,  0.3522,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1712, -0.2538,  0.0359,  0.2205, -0.0364, -0.0317, -0.0344,  0.0206,
        -0.0375,  0.0153,  0.0140, -0.0372, -0.0469,  0.0242, -0.0019, -0.0206,
        -0.1057, -0.0452,  0.1577, -0.0619, -0.4601, -0.0148, -0.1587, -0.0527,
        -0.2163, -0.1087, -0.1268, -0.0278, -0.0008, -0.0644,  0.0038, -0.0192,
        -0.1733, -0.0447, -0.1142, -0.0727, -0.1716,  0.0675, -0.0536, -0.0965,
         0.0510,  0.1398,  0.0117, -0.0875], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3327e-01,  3.9753e-02,  9.0363e-02,  3.7495e-05, -7.8827e-02,
        -1.4838e-02, -1.1235e-01, -2.4845e-02,  4.0350e-03,  5.5563e-02,
        -4.9834e-02,  1.4270e-01,  5.5308e-03,  1.5844e-02,  7.8910e-03,
         3.2803e-02, -6.2950e-03, -2.8294e-01, -3.1816e-02, -4.2490e-02,
        -6.8913e-02,  2.3021e-02, -4.1277e-02,  4.3696e-02,  9.4462e-02,
        -6.0860e-03,  7.0564e-02, -3.0416e-01, -9.4097e-04, -1.1403e-01,
        -1.6680e-01,  2.5362e-02,  2.7306e-02, -6.1138e-01,  3.5547e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1358, -0.7863, -0.0617, -0.0336, -0.1021, -0.0808, -0.2124, -0.0955,
        -0.0050, -0.0136,  0.0292, -0.0254, -0.2829,  0.0226,  0.0869, -0.1489,
        -0.1163, -0.0182,  0.0510, -0.0208, -0.0716, -0.0476, -0.0613, -0.0815,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3816, -0.8370, -0.1180,  0.0363,  0.0271, -0.0920, -0.0565,  0.0559,
        -0.1775, -0.0369, -0.0689,  0.0299,  0.1008, -0.1505, -0.2508, -0.0417,
        -0.0281, -0.0230, -0.3425, -0.0191, -0.0416, -0.1361, -0.0770, -0.1328,
        -0.0263, -0.0899, -0.0200, -0.0356, -0.0071,  0.0096, -0.1550, -0.0609,
        -0.3770,  0.0584,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2071, -0.1252,  0.0265, -0.0280,  0.0716,  0.0711, -0.3554, -0.1060,
        -0.0287,  0.0076,  0.0585,  0.0429, -0.1377, -0.0553, -0.0994, -0.6707,
        -0.0240,  0.0448, -0.0862,  0.0091,  0.0394, -0.0435,  0.0181, -0.1025,
         0.0025,  0.0140, -0.0787, -0.0305,  0.0328,  0.1301,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.0645, -0.3887, -0.2421, -0.7046, -0.0718, -0.0718,  0.0188, -0.0683,
        -0.1522, -0.4897, -0.1397,  0.1186,  0.2046,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6526,  0.0033, -0.1815, -0.2778, -0.0210, -0.1437, -0.0100, -0.0859,
        -0.6088, -0.1785, -0.0481, -0.1960,  0.0877, -0.0680, -0.1610, -0.0322,
        -0.1900,  0.4989,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5598,  0.8588,  0.2243,  0.1131,  0.3965,  0.3006,  0.3891,  0.0639,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2664, -0.3598, -0.0668, -0.0011, -0.0503, -0.0995, -0.0435, -0.0296,
         0.0890,  0.0245, -0.0336,  0.0039, -0.0038, -0.0032, -0.0066, -0.0257,
         0.0092, -0.0055,  0.0169,  0.0236,  0.0040,  0.0246, -0.0764, -0.1600,
        -0.0340, -0.1680, -0.1136, -0.1062, -0.4062, -0.0354, -0.1015, -0.1452,
        -0.0067, -0.3347, -0.0609, -0.0535, -0.0734, -0.0644,  0.0884, -0.0048,
        -0.0029,  0.0114,  0.0017,  0.0636,  0.2265,  0.2228,  0.1347],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2109,  1.0137,  1.7624, -0.5287, -1.1242, -0.0263,  2.2895, -0.6087,
         1.3313, -1.0304, -2.2651,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8244,  0.9328,  0.2134,  0.1762,  0.1509,  0.1652,  0.1315,  0.9951,
         0.4005,  0.1956,  0.1706, -0.0234,  0.3301, -0.0828, -0.0933,  0.1503,
         0.0794,  0.0244,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0637,  0.1542,  0.5277,  0.2786, -0.1015,  0.0179,  0.0625, -0.0421,
         0.0509, -0.0764, -0.0337, -0.0786,  0.1047, -0.2157,  0.1334,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1718, -0.6987, -0.2889, -0.0132,  0.0681, -0.0441, -0.3739,  0.0355,
        -0.0938, -0.0168, -0.1965, -0.0408,  0.2141,  0.0278,  0.1770,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1645,  0.2943, -0.0568, -0.0739, -0.0661, -0.1735, -0.0567,  0.0604,
         0.2079,  0.1745,  0.0237,  0.2470,  0.3094, -0.0004, -0.0160, -0.2501,
         0.1160, -0.1753,  0.0228, -0.0513,  0.1254,  0.2933, -0.0054,  0.0377,
         0.0123,  0.3401,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1104, -0.1070, -0.1537, -0.2557, -0.1327, -0.0587,  0.1404, -0.1983,
        -0.5980, -0.0512, -0.0800,  0.0356, -0.0341, -0.0311,  0.0015,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2072e-01,  8.6640e-01,  5.8135e-02, -5.3195e-02,  4.3365e-01,
         2.2721e-01,  1.8778e-01, -5.1120e-02,  5.4557e-02,  1.0921e+00,
        -1.3512e-04,  2.4659e-02,  1.1909e-01,  9.4569e-02,  1.6394e-01,
         4.7212e-02,  8.2478e-02, -1.9079e-02,  1.1512e-01, -4.3059e-01,
         1.1781e-02, -1.1760e-01,  5.1461e-02, -4.0958e-02, -1.6300e-01,
        -1.9385e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0040, -0.0514,  0.0276, -0.1076, -0.0800, -0.0882, -0.0278, -0.0908,
        -0.0212, -0.0892, -0.4299, -0.0527, -0.0597, -0.0294, -0.1745, -0.1526,
         0.0501, -0.1391,  0.0159, -0.0797,  0.1406, -0.0095,  0.0543, -0.0123,
         0.1914, -0.1022,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.1453e-01, -8.9019e-01,  1.9380e-01,  2.3757e-04, -1.5880e-02,
        -3.5486e-02, -5.1616e-02, -1.0479e-01, -7.9655e-02, -9.8523e-02,
        -6.2321e-02, -8.0457e-02, -2.5161e-01, -8.8052e-02, -1.6938e-02,
         8.1353e-02, -9.0931e-03, -4.2514e-02, -9.7780e-03, -2.6506e-02,
        -1.4868e-02, -9.8186e-02,  5.9017e-02, -7.1491e-02, -1.6922e-01,
        -4.3814e-02, -1.1894e-01,  2.2401e-02, -1.2043e-02, -1.2638e-02,
         7.3992e-02, -1.0392e-01,  3.3006e-02,  2.8006e-02, -6.9310e-02,
        -1.5587e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3766, -0.2847, -0.1181, -0.0886, -0.1398, -0.2141, -0.1240, -0.3165,
         0.0198,  0.0176, -0.0676, -0.0558,  0.0387, -0.0106, -0.0358, -0.0896,
         0.0082, -0.0327,  0.0508,  0.0406, -0.0354, -0.1911, -0.1143, -0.0603,
        -0.0865, -0.3163, -0.0509, -0.0424, -0.1644, -0.1570, -0.0374, -0.0505,
        -0.0081, -0.0052, -0.0671, -0.0306,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0094,  0.8414, -0.0090, -0.0199,  0.0713,  0.1271,  0.0743,  0.1285,
         0.0100, -0.0450,  0.1266, -0.0244,  0.1101,  0.0347,  0.0618, -0.0112,
         0.1094,  0.0199,  0.1729,  0.0479,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3950, -0.1612, -0.0689, -0.0704, -0.0289, -0.0980, -0.2348, -0.1824,
        -0.0184, -0.0658, -0.0974, -0.0072,  0.0419, -0.0895, -0.2398, -0.0749,
         0.1171, -0.1616, -0.0300, -0.0928, -0.0134, -0.0381, -0.0024, -0.1769,
        -0.0984, -0.0709,  0.0142,  0.0014, -0.1119, -0.0351, -0.0946,  0.1794,
         0.1275, -0.2272,  0.1167,  0.1711,  0.0322,  0.1433,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4019e-01, -2.5387e-01, -3.7452e-02, -3.1379e-01, -2.5127e-02,
         1.5223e-04,  8.3172e-02,  1.7186e-02, -1.1323e-01, -6.1515e-02,
        -1.4053e-01, -1.5078e-01, -3.9712e-02,  1.1007e-01,  1.4538e-01,
        -2.6192e-01,  1.3477e-01, -1.5834e-01, -7.0651e-02, -3.6492e-02,
        -1.9314e-02,  1.0193e-01, -4.4548e-02,  9.6539e-02, -2.4706e-01,
         1.3192e-02, -1.1248e-01, -1.3778e-01, -1.7733e-01,  1.1040e-02,
        -4.9378e-02,  1.6840e-01,  4.5963e-02, -3.9258e-02, -3.4646e-02,
         7.8549e-02,  6.2027e-02,  9.9682e-02, -1.8380e-02,  2.6865e-01,
        -1.0029e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4223, -1.6963,  0.6009, -0.4013, -0.2858, -0.1067, -0.3288, -0.1125,
        -0.0779, -0.0518, -0.0668, -0.1051, -0.1396,  0.0504, -0.0894, -0.1329,
        -0.0182, -0.1700, -0.0560, -0.0739,  0.1879,  0.0916,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1333, -0.0508, -0.0030,  0.0457,  0.0926, -0.1323, -0.0084,  0.0700,
        -0.1465, -0.0211, -0.1349, -0.1791, -0.0109, -0.0729, -0.0333, -0.1069,
         0.0071, -0.0578, -0.0644, -0.1112, -0.5995,  0.0807, -0.0894, -0.0986,
        -0.0310, -0.0469, -0.0180, -0.1108, -0.0258,  0.0437,  0.0157, -0.0923,
        -0.1194,  0.1860, -0.1373,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1708, -0.0849,  0.2757,  0.1100,  0.2791,  0.0432,  0.0278,  0.1034,
         0.0773,  0.1000,  0.2376, -0.0025,  0.3604,  0.0594,  0.0211,  0.1923,
         0.0313,  0.0787,  0.2469, -0.0227,  0.0749,  0.0667,  0.2686,  0.2096,
         0.1305, -0.0252,  0.2099,  0.4638, -0.4981,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4710e-01,  8.7543e-01,  2.5497e-01,  2.5927e-01,  1.1207e-01,
         2.9574e-01, -7.0163e-04,  5.2362e-02,  7.0218e-02, -2.6265e-02,
         1.3092e-01,  9.8887e-02, -1.5705e-01, -1.9311e-02,  3.7980e-02,
         8.8731e-02,  1.9097e-01,  7.7279e-03,  2.2903e-01,  6.1068e-02,
         1.7708e-01,  2.5659e-01,  3.7452e-01, -2.0937e-01, -4.6625e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3836, -0.1317, -0.0216, -0.0299, -0.3847, -0.1056, -0.1249, -0.2652,
        -0.1545, -0.0016, -0.0885,  0.0991, -0.0538,  0.0931, -0.1170, -0.2108,
        -0.1686, -0.0325,  0.0187, -0.0758, -0.0103, -0.1478,  0.1932,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1589, -0.1494,  0.0566,  0.0321, -0.1127,  0.0744,  0.1105, -0.1154,
        -0.0459,  0.0164,  0.0035,  0.0095, -0.2292, -0.4869,  0.1609, -0.1119,
        -0.0695, -0.1320, -0.0770,  0.0034, -0.2181, -0.0544, -0.0799, -0.0925,
        -0.0453, -0.0383, -0.3587, -0.1531, -0.1803, -0.2103, -0.1087, -0.0368,
        -0.1049,  0.0099, -0.0224, -0.0152,  0.0298,  0.2264,  0.2489,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0264,  0.1555, -0.0945, -0.2287, -0.7518, -0.1194,  0.0587, -0.2023,
        -0.4437,  0.0875, -0.1737, -0.2515,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.3103, -0.2698, -0.2742,  0.0104, -0.0590, -0.0978, -0.0921, -0.0552,
         0.0092, -0.0575, -0.0657, -0.0009,  0.0022, -0.0185, -0.0801, -0.0243,
        -0.0033, -0.2079, -0.0175, -0.0593, -0.0517, -0.0505,  0.0162, -0.0451,
         0.0338,  0.0015, -0.0456,  0.0346, -0.0776,  0.0223, -0.0401,  0.0131,
        -0.2119, -0.0087, -0.1147, -0.0862, -0.0805, -0.0133,  0.0280,  0.0040,
         0.0713,  0.0445, -0.0534,  0.0504, -0.1053,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2647, -1.3827, -0.0485, -0.2652, -0.1422, -0.0305,  0.1708, -0.2954,
        -0.4687,  0.0661, -0.0984, -0.0328,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7022,  1.6618,  0.3079,  0.3769, -0.0881, -0.1574, -0.0879,  0.1978,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6826e-02, -9.7296e-01, -9.4403e-02,  6.2698e-02,  8.9078e-02,
         1.1518e-01,  9.0703e-02,  1.7447e-02, -1.3391e-01, -3.9400e-01,
        -1.9716e-01,  6.5922e-02, -1.4735e-02,  7.3455e-04, -2.7201e-02,
         8.7564e-02,  6.8059e-02, -2.2322e-01,  2.9900e-03, -3.1507e-02,
        -5.8943e-02, -2.0937e-03, -7.1766e-03,  7.9352e-03,  6.3452e-03,
        -1.2789e-01, -2.5630e-02, -6.0913e-02, -1.2185e-01,  4.8695e-02,
        -9.9262e-02,  5.4165e-02, -4.0834e-02, -7.0062e-02,  3.8706e-02,
        -7.7915e-02,  1.4934e-02, -1.9484e-02, -6.0382e-02, -2.6226e-02,
        -3.2386e-03, -2.5034e-02, -3.6612e-02, -6.5971e-02,  3.9199e-02,
         4.4119e-01, -2.4082e-01, -1.0367e-01, -1.2045e-01, -2.3814e-01,
        -8.7540e-02, -3.9464e-02, -1.1105e-01,  2.4545e-02, -2.7794e-02,
         5.3614e-02, -7.7185e-02,  1.7935e-02,  7.8521e-04,  1.8886e-01,
         5.6044e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0301e-01, -9.0936e-02,  4.2498e-02, -4.1018e-02, -3.1539e-01,
        -7.8383e-02, -1.3322e-01, -4.2470e-05, -4.4762e-02,  3.9427e-02,
        -7.4925e-02, -7.3172e-02, -1.8924e-02, -5.5172e-02, -2.3768e-01,
        -2.5531e-02, -3.3935e-02,  1.0952e-01, -1.7297e-01, -1.5603e-02,
        -2.1274e-01, -8.9951e-02, -2.7206e-01, -1.0806e-02, -9.2122e-02,
        -7.2414e-02, -1.1855e-02, -1.1216e-02, -2.4900e-02, -4.7075e-02,
        -2.2299e-02,  8.0590e-02, -2.9730e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0032, -0.4297, -0.4149, -0.2196, -0.3124, -0.1191, -0.2495, -0.0516,
         0.0066,  0.0946,  0.1053, -0.0553, -0.1550, -0.1111, -0.1453,  0.0657,
        -0.0531, -0.3163, -0.9732,  0.0315, -0.0363, -0.1270,  0.0853,  0.1516,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1583, -0.1570, -0.2791, -0.1453, -0.0484, -0.3461, -0.0723, -0.0868,
         0.0438, -0.2523, -0.1644, -0.0830, -0.3296,  0.0010, -0.0919, -0.0390,
        -0.0278,  0.0128,  0.0948,  0.0857, -0.0610,  0.0072,  0.0008, -0.3310,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3484, -0.3156, -0.2486, -0.1856,  0.0125, -0.0217, -0.0774, -0.0574,
        -0.0038, -0.0268, -0.0893,  0.0298,  0.0195,  0.0177, -0.0987, -0.2283,
        -0.1907, -0.0860, -0.0638, -0.1090, -0.1043, -0.0455, -0.1429, -0.2554,
        -0.0503, -0.0604, -0.0967, -0.0405, -0.0507,  0.0271, -0.0412, -0.0967,
         0.1081,  0.0365,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1533,  0.0089, -0.0846,  0.0166, -0.2523, -0.0617, -0.0209, -0.0159,
         0.1034, -0.0289,  0.0278, -0.0270, -0.1061, -0.5278, -0.1055, -0.1293,
        -0.2418, -0.0839, -0.1190, -0.0645, -0.0814, -0.1666, -0.0109, -0.0714,
        -0.1663,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2266,  0.0555, -0.1794, -0.0760, -0.1196, -0.0528, -0.2088, -0.1423,
        -0.1764, -0.4345,  0.1094, -0.1227, -0.0844, -0.1618, -0.1426,  0.0978,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1895, -0.4009, -0.0949, -0.1954, -0.0978, -0.1811, -0.2997, -0.0680,
        -0.0184, -0.0954, -0.0813,  0.2171, -0.2001, -0.1189, -0.1402,  0.0200,
        -0.0491, -0.0650, -0.2481, -0.1356,  0.0601,  0.0324,  0.0178,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0006,  0.3725,  0.1150,  0.2971,  0.5003,  0.3852, -0.0343, -0.0091,
         0.0209, -0.1868, -0.1356,  0.1212, -0.1918, -0.0190,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.6400, -0.1262, -0.1703, -0.5831, -0.0641,  0.0302, -0.0512, -0.0525,
        -0.0665, -0.0749, -0.2689, -0.3076, -0.1260, -0.0448, -0.0317, -0.0464,
        -0.1067, -0.6546, -0.1579, -0.1882, -0.2614, -0.1607, -0.1613, -0.1472,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1068,  0.0315, -0.6315, -0.1625, -0.2776, -0.3245, -0.5137, -0.2152,
         0.0473, -0.3229, -0.1962,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0552, -0.0261, -0.0066, -0.2215, -0.0066, -0.1161, -0.2642, -0.1502,
        -0.2701, -0.0023, -0.0519,  0.0880, -0.0937, -0.0295, -0.1721, -0.0393,
        -0.1571, -0.0666, -0.2755, -0.5041, -0.2007,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4323,  0.2805, -0.1728,  0.2381,  0.0354,  0.0655, -0.0161,  0.4677,
         0.1815,  0.3691, -0.0851,  0.0610, -0.0511,  0.0647,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2303, -0.1211, -0.0116, -0.0437,  0.0328, -0.0326,  0.0076, -0.0924,
         0.0112, -0.1515,  0.0398, -0.0030,  0.7169, -0.0243,  0.0437, -0.0073,
        -0.0392,  0.0567,  0.0438, -0.3084, -0.2009,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3719,  0.7653,  0.0403, -0.0114, -0.4780,  0.4927, -0.0357,  0.1718,
         0.1157,  0.2814,  0.1982,  0.1766,  0.0926,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0037, -0.1022,  0.0588,  0.0205, -0.0322, -0.0116, -0.0552, -0.0802,
        -0.0199, -0.0166, -0.0254,  0.0785,  0.0149, -0.0016,  0.0217, -0.0065,
         0.0369,  0.0424, -0.0181, -0.0126, -0.0435,  0.0020, -0.1017, -0.0513,
        -0.4978,  0.0189, -0.1914, -0.0875, -0.0722, -0.0620, -0.0535, -0.0612,
         0.0711, -0.0314,  0.1591, -0.0308, -0.1026, -0.0188,  0.0034, -0.0087,
        -0.0548,  0.0337,  0.0431, -0.0387,  0.0458], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1951, -0.1224, -0.1686, -0.0634, -0.0736,  0.0168, -0.1005, -0.0835,
        -0.0144, -0.0710, -0.1949,  0.0691, -0.0393, -0.0254, -0.0764,  0.0307,
        -0.1820, -0.0574, -0.0395, -0.0421, -0.1547, -0.3834, -0.0509, -0.2408,
         0.1249,  0.0592, -0.0016,  0.0057, -0.2713, -0.0057, -0.0014,  0.1111,
        -0.2863,  0.2161,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2615, -0.4484, -0.3698, -0.0501, -0.0152, -0.0901,  0.0951, -0.0597,
        -0.2684, -0.0843, -0.0606, -0.0834, -0.0549,  0.0227, -0.0309,  0.0757,
         0.0762, -0.0327,  0.0253, -0.3268, -0.5075,  0.0536, -0.0506,  0.0303,
        -0.0994,  0.1087,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2029, -1.0435,  0.0496, -0.1789,  0.1686, -0.2341,  0.2751, -0.2100,
        -0.0052, -0.0315, -0.0451,  0.0016,  0.0500, -0.0236, -0.1278, -0.1501,
        -0.4814,  0.2653, -0.2165,  0.1178,  0.0757,  0.1643, -0.0598,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2547, -0.2220, -0.0701, -0.1329, -0.0054, -0.0592,  0.0116, -0.0637,
        -0.1067, -0.0260,  0.0083,  0.0922,  0.0163,  0.0022, -0.6208,  0.1309,
         0.0675, -0.0959, -0.1017, -0.1670, -0.0386,  0.0241,  0.0423, -0.1003,
         0.0114, -0.0728, -0.1331, -0.2089,  0.0318,  0.0202, -0.0192, -0.0027,
         0.0263,  0.0792,  0.0497,  0.0415, -0.0650,  0.0125, -0.0491, -0.1188,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0589, -0.1330, -0.0863, -0.0777, -0.2755, -0.0290, -0.0529, -0.2317,
        -0.0866, -0.0638, -0.0883, -0.0715, -0.0803, -0.0198, -0.0795, -0.0999,
        -0.0950, -0.0131,  0.1328, -0.0668, -0.0250, -0.1189, -0.0529,  0.0510,
        -0.1798,  0.0101,  0.0218,  0.0700,  0.0537, -0.0244, -0.3074, -0.0258,
        -0.1482, -0.1376,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.1706, -0.2443, -0.0321,  0.0114,  0.0452, -0.0166, -0.0239, -0.0521,
         0.0119,  0.0623, -0.0561, -0.0183,  0.0264, -0.1254, -0.4984, -0.0017,
         0.0299,  0.0248, -0.1458,  0.0251,  0.0510,  0.0216, -0.0083,  0.0288,
        -0.1686, -0.3169,  0.0083, -0.1273,  0.0830,  0.0241, -0.0800, -0.0105,
         0.0212,  0.2537, -0.0797,  0.0054, -0.0893, -0.0578, -0.0352, -0.0762,
        -0.0658, -0.0199, -0.0565, -0.0191, -0.0181,  0.0363,  0.0008,  0.0165,
        -0.0448,  0.0017, -0.0340,  0.0756], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0689,  0.0051,  0.0600, -0.0526, -0.2827, -0.0964, -0.0182,  0.0567,
        -0.0256, -0.1133,  0.0047, -0.0647, -0.0966, -0.2780, -0.1483, -0.1813,
        -0.2594,  0.0229, -0.0870, -0.0575, -0.1080, -0.1182, -0.0929, -0.1814,
        -0.0135,  0.1011,  0.0479,  0.0625, -0.0572,  0.0125,  0.0350,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9177e-01, -1.3654e-01, -1.6862e-02, -4.1129e-02, -4.8330e-02,
        -3.5661e-04, -8.6010e-03,  4.1577e-02, -1.2619e-01, -4.0562e-02,
        -6.3022e-02, -1.5768e-01, -2.0830e-01, -1.1984e-01, -1.1668e-01,
        -1.6103e-01,  2.8879e-01, -9.9172e-02, -2.2111e-01, -2.4850e-01,
        -1.8445e-01,  9.8632e-02, -3.3701e-01, -1.2662e-01, -1.1670e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0254, -0.0344,  0.0077,  0.1140, -0.0044, -0.1364, -1.2789, -0.0176,
        -0.1380, -0.1837,  0.3034, -0.0103,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5790, -1.0500, -0.0059, -0.0799, -0.1627, -0.0330,  0.0307, -0.0460,
         0.0551, -0.0492,  0.0989,  0.0758, -0.1682, -0.3535, -0.0604, -0.1073,
        -0.0182,  0.1101,  0.0194,  0.0041, -0.0247,  0.0012, -0.1113,  0.0809,
         0.0146, -0.2610, -0.0253,  0.2257, -0.3801,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1331,  1.0993, -0.6925, -0.0310, -0.1698,  0.2813,  0.8907, -0.3621,
        -0.4687, -0.0069, -0.1658,  0.2919,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2492, -1.0315,  0.0806, -0.1878,  0.4567, -0.5354, -0.7773,  0.0808,
        -0.2532, -0.1366,  0.0766, -0.0866,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1664, -0.3578,  0.2699,  0.1501,  0.2792, -0.2651,  0.0529,  0.2123,
         0.3219,  0.0722,  0.1171,  0.3653,  0.1849, -0.0824,  0.0812,  0.3015,
         0.1954,  0.0282,  0.2597, -0.4189,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4667, -1.5408, -0.0917, -0.0139, -0.5662, -0.0684,  0.1054, -0.0063,
        -0.0914, -0.0753,  0.0131, -0.0424,  0.0135, -0.0247, -0.1027,  0.0195,
        -0.0889,  0.0337,  0.0206, -0.1837, -0.0785,  0.0569,  0.0711,  0.2567,
         0.0212, -0.0236,  0.0388, -0.0073, -0.0242,  0.0582, -0.0613, -0.0151,
        -0.1346, -0.1045, -0.0433,  0.2301, -0.4596,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6260,  1.5832,  0.6234, -0.1872,  0.0958,  0.1283, -0.1552, -0.1318,
         0.5901,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1526, -0.4084, -0.2004, -0.7194, -0.1736, -0.1369, -0.2989, -0.2893,
        -0.0421,  0.0113,  0.0204,  0.0385, -0.1327, -0.2575, -0.0017,  0.0677,
         0.0269,  0.1924, -0.0697, -0.1917,  0.0588,  0.0078,  0.0679, -0.0171,
         0.0044,  0.0050, -0.0110,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9172e-01, -1.3715e+00, -8.2444e-03, -9.0223e-02, -1.1320e-01,
        -6.0909e-02, -1.2216e-03,  2.1179e-02,  3.3291e-03, -3.2081e-02,
         4.2795e-02, -2.5438e-01, -1.0980e-01, -4.0893e-02, -1.6866e-02,
         4.3374e-02, -1.5195e-01,  1.2737e-01,  3.6968e-03, -9.3450e-02,
         6.4302e-01, -1.9870e-01, -1.5795e-01, -2.5831e-02, -3.6013e-02,
         5.4456e-02,  1.2919e-02, -8.9941e-02, -7.7882e-01, -6.0352e-02,
        -3.2566e-02, -3.3988e-02,  1.7470e-01, -5.2021e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.5541, -0.0287,  0.0120,  0.0312, -0.0656, -0.0396,  0.0342,  0.0196,
        -0.0254,  0.0187, -0.0436, -0.3488, -0.0787, -0.0291, -0.0314, -0.0531,
        -0.1616,  0.0672, -0.0433, -0.0077,  0.0634,  0.0181, -0.1165,  0.0126,
        -0.0134, -0.0263, -0.0428,  0.0085,  0.0276, -0.0033,  0.0427, -0.0762,
        -0.3924, -0.0387, -0.0169, -0.0713, -0.2387,  0.0164, -0.0102,  0.0340,
        -0.0661, -0.0225,  0.0221,  0.1164,  0.1176,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4447e-02,  6.5724e-02,  6.0674e-05, -1.3009e-02, -1.3205e-01,
        -1.9180e-01, -6.3081e-01, -5.1208e-02, -4.9437e-02, -2.3845e-01,
        -1.1875e-01, -2.8773e-01, -1.6150e-01, -2.3266e-01, -4.7117e-02,
        -1.5698e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5508e-01,  1.3508e-01, -1.5234e-02,  3.4070e-02,  3.4346e-02,
        -1.2272e-01, -6.3980e-02, -2.8780e-02, -2.0157e-01, -2.2296e-01,
        -1.6215e-01, -2.3508e-01, -1.9246e-01, -4.6663e-02, -1.1594e-01,
         3.5338e-02, -6.5591e-02, -8.1375e-02,  8.8678e-03,  7.1009e-02,
         2.0242e-04,  1.2045e-01, -6.4952e-02, -6.7513e-02,  8.0433e-02,
         2.0906e-01, -2.6608e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0690e-01, -5.3993e-02, -1.5691e-01,  3.9198e-03, -5.2278e-02,
        -3.2114e-02, -5.2565e-02, -3.2436e-02,  4.4399e-03, -1.6005e-02,
        -3.6424e-02, -6.1283e-02, -6.4650e-02, -1.9100e-02, -6.7654e-03,
        -8.8253e-03, -3.6631e-02, -1.0584e-02,  9.3325e-03, -1.1663e-01,
        -1.1400e-02, -6.2299e-02, -8.6475e-02, -4.2347e-01,  1.0806e-02,
        -5.9089e-03, -3.5112e-02, -9.8092e-02,  1.2772e-02, -8.4803e-02,
         4.6180e-02, -7.0944e-02, -9.9333e-02, -9.4240e-02, -2.6622e-02,
        -8.5274e-02, -4.2477e-02, -1.3176e-01,  4.8692e-04,  7.4664e-02,
        -4.8608e-02, -1.4117e-01, -4.1331e-02,  1.7043e-02,  7.2618e-02,
         3.4583e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3134,  0.1256, -0.0647, -0.1335,  0.0779, -0.1433, -0.0064, -0.0626,
        -0.1630, -0.0640,  0.0360,  0.0225,  0.0472,  0.0237, -0.1278,  0.0211,
        -0.0635, -0.0591, -0.2592, -0.4345, -0.0891,  0.0521, -0.0015,  0.0243,
        -0.0187, -0.1723, -0.2448,  0.0380,  0.4102,  0.3451,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([0.5857, 0.3142, 0.4368, 0.1722, 0.2368, 0.7486, 0.2234, 0.4670, 0.4193,
        0.2236, 0.0840, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2124, -0.6865, -0.0926, -0.1788, -0.0690, -0.1023, -0.0708, -0.4143,
        -0.0038, -0.0808, -0.0076,  0.0723, -0.0677, -0.0668, -0.0571, -0.0343,
        -0.0345, -0.0443,  0.0542, -0.1163,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4993, -1.0846, -0.3748, -0.3470,  0.3972, -0.2993, -0.6827,  0.0094,
        -0.2804,  0.1987, -0.3253,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1889e-01, -7.6284e-01, -7.3239e-02, -2.2707e-01, -2.5546e-01,
         1.1474e-01, -3.2916e-02, -8.3127e-02, -2.2932e-01,  4.8095e-02,
         3.8883e-02, -3.8337e-02, -1.0044e-01,  1.9256e-04, -1.0573e-01,
        -1.1008e-01,  6.1106e-02, -3.3425e-02, -1.2679e-01,  3.9713e-02,
        -6.9651e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1347, -0.2861, -0.0336, -0.0068,  0.1254, -1.1522,  0.1386,  0.0012,
        -0.0716, -0.1326, -0.0086, -0.1038, -0.0797,  0.0828, -0.1079, -0.1151,
         0.2313,  0.1008,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2392, -0.2436, -0.3689, -0.0356, -0.1044, -0.0771, -0.0019,  0.0402,
        -0.0259, -0.0740, -0.0119,  0.0264, -0.0169, -0.0537, -0.0681,  0.0534,
        -0.1376, -0.1967, -0.0907,  0.0749, -0.1311, -0.0998, -0.2963, -0.0203,
        -0.0862, -0.0497, -0.0882, -0.0436, -0.0219, -0.1810,  0.0142, -0.0121,
        -0.0129,  0.0213, -0.0366, -0.0172,  0.0514,  0.0370, -0.0321,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2001,  0.7011,  0.0495,  0.0412,  0.0424, -0.0134, -0.0569, -0.1976,
        -0.0772, -0.0209, -0.1317,  0.0997, -0.0352,  0.0643, -0.0732, -0.0377,
         0.2000, -0.0008,  0.0281, -0.0065,  0.0390,  0.0349,  0.0127, -0.0054,
         0.0555,  0.0642,  0.1307,  0.0380,  0.1649,  0.2694,  0.0257,  0.0524,
         0.1959,  0.0518,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.2439e+00,  1.0857e+00,  1.5310e-01,  1.5119e-01,  2.7622e-01,
         5.1082e-01,  5.9132e-04,  3.3591e-01,  3.5484e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0240, -0.8628, -0.1127, -0.2089, -0.0693, -0.0893, -0.0224,  0.0573,
        -0.0285, -0.0851, -0.0512, -0.0795,  0.0175, -0.0708, -0.0266, -0.1456,
        -0.0521, -0.7815, -0.0811, -0.1939, -0.0569,  0.0310,  0.0810, -0.0828,
        -0.2321, -0.0760,  0.0635,  0.0204,  0.1545,  0.0203, -0.1530, -0.1172,
         0.0196, -0.1165,  0.0681,  0.0303, -0.0280,  0.0720, -0.2908, -0.0572,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1817, -0.0954, -0.3811, -0.0822, -0.0783, -0.0641, -0.0754, -0.0742,
        -0.0873,  0.0792, -0.0249,  0.0126, -0.0261, -0.0024,  0.0325, -0.0143,
        -0.0625, -0.0939, -0.0855,  0.0122,  0.0133,  0.0108, -0.0107, -0.0576,
        -0.0064, -0.0759, -0.2109, -0.0149, -0.1358, -0.3040, -0.0908, -0.0915,
        -0.0065, -0.0694, -0.0639, -0.1198, -0.1418, -0.2275, -0.0547, -0.0192,
         0.0954,  0.0407,  0.0622,  0.0140,  0.0065, -0.0112, -0.0528, -0.0266,
         0.0148, -0.0305, -0.0162,  0.1357,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6526, -0.8423, -0.2563,  0.0726, -0.0860, -0.0974, -0.0829, -0.0013,
         0.0045,  0.0446,  0.0896, -0.1212, -0.1717,  0.0689, -0.1487, -0.2063,
        -0.1221,  0.0271, -0.1686, -0.2227,  0.0307, -0.6610,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0960, -1.2637, -0.2195, -0.1066, -0.2552,  0.0393, -0.0481, -0.0883,
        -0.1317, -0.1318,  0.0649, -0.0358, -0.0202, -0.0563, -0.0368, -0.1630,
        -0.1325,  0.0872,  0.0822,  0.1889, -0.1015,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2603, -0.1923, -0.0064, -0.0602,  0.1605, -0.0639, -0.0587,  0.0377,
        -0.0439,  0.0012, -0.0406, -0.0438, -0.0433, -0.0210,  0.0129, -0.0103,
        -0.0263, -0.0406, -0.0690,  0.0124,  0.0311, -0.0553, -0.0101, -0.0341,
        -0.0138, -0.0125, -0.1204, -0.0840,  0.0131, -0.0210, -0.2608, -0.0726,
        -0.0200, -0.0571, -0.0673, -0.0581, -0.0106,  0.0238,  0.0039, -0.0142,
        -0.2242, -0.0210, -0.1006,  0.0146, -0.0954, -0.0110, -0.0315, -0.0887,
        -0.0728, -0.0174, -0.0503, -0.0163, -0.0823,  0.0287, -0.0932],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1551, -0.1617,  0.0868, -0.0985, -0.1323, -0.0501, -0.0756, -0.0345,
        -0.0186,  0.0347,  0.0765,  0.0130,  0.0168, -0.0663, -0.0807, -0.0770,
        -0.3751, -0.1444,  0.0118, -0.0196, -0.0432, -0.1153, -0.1159, -0.0375,
         0.1285, -0.0278, -0.0760, -0.1845, -0.0697,  0.0047,  0.1434, -0.0310,
        -0.0238, -0.2783, -0.0177, -0.0878, -0.0325, -0.0691, -0.0499, -0.1113,
        -0.0460, -0.0084,  0.0486,  0.3381,  0.0891,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2782e-01, -5.7815e-01, -2.6968e-01, -3.0195e-01,  6.1778e-02,
        -1.1317e-01, -1.2307e-01, -1.7683e-01, -1.5574e-01, -3.0010e-02,
        -7.5866e-05, -5.0242e-02, -7.1065e-02, -7.0918e-02, -1.2729e-01,
        -7.1088e-02, -3.2907e-02, -2.0248e-01,  8.6308e-02, -4.5523e-02,
        -5.0166e-01,  1.2329e-02, -1.6098e-02,  7.1354e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0217, -0.0691,  0.0380,  0.0413, -0.0270,  0.0016, -0.0116, -0.1011,
        -0.0595,  0.0527, -0.0201, -0.0060, -0.0176, -0.1662, -0.0551, -0.0022,
        -0.1725, -0.1067, -0.1343, -0.1716, -0.0636, -0.0525, -0.1249, -0.1885,
         0.1359, -0.1095, -0.1993, -0.1017, -0.0587, -0.1512,  0.0170,  0.0547,
        -0.0824,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5280e-01, -1.0538e+00,  1.4671e-02, -9.0087e-02, -4.7039e-02,
        -4.4075e-02,  7.5833e-02, -1.2379e-01, -4.7018e-01, -3.7602e-03,
        -8.0558e-02, -1.7965e-03,  7.2815e-01, -1.6384e-01,  2.2168e-04,
        -6.8742e-02, -1.3768e-01, -1.2952e-03,  1.0061e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2706, -0.0441,  0.0918, -0.2349, -0.2082, -0.0100, -0.2349, -0.1414,
        -0.4615,  0.0025, -0.0292, -0.1452, -0.0018, -0.0819, -0.0364,  0.0631,
         0.0411, -0.1436, -0.0294, -0.0208, -0.1844, -0.4762, -0.2658, -0.1475,
         0.2973,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3051, -0.8729, -0.1724,  0.0313, -0.5150, -0.3881, -1.7070,  0.0812,
        -0.2085, -0.1414,  0.1495,  0.0626, -0.0728, -0.0820, -0.2610,  0.4666,
        -0.3688,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0866,  0.0273, -0.1551,  0.0743, -0.0563, -0.1129, -0.0956, -0.1757,
        -0.0589, -0.2092, -0.0450, -0.1253, -0.0084, -0.0098, -0.0446, -0.0551,
        -0.1006, -0.4582,  0.0242, -0.0528, -0.0125, -0.2858, -0.4901, -0.0319,
        -0.0583, -0.1758, -0.2319, -0.0849,  0.0026,  0.0429, -0.0434,  0.0815,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4240, -0.6295, -0.1960, -0.1359, -0.0482, -0.0991, -0.1264, -0.3682,
        -0.0043, -0.0069, -0.0704, -0.0126, -0.1291, -0.0511, -0.0727,  0.1795,
        -0.0868, -0.1274, -0.0662,  0.0144,  0.0236,  0.1517, -0.3084, -0.3126,
        -0.1324, -0.0525, -0.6348, -0.0704,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0550, -0.3848,  0.0134, -0.0216,  0.1519, -0.0974, -0.3558, -0.0284,
        -0.0814, -0.0370,  0.0433, -0.2344,  0.0766,  0.0011,  0.0423, -0.0286,
        -0.0621, -0.2525,  0.0150, -0.0431, -0.0235, -0.0240,  0.0386, -0.0548,
        -0.1307,  0.1123,  0.0102,  0.0391, -0.0057,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0646, -0.2299, -0.0163, -0.1563, -0.3942, -0.2309, -0.0195,  0.0145,
         0.0101,  0.1263,  0.0611, -0.1064, -0.0583, -0.2792,  0.0359, -0.0738,
        -0.1721, -0.3348, -0.1305,  0.0185,  0.2062, -0.0314, -0.0393, -0.0487,
        -0.1077, -0.0273, -0.0041, -0.0910, -0.1203, -0.0338,  0.0021,  0.0272,
         0.0595,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1128, -1.0193, -0.0626, -0.2478, -0.0914, -0.2352,  0.0071, -0.1779,
        -0.0197, -0.0794, -0.0490, -0.0503, -0.1238, -0.0472, -0.0588, -0.0089,
        -0.1075, -0.0237, -0.3232, -0.1263, -0.2137, -0.1057, -0.0377,  0.1198,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3580e-01, -4.3654e-01, -2.4019e-01, -1.9149e-02, -5.6072e-02,
        -8.9395e-02, -1.6406e-01, -1.9246e-01, -3.7354e-02, -1.2027e-01,
        -1.1569e-01, -1.3332e-01, -1.2914e-01, -6.0637e-02, -2.4871e-02,
        -3.2715e-02, -2.3687e-04, -6.6873e-02, -5.1981e-02, -4.8696e-01,
        -8.5763e-02, -7.5283e-02, -1.3874e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0241, -0.3490, -0.0027, -0.0310,  0.0243, -0.1125,  0.0603, -0.0781,
        -0.0075, -0.1126,  0.0260, -0.0394, -0.0876,  0.0533, -0.0095,  0.0562,
        -0.1400, -0.1277, -0.0957, -0.1433,  0.0385, -0.0901, -0.0044, -0.0733,
        -0.1831, -0.1154, -0.0368, -0.0554, -0.0679, -0.2567, -0.1938, -0.1902,
        -0.1096, -0.1172, -0.0900, -0.1110, -0.1384,  0.1909, -0.0010,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0964, -0.1943, -0.1259, -0.0621, -0.2794, -0.0315,  0.0275, -0.0188,
        -0.1416,  0.0386, -0.0197, -0.0147, -0.0087, -0.0089,  0.0464, -0.1510,
         0.0312,  0.0418, -0.0414, -0.0302, -0.0715,  0.0174,  0.0022, -0.0775,
        -0.0558, -0.1031, -0.0480, -0.0080,  0.0040, -0.0500, -0.0270, -0.0109,
        -0.1779, -0.0471, -0.0137,  0.0067, -0.0501, -0.1583, -0.0335, -0.0126,
        -0.0279, -0.1812, -0.1711,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0295, -0.6911,  0.0311,  0.0129, -0.0295,  0.0446,  0.0167, -0.0375,
        -0.2473,  0.0627,  0.0814, -0.0012, -0.0780,  0.0296, -0.0330,  0.0163,
        -0.0186, -0.0791,  0.0181, -0.0193,  0.0501, -0.0623, -0.0154,  0.0009,
        -0.0550,  0.0317,  0.0049,  0.0137,  0.0382, -0.0141, -0.0453,  0.0129,
        -0.1991,  0.0281,  0.0031,  0.1304,  0.1022,  0.0113, -0.1119,  0.0151,
        -0.1019, -0.1771, -0.0526,  0.0432,  0.0457, -0.0931, -0.0426, -0.0094,
         0.0216,  0.0130, -0.0634, -0.0347, -0.0487, -0.0107,  0.0304,  0.0215,
        -0.1635], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4275e-01, -1.3358e+00, -1.4532e-01, -1.3290e-02, -9.0247e-02,
         2.5122e-01, -5.8320e-03, -2.0010e-01, -1.9172e-01,  1.6318e-01,
        -2.2313e-01,  1.1091e-03,  4.1945e-01, -2.0249e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0110, -0.2880, -0.1959,  0.2330,  0.1519, -0.1011, -0.0694, -0.0103,
         0.0388,  0.0244, -0.0501, -0.6448, -0.0413, -0.0311, -0.0396, -0.0534,
        -0.0357, -0.1212,  0.1241, -0.1583, -0.0473, -0.0832, -0.0309, -0.0701,
        -0.0331, -0.0133,  0.0285,  0.0621, -0.0013,  0.0944,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1994,  0.3085,  0.3579,  0.3057,  0.1643, -0.0059,  0.0024,  0.0413,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.1540, -0.2679,  0.0430, -0.0644, -0.0587, -0.2740,  0.0481,  0.0775,
        -0.0073, -0.0421,  0.0108, -0.1258, -0.1324, -0.1831, -0.0536, -0.0517,
        -0.0484, -0.0653, -0.0810, -0.0576, -0.0281, -0.0449, -0.0669,  0.0242,
        -0.1330, -0.0651, -0.0568,  0.1626, -0.1310, -0.2639,  0.0475,  0.1880,
         0.1115,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3312, -0.3238, -0.1673, -0.0137, -0.0477, -0.0622, -0.0161, -0.0019,
        -0.0154, -0.0865, -0.0424, -0.0169, -0.0550, -0.0342,  0.0368,  0.0548,
         0.0301,  0.0421, -0.0506,  0.0198, -0.0419, -0.0010, -0.0218, -0.0517,
        -0.0216, -0.0483, -0.0964, -0.2697, -0.0185, -0.0330, -0.0501, -0.0480,
        -0.0486, -0.2748, -0.0834,  0.0852, -0.0192, -0.0227, -0.0020, -0.0399,
         0.0200, -0.0011, -0.0185, -0.0898, -0.0343,  0.1474, -0.0034, -0.0017,
        -0.0007,  0.0242, -0.0258, -0.0428, -0.0103, -0.0312, -0.0245,  0.0043,
         0.0171, -0.0074, -0.0619, -0.0998, -0.0652, -0.0242, -0.2301, -0.0635,
         0.0130, -0.0223, -0.0006,  0.0875, -0.0148,  0.0842,  0.0329],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8637e-01, -7.9725e-02, -4.8680e-03, -5.4565e-02, -1.5085e-01,
        -4.5750e-02, -6.6490e-01, -8.4577e-02, -6.0870e-02, -5.5677e-02,
        -3.9857e-02, -4.4232e-02, -4.5586e-02,  2.7515e-02, -4.7668e-02,
        -5.8577e-02, -3.2321e-02,  6.3398e-02,  8.6416e-02, -2.2011e-01,
         4.5190e-02,  1.1922e-02,  2.1718e-02, -1.3044e-01,  5.4470e-02,
        -3.8462e-02, -6.8429e-02, -2.7291e-02,  2.5444e-02, -1.2478e-02,
         9.3058e-02,  1.5119e-02, -1.6856e-01, -1.1282e-01,  2.8289e-02,
        -6.4371e-02, -6.2333e-02,  6.3048e-02,  1.9292e-04, -2.9613e-02,
         2.0369e-01, -1.2086e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6770e-01, -4.0135e-01,  2.0597e-02,  2.8821e-02, -1.6057e-02,
        -1.2714e-02,  7.5487e-03,  3.5634e-02,  6.6762e-02, -1.0945e-01,
        -9.7516e-02, -4.5120e-02, -2.0815e-01, -4.8133e-02,  1.4209e-01,
        -6.9336e-02, -4.3676e-03, -1.1347e-01, -2.9494e-01, -3.6600e-02,
        -9.6351e-03, -2.3019e-02, -2.3652e-02, -3.9240e-02,  4.7476e-03,
        -1.1396e-01, -1.1819e-01, -8.3244e-02, -2.0829e-01, -6.4535e-02,
        -2.2410e-02, -5.7757e-02,  1.0972e-05, -1.0962e-01, -8.1321e-02,
         9.9567e-02, -9.1324e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5956, -1.2520,  0.3343, -0.3619, -0.0429,  0.1167, -0.0734, -0.1655,
        -0.0331, -0.1039,  0.0540, -0.0883, -0.1219, -0.0547,  0.1763,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3162,  0.0883,  0.0463,  0.0668, -0.0073,  0.0815,  0.0996,  0.1422,
         0.4619,  0.0812,  0.0935,  0.0698,  0.4467,  0.1625, -0.1704, -0.0257,
         0.3255,  0.0512,  0.1360,  0.0924,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0662, -0.0025, -0.0691, -0.0513, -0.4395, -0.1392, -0.0641, -0.0566,
        -0.0346, -0.0881, -0.0084, -0.0645, -0.1013, -0.0288, -0.0392, -0.0521,
         0.0354, -0.0636,  0.0532, -0.1054,  0.0027, -0.0286,  0.0128,  0.0053,
        -0.0146, -0.0627, -0.2251, -0.0188, -0.0028, -0.0966, -0.1046, -0.0407,
        -0.0080, -0.0523, -0.0708, -0.1793, -0.0645, -0.0514, -0.0092, -0.0794,
        -0.0292,  0.2982, -0.2318,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4720e-01, -1.5552e-02,  1.2942e-02, -7.0706e-02, -1.0443e-01,
        -9.6597e-02, -8.4534e-02, -3.6700e-01, -8.4973e-02, -5.5417e-02,
        -1.6590e-01,  2.3012e-02, -1.2760e-01, -9.6216e-02, -2.1890e-01,
        -1.8007e-02,  5.3311e-03, -2.6280e-02,  4.1373e-02, -6.6845e-02,
        -1.2089e-01, -7.6061e-02,  6.6286e-02,  6.6616e-02,  2.1314e-02,
         7.3963e-03,  6.0221e-02,  6.5978e-02, -2.7277e-01,  3.1263e-04,
        -4.6449e-02, -7.4781e-02, -6.6545e-02, -5.0739e-02, -1.2141e-01,
        -4.7410e-02,  1.2379e-01,  1.2613e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2598, -0.6287, -0.2814, -0.3707, -0.0212, -0.2596, -0.0762,  0.0795,
        -0.0152, -0.4833, -0.0334, -0.0830, -0.0676, -0.0360, -0.0637, -0.0757,
        -0.1374, -0.0404,  0.0524, -0.0479,  0.1381,  0.1077,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1484, -0.0971,  0.1331, -0.0355,  0.1052,  0.0444, -0.0924,  0.0702,
         0.0460, -0.2640, -0.0483, -0.0288, -0.0126, -0.0412, -0.0381, -0.1144,
        -0.3235, -0.0742, -0.0529, -0.2670,  0.0792,  0.0486, -0.0987, -0.0907,
        -0.1104, -0.0198, -0.0610, -0.0457,  0.0222, -0.1858,  0.0201, -0.0009,
         0.0739,  0.0781, -0.0533, -0.2659,  0.0874,  0.0353, -0.0853, -0.1637,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1939, -0.2129,  0.1142, -0.1808,  0.0546,  0.0626,  0.0773,  0.0177,
        -0.1462, -0.0433,  0.0110, -0.0005,  0.0626, -0.1555, -0.1227, -0.0768,
        -0.0488,  0.0119, -0.1538, -0.0150, -0.0590,  0.0150,  0.0704,  0.0580,
        -0.0741, -0.3122,  0.0747, -0.2077, -0.1633,  0.0510, -0.0071, -0.1314,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1166, -0.9674, -0.0596, -0.0215, -0.1685,  0.0367, -0.0227, -0.2170,
        -0.2049, -0.0986,  0.0141, -0.1186,  0.0045,  0.0538, -0.0814,  0.0406,
        -0.1053,  0.0747, -0.0110, -0.0100, -0.0474,  0.0218, -0.0362, -0.0714,
        -0.0090,  0.1207, -0.0501, -0.0409,  0.0393, -0.0602,  0.0259,  0.0343,
         0.0079, -0.0940,  0.0246,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.1838, -0.3562, -0.3228, -0.3023,  0.0807, -0.1489, -0.4592, -0.3128,
        -0.0689, -0.2572, -0.0692, -0.0499, -0.0053, -0.1824, -0.1150, -0.1160,
        -0.1125, -0.0458, -0.0383, -0.1488, -0.0899,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0136, -1.0322,  0.2453, -0.1383, -0.0528, -0.1093,  0.0204, -0.2191,
        -0.4997,  0.1149,  0.0291, -0.0140, -0.0867,  0.0753, -0.1522,  0.0887,
        -0.1721,  0.3539, -0.0380,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3746, -0.2440, -0.1273,  0.0121, -0.0239, -0.1080,  0.0885,  0.1050,
        -0.0686, -0.1046, -0.3755, -0.0042, -0.1640, -0.4562,  0.0981, -0.1310,
         0.0379, -0.1723,  0.0862, -0.1548,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6922e-04, -8.3929e-01, -4.7474e-02, -2.3064e-01,  8.7105e-02,
        -5.1027e-02, -1.9034e-01, -8.8860e-02, -4.2921e-02, -4.0053e-02,
        -5.1400e-02, -2.6243e-02, -5.9425e-02, -7.0834e-02, -3.2467e-02,
        -8.2720e-02, -2.5568e-01, -3.7451e-02, -4.6951e-02, -1.0306e-01,
        -1.6435e-02, -8.9149e-02, -5.5021e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1312, -0.6921, -0.1481, -0.2053,  0.0330, -0.0227, -0.0861, -0.0483,
        -0.0749, -0.5590, -0.0900, -0.1542, -0.0974, -0.2826,  0.0959, -0.0289,
        -0.0116,  0.0621, -0.0240, -0.0879, -0.1376, -0.0176, -0.0151,  0.0404,
         0.1604,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4811,  1.2013,  0.1738, -0.0615, -0.0547,  0.0081,  0.0272,  0.1195,
         0.1315,  0.0675,  0.2662, -0.0050,  0.0192,  0.2739, -0.1617,  0.2348,
         0.0794,  0.0505, -0.0076,  0.0189,  0.1843,  0.1044,  0.1460,  0.1444,
         0.0616,  0.0566, -0.0635, -0.0129,  0.0553,  0.0658, -0.0277,  0.0406,
         0.0294,  0.0320, -0.1984,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2890,  0.0986,  0.0162, -0.0736,  0.0110, -0.0052, -0.1296,  0.0343,
        -0.2320, -0.0447, -0.0888,  0.0447, -0.0331, -0.0626, -0.0961, -0.2130,
        -0.0303, -0.1524, -0.0982,  0.0024,  0.0484,  0.1007,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0498, -0.2997,  0.2422, -0.0976, -0.2863, -0.2279, -0.0789,  0.0388,
        -0.1028, -0.2034, -0.0976, -0.2112,  0.0367, -0.3521, -0.1351, -0.1351,
        -0.1252, -0.1458, -0.1004,  0.0268,  0.0286,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0596, -1.5427,  0.1210,  0.0093, -0.1052,  0.2398, -0.1480, -0.3359,
         0.0099,  0.1425, -0.0024,  0.0939,  0.1400, -0.0160,  0.0257, -0.2273,
         0.2782, -0.1132,  0.1918,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0421, -0.1594, -0.0625, -0.0487, -0.0644, -0.2273, -0.0412, -0.0406,
         0.0301,  0.0218, -0.0126,  0.0042, -0.0174, -0.0532, -0.0225,  0.0274,
         0.0022, -0.0709, -0.0014, -0.0326,  0.0103, -0.0178, -0.0750,  0.0112,
        -0.0711, -0.0098, -0.0294, -0.1396, -0.0613, -0.3620, -0.0249, -0.0516,
        -0.0745, -0.1645,  0.0015,  0.0086,  0.0540,  0.0184, -0.0179, -0.0731,
         0.0242, -0.0007, -0.0029,  0.0170,  0.0085, -0.0389, -0.0300],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3406, -0.4491, -0.1531, -0.0409,  0.0355, -0.0070, -0.0542,  0.0400,
        -0.0919,  0.0077, -0.0165, -0.0062,  0.0169, -0.0181, -0.0134,  0.0402,
        -0.3036,  0.0078, -0.1481, -0.0224,  0.0977,  0.0463, -0.0662, -0.1855,
         0.2050, -0.0581, -0.1652, -0.0431,  0.0785,  0.0975, -0.0042,  0.0399,
        -0.0180, -0.1251,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7956, -0.9824, -0.0054,  0.0646,  0.0861, -0.1395, -0.0418, -0.0752,
        -0.0308, -0.0013,  0.1256, -0.0160, -0.2192, -0.1701, -0.0234, -0.0520,
        -0.0057,  0.0941,  0.0085, -0.2720, -0.0591,  0.1875, -0.0123,  0.0764,
         0.0050,  0.0455, -0.0387, -0.0646, -0.0148, -0.0356,  0.0261, -0.1007,
         0.1799, -0.1517,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.1666, -0.9271, -0.0873,  0.0227, -0.0920, -0.2345, -0.1307, -0.0707,
         0.0311, -0.1833, -0.1213, -0.0864, -0.1056, -0.6642, -0.1415, -0.0487,
         0.0038, -0.0774,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3850,  0.0185,  0.1265,  0.1106,  0.0286, -0.0369,  0.0045, -0.0536,
         0.2434,  0.2860, -0.1375,  0.0957, -0.0545,  0.0664,  0.1605,  0.1491,
         0.0479,  0.1596,  0.1223,  0.2848, -0.0827,  0.2201,  0.0791,  0.0234,
        -0.0291, -0.0489, -0.0624,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2694, -0.0872,  0.1839, -0.0574,  0.0171,  0.0079, -0.0561, -0.0971,
        -0.3809, -0.0413, -0.0477,  0.0624, -0.4924, -0.2197, -0.0462,  0.0641,
        -0.0870, -0.1035, -0.0085, -0.2952, -0.0552,  0.0141, -0.0282,  0.0204,
         0.0235,  0.3101,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0410, -1.0103, -0.0351, -0.0489, -0.4639,  0.2457,  0.2351, -0.0599,
        -0.0651, -0.1808, -0.1529, -0.0356,  0.1797,  0.2254,  0.0874, -0.0098,
        -0.0157, -0.0556,  0.0600, -0.0515,  0.1661,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4133, -0.7752, -0.4556, -0.8823, -0.0184, -0.2391, -0.1523, -0.0568,
        -0.0416,  0.2099,  0.0374, -0.1051,  0.1131,  0.0676, -0.0680,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2424,  0.1205, -0.0372, -0.1597,  0.1983, -0.1599, -0.0757, -0.1172,
         0.0090, -0.2778,  0.0188, -0.2540, -0.3750,  0.0604,  0.1692, -0.2690,
         0.1035, -0.0228,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1235, -0.6122, -0.2746, -0.3354,  0.0131, -0.0114, -0.1585, -0.0078,
        -0.0494, -0.2358, -0.2150, -0.0426,  0.1084, -0.1648, -0.1370, -0.1462,
        -0.0091, -0.1547,  0.0613,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0418, -0.1798, -0.0600,  0.0303, -0.0374, -0.0498,  0.1169,  0.0183,
        -0.0553, -0.0230, -0.0821,  0.0042, -0.0376,  0.0032,  0.0323, -0.0437,
         0.0471, -0.0076,  0.1127, -0.0398, -0.3026,  0.0587, -0.2278, -0.1204,
        -0.1003, -0.0388, -0.1503,  0.1053, -0.0269, -0.1516, -0.0178, -0.1300,
        -0.1417,  0.0491, -0.0154, -0.0746, -0.1623,  0.0417, -0.0357,  0.0161,
         0.0404,  0.0877, -0.0567,  0.1752], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2186, -0.5471, -0.0576, -0.0469, -0.1774, -0.2299, -0.0279,  0.0265,
        -0.3466,  0.1425, -0.1442, -0.0843, -0.0214, -0.0047,  0.0935, -0.0087,
         0.1484, -0.3856,  0.0567, -0.0775, -0.1190, -0.0056, -0.0537, -0.0051,
        -0.0909, -0.0946, -0.0214, -0.1382,  0.0508, -0.1301, -0.1543, -0.1098,
        -0.0125, -0.0939, -0.0192,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0145, -0.9436, -0.1171, -0.1800, -0.1127,  0.0272, -0.2319, -0.1580,
        -0.2344,  0.0219,  0.0386, -0.0886, -0.1582, -0.0178,  0.0203, -0.0904,
         0.0323,  0.0177,  0.0717,  0.0093, -0.0352,  0.0209, -0.2757,  0.1430,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0388, -0.6674, -0.0748, -0.0944, -0.1751, -0.2132, -0.0573,  0.0652,
        -0.1073,  0.0237, -0.0326,  0.0274,  0.0792, -0.1744, -0.0549,  0.0235,
         0.2010,  0.0422, -0.4060, -0.0600, -0.0077, -0.0968, -0.0182, -0.1209,
        -0.1615, -0.1075, -0.0901, -0.0541, -0.0907, -0.0984, -0.1477,  0.0098,
        -0.1067,  0.1272,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5254, -0.1453, -0.0989, -0.2192, -0.1000,  0.0506, -0.2250, -0.0139,
        -0.0357, -0.1144, -0.1202, -0.0603, -0.3311, -0.1469, -0.3067, -0.2671,
        -0.0206,  0.0586, -0.0600, -0.3684,  0.0446, -0.0423, -0.0931, -0.1489,
        -0.0906, -0.0157, -0.0959, -0.2191, -0.0184, -0.3510,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.3126,  0.1172, -0.3882, -1.3537, -0.2406, -0.1650, -0.1277, -0.2344,
        -0.2017, -0.3948, -0.0190, -0.1353,  0.0285,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6471, -0.0215, -0.2940,  0.3502, -0.0548, -0.2664,  0.2536, -0.2813,
        -0.5845, -0.1002, -0.0602, -0.1544, -0.0235, -0.3501, -0.4916, -0.0199,
         0.1679, -0.0417,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0558,  0.5855,  0.3417, -0.0304,  0.6330,  0.2952,  0.0874,  0.4674,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3296, -0.0495, -0.0114,  0.0051,  0.0035, -0.0347,  0.0241, -0.0381,
        -0.0171, -0.0062,  0.0035, -0.0206, -0.0325,  0.0041, -0.0300, -0.0025,
         0.0232,  0.0052,  0.0313,  0.0183,  0.1045, -0.0135, -0.1108, -0.0778,
        -0.0370, -0.1084,  0.0106, -0.0932, -0.3672, -0.2081,  0.0014, -0.1626,
        -0.0560, -0.3745, -0.0476, -0.1483, -0.0992, -0.0499,  0.0219, -0.0108,
        -0.0095, -0.0322, -0.0193,  0.0374,  0.0465, -0.0838, -0.1122],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3663,  0.2958,  0.0379,  0.2507,  0.1333,  0.1581, -0.0331, -0.4136,
        -0.0063, -0.3411,  0.3056,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0717,  0.7222,  0.0234,  0.4630, -0.0296, -0.0458,  0.1715,  0.4278,
         0.2316,  0.2786, -0.0748,  0.0395,  0.3419, -0.0368, -0.1192, -0.0533,
         0.4361,  0.0193,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4710, -0.7219, -0.7446,  0.4254,  0.0159,  0.0085, -0.2616, -0.1580,
        -0.2519, -0.0843,  0.0412,  0.1453, -0.0050,  0.0480,  0.0861,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2158, -1.3621, -0.2993, -0.0478, -0.0207, -0.1746, -0.3415, -0.0870,
        -0.0842,  0.1836, -0.1144,  0.0091,  0.1581, -0.1123,  0.1399,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1664,  0.0622,  0.0591, -0.0898, -0.0993, -0.2364,  0.0813,  0.1707,
         0.1701,  0.2126,  0.1924,  0.2164,  0.2598,  0.0207,  0.0147, -0.0074,
         0.1555, -0.2906,  0.0833, -0.2307,  0.1096,  0.1043, -0.0661, -0.0816,
         0.1245,  0.0146,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1466, -0.3901,  0.0261, -0.2789, -0.0492, -0.3050,  0.0678, -0.3382,
        -0.8401, -0.0593, -0.0845, -0.1380,  0.0240,  0.0418, -0.0109,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0256, -0.8884, -0.1825, -0.0868, -0.3102, -0.1015, -0.2062,  0.0358,
         0.0100, -0.4653,  0.0231,  0.0547, -0.0678, -0.0238,  0.0357, -0.1215,
        -0.0817,  0.1784, -0.1690,  0.1378, -0.1100,  0.0253,  0.0495,  0.1664,
        -0.1696, -0.4480,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3374e-01, -1.3769e-02, -4.6398e-02, -2.0712e-01, -1.9394e-01,
        -1.8294e-01,  1.0556e-04,  8.8359e-03, -4.9710e-02, -1.9571e-01,
        -6.0808e-01, -9.4950e-02, -2.5529e-02, -7.8762e-03, -4.0838e-01,
        -1.1392e-01, -1.4155e-01, -2.2170e-01, -1.8591e-02, -7.3188e-02,
        -4.6810e-02, -6.5090e-02,  8.1797e-05, -9.2689e-02, -7.3533e-04,
         1.8627e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.8752e-01, -6.4277e-01, -6.3450e-02, -8.7343e-02, -2.3244e-02,
         6.3680e-02, -1.4987e-01, -7.2483e-02,  9.8021e-03, -2.5936e-02,
         1.5620e-02, -6.3260e-02, -7.2433e-02,  1.0263e-03, -3.3154e-02,
        -1.8824e-02, -1.4525e-01,  1.0185e-01, -1.5515e-01, -2.5508e-02,
        -8.0472e-02, -1.1866e-01, -2.2144e-02, -1.7902e-01, -1.2510e-01,
        -7.4154e-02, -1.2714e-01,  1.0954e-02,  2.2541e-02,  5.6187e-02,
         4.2110e-02, -3.8067e-02, -4.8008e-05, -1.8503e-03,  2.0621e-02,
        -3.5702e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2308, -0.3474, -0.1846, -0.1762, -0.1996, -0.1054, -0.1051, -0.3669,
        -0.0059,  0.0620,  0.0142,  0.0245, -0.1397,  0.1543, -0.0611, -0.0212,
         0.0228, -0.0156,  0.0577, -0.0469, -0.0598, -0.1882, -0.1165, -0.2014,
        -0.1349, -0.1100,  0.0594, -0.1136, -0.2228, -0.0924, -0.0020, -0.0575,
        -0.0671,  0.0385, -0.0805,  0.0251,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5065,  0.4714,  0.2517,  0.0716,  0.0309,  0.1853,  0.0689,  0.0107,
         0.0568, -0.1657, -0.1167,  0.0701, -0.0010, -0.0636, -0.0544, -0.1014,
         0.2068,  0.0862,  0.0705, -0.0960,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4164, -0.2642, -0.0182, -0.0415,  0.0628, -0.0136, -0.1531, -0.0715,
        -0.0105, -0.0852,  0.0016, -0.0755, -0.0195,  0.0090, -0.1696, -0.1391,
         0.0078, -0.0708,  0.0059, -0.0301,  0.0008, -0.0373, -0.0006, -0.0629,
        -0.1462, -0.0467, -0.0208, -0.0374, -0.0447, -0.2202, -0.0846,  0.0100,
        -0.0895, -0.3448,  0.1110,  0.0403, -0.1343, -0.1919,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2237, -0.2638, -0.1044, -0.3240, -0.0280,  0.0056, -0.0231, -0.0298,
         0.0296, -0.0253, -0.1322, -0.1682, -0.0452,  0.0138,  0.0099, -0.2033,
         0.0666, -0.1159, -0.0373, -0.0098,  0.0511,  0.0591,  0.0189, -0.0458,
         0.0332, -0.0053,  0.0079, -0.0595, -0.2576,  0.0065,  0.0052,  0.0327,
         0.0819,  0.0268, -0.0608, -0.0015, -0.0235,  0.1093, -0.0074,  0.0247,
        -0.0502], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5836, -1.3528, -0.1152, -0.1046, -0.0766, -0.0717,  0.0452, -0.0157,
        -0.0649, -0.0379,  0.0285,  0.0350, -0.1479, -0.0808, -0.0225, -0.1188,
        -0.0751, -0.1525,  0.0238,  0.1332, -0.0450,  0.2809,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1414, -0.0961, -0.0282,  0.0127,  0.0010, -0.1357, -0.0199,  0.0581,
        -0.0153, -0.0367, -0.0569, -0.1647, -0.0713, -0.0938,  0.0167, -0.1706,
        -0.0165, -0.1895, -0.0299, -0.2459, -0.1868,  0.0207,  0.0358, -0.0556,
        -0.0784,  0.0079, -0.0652, -0.0944, -0.1502, -0.1043, -0.0539, -0.0669,
        -0.0554,  0.0355, -0.0112,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6570, -0.1941, -0.2776, -0.1221, -0.1770, -0.0697,  0.0146, -0.0778,
        -0.1518, -0.0572, -0.1304, -0.0249, -0.0940,  0.0704,  0.0299, -0.2505,
         0.2227, -0.0746, -0.3875, -0.1655, -0.0965, -0.0520, -0.1083, -0.0582,
        -0.0906,  0.0164, -0.1189, -0.1944,  0.0785,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0392,  1.1603,  0.3251,  0.1143,  0.2378,  0.1867,  0.0707,  0.1092,
         0.1126,  0.0811,  0.0959,  0.1672, -0.1217,  0.0080,  0.0734,  0.0747,
         0.3001,  0.1246,  0.1504,  0.0086,  0.0621,  0.0612, -0.0231,  0.0676,
        -0.1886,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3252,  0.3374, -0.1311,  0.3445, -0.3093, -0.1223, -0.0176, -0.3047,
        -0.1792,  0.0260,  0.0365,  0.0366,  0.0142,  0.1153, -0.1968, -0.1760,
        -0.0824, -0.1971, -0.0990, -0.0979, -0.1259,  0.1351,  0.2929,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4567, -0.0606,  0.0289, -0.0151, -0.0427,  0.0744, -0.0365, -0.0613,
        -0.0032, -0.0271,  0.0403,  0.0762, -0.1735, -0.5036, -0.0461, -0.2090,
        -0.0765, -0.0816, -0.0747, -0.0156, -0.0208, -0.0126,  0.0360, -0.0428,
        -0.1236, -0.0824, -0.5391, -0.1572, -0.2711, -0.0659, -0.1013, -0.0131,
        -0.0767, -0.1033, -0.0318, -0.0244,  0.0454,  0.0625,  0.0374,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3519, -0.3898, -0.1904, -0.3429, -0.4340, -0.1259, -0.0702, -0.4234,
        -0.2448, -0.1604,  0.0380,  0.2020,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.2949, -0.3012, -0.1595, -0.0690, -0.0224, -0.0314, -0.1263,  0.0300,
        -0.0565, -0.1039, -0.0489, -0.0563, -0.0958,  0.0650, -0.1019,  0.0091,
        -0.0231, -0.0887, -0.0278, -0.1081, -0.0314, -0.0486,  0.0087,  0.0213,
         0.0428, -0.0327, -0.0763,  0.0633,  0.2254,  0.0196, -0.0906, -0.1514,
        -0.0957, -0.0163, -0.1409, -0.0859, -0.1281, -0.0508, -0.0022, -0.0139,
         0.0351, -0.0811,  0.0275, -0.1760, -0.0570,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2390,  1.0878, -0.1216,  0.0201, -0.3296,  0.0692,  0.2593,  0.1811,
         0.2792, -0.0777, -0.1613, -0.0402,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0411e-04, -8.9070e-01, -1.0672e-01, -4.4223e-01,  2.0978e-01,
         2.6405e-01,  2.5882e-01, -1.2261e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0317, -0.6333, -0.0446, -0.1857, -0.1024, -0.0785, -0.0037, -0.0124,
        -0.0247, -0.2704, -0.0984, -0.0417, -0.0135,  0.0098, -0.0356,  0.0846,
         0.1482, -0.0678,  0.0201,  0.0205, -0.0448,  0.0233,  0.0212, -0.0303,
        -0.0383, -0.1061, -0.0202, -0.0354, -0.0476,  0.0305, -0.1334, -0.0231,
        -0.0446, -0.0048, -0.0432, -0.0371, -0.0280,  0.0110,  0.0107, -0.0485,
        -0.0611, -0.0444, -0.0305, -0.0585,  0.0869,  0.0013, -0.0810, -0.1936,
        -0.1283, -0.1731, -0.0770,  0.0108, -0.0683, -0.0501, -0.0604, -0.0019,
        -0.0239,  0.0118, -0.0165,  0.0653,  0.0030], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2050, -0.0592,  0.0812, -0.0520, -0.2954, -0.0360, -0.1515, -0.1083,
        -0.0230,  0.0367,  0.0137, -0.1040, -0.0524, -0.1389, -0.2602, -0.0177,
         0.0047,  0.0088, -0.1354, -0.0456, -0.1397,  0.0219, -0.1595, -0.0634,
        -0.1023, -0.0819, -0.0154, -0.0004, -0.1174, -0.1686, -0.0033,  0.1191,
        -0.0168,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8048, -0.1987, -0.2607, -0.0581, -0.0166, -0.0080, -0.3071, -0.1166,
         0.0596,  0.1125, -0.0243, -0.0309, -0.0036,  0.0540, -0.0495, -0.2463,
         0.1491, -0.1550, -0.9114,  0.0191,  0.0890, -0.0140, -0.0970,  0.3148,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1411, -0.1586, -0.3735, -0.1481, -0.1680, -0.5790, -0.1344, -0.2081,
         0.1025, -0.1973, -0.3231, -0.2008, -0.3508, -0.1395,  0.2371, -0.0741,
         0.0564, -0.1331,  0.0297,  0.0048, -0.1218,  0.0067, -0.1962, -0.2032,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5002e-01, -1.6683e-01, -1.5777e-01, -2.1555e-01,  6.1096e-03,
        -1.0633e-02, -5.7979e-02, -4.6540e-02, -1.5519e-02, -3.3367e-02,
         5.7512e-03, -4.6184e-04,  7.2200e-02,  6.7227e-04, -1.4280e-01,
        -1.9924e-01, -2.0673e-01, -6.4173e-02, -4.9461e-02, -5.6273e-02,
        -1.5212e-01, -2.6801e-02, -9.2887e-02, -2.9234e-01, -2.6775e-02,
         1.3642e-01, -2.5548e-01, -1.7217e-02, -2.7880e-02,  9.4977e-02,
        -1.2706e-01,  8.1949e-02, -2.3328e-01,  1.9764e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2081,  0.0595,  0.0394,  0.0925, -0.0451,  0.0463,  0.0422,  0.0479,
        -0.0099, -0.0843,  0.0255,  0.0038, -0.0229, -0.9051, -0.0062, -0.0602,
        -0.2662, -0.0725, -0.1062, -0.0954,  0.0278, -0.1227, -0.0604, -0.0611,
        -0.0684,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0832, -0.1575, -0.0553,  0.0176, -0.1455,  0.0872, -0.2085, -0.0248,
        -0.2150, -0.9383,  0.0231, -0.0971, -0.0132, -0.1074, -0.0930,  0.1610,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0434, -0.6059, -0.0882, -0.3017, -0.1221,  0.0503, -0.9510, -0.0564,
         0.1079, -0.0197, -0.0385, -0.0693, -0.1165, -0.0620, -0.0831,  0.0467,
         0.0480, -0.1639, -0.1417, -0.0164,  0.0670,  0.3451,  0.0962,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2380e-04,  4.5963e-01, -4.0799e-02,  4.7014e-01,  5.8869e-01,
        -7.2051e-02, -7.4135e-02, -5.4079e-03,  8.1665e-02,  5.5556e-02,
         5.4896e-02,  7.0379e-02, -1.0483e-01,  8.4337e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.1846, -0.1844, -0.1862, -0.4595, -0.0479,  0.1462, -0.0845,  0.1239,
        -0.1364, -0.0919, -0.2537, -0.1964, -0.2845, -0.0636, -0.0086, -0.1385,
         0.0631, -0.4980, -0.1419, -0.2018, -0.2666, -0.0343,  0.0540, -0.1486,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1416, -0.0985, -0.2969, -0.0176, -0.2082, -0.1088, -0.7408, -0.0439,
        -0.1111, -0.1071, -0.0410,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1627, -0.0138, -0.4642, -0.1348, -0.1660, -0.2163, -0.4563,  0.2359,
        -0.1922,  0.0155,  0.0072,  0.0164, -0.0615,  0.0367, -0.1687,  0.0567,
        -0.0932, -0.2665, -0.0268, -0.2266,  0.0838,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0157, -0.0567, -0.0639, -0.4935,  0.0157,  0.0596, -0.1048, -0.3105,
        -0.0862, -0.4080, -0.1136,  0.1549, -0.0745,  0.2194,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4222, -0.0528, -0.0118, -0.0978, -0.1562, -0.2380, -0.0776,  0.0057,
         0.0883, -0.0640,  0.1053, -0.0308, -0.9687, -0.1161,  0.0387,  0.0450,
         0.0769,  0.0789,  0.0129, -0.0909,  0.2023,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4400, -0.8728, -0.0549, -0.2712, -0.1584, -0.2640, -0.0608, -0.6864,
         0.0236, -0.1385,  0.0027, -0.1516,  0.2191,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0635, -0.1072,  0.0326,  0.0196, -0.0083, -0.0133, -0.0277, -0.1077,
        -0.0114,  0.0460,  0.0241,  0.0173,  0.0272,  0.0477, -0.0056, -0.0279,
        -0.0065,  0.0569, -0.0096,  0.0658, -0.1283, -0.0091, -0.0032,  0.0262,
        -0.1526, -0.0012, -0.2749, -0.2113, -0.1225, -0.0236, -0.0442,  0.0379,
         0.0182,  0.0082,  0.0747,  0.0248, -0.0971, -0.0137, -0.0156, -0.0075,
        -0.0322,  0.0248, -0.0448,  0.0869,  0.0554], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1391e-02, -2.3499e-01, -7.4928e-02,  4.8701e-02, -1.1214e-02,
         6.1451e-04, -1.0084e-01, -6.0448e-02, -9.0169e-03,  3.7331e-02,
        -2.0210e-01,  4.5607e-02,  6.5592e-02,  5.9323e-03, -7.5653e-02,
        -5.2345e-02, -8.8464e-01, -5.2029e-02, -2.2674e-02, -6.4103e-02,
        -1.3405e-01, -7.9470e-02, -4.6257e-02,  4.6261e-02,  4.8574e-02,
        -1.8176e-02, -1.7628e-02, -2.6970e-02, -1.7017e-01, -5.9243e-02,
         3.3876e-02, -9.8692e-02,  3.1403e-01,  5.3967e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3659e-02,  2.0369e-01,  1.6273e-01,  1.7114e-01,  7.9351e-02,
         5.6049e-03, -4.3914e-02,  7.1655e-02, -2.6793e-01,  3.6240e-02,
         9.8918e-02,  8.4609e-03,  6.1261e-02,  1.2955e-04,  6.2490e-02,
        -8.9877e-02, -1.6263e-01, -8.9071e-02,  9.8090e-02,  3.5787e-01,
         6.0875e-01, -1.0655e-01,  3.7245e-02, -4.3033e-01,  2.0392e-02,
         1.3796e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3523, -0.8076, -0.0687, -0.0645,  0.0530,  0.0021, -0.0363, -0.0985,
        -0.0807,  0.0208, -0.1043, -0.0061, -0.0172, -0.0983, -0.1726, -0.1339,
        -0.4466,  0.0210, -0.1249, -0.0617, -0.0746, -0.1303,  0.2354,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0502, -0.2697,  0.1502, -0.0384,  0.0117, -0.0029, -0.0215, -0.0524,
        -0.1627,  0.0023, -0.0804,  0.0373, -0.1520, -0.1176, -0.7692,  0.0640,
         0.1546, -0.1188, -0.1451, -0.0718, -0.0067, -0.0131,  0.0347,  0.1092,
        -0.0156,  0.0112, -0.1674, -0.2899,  0.1377,  0.0605,  0.0398, -0.0976,
        -0.0027,  0.0212,  0.1042, -0.0685, -0.0660,  0.0189,  0.0201, -0.0790,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5044, -0.2561, -0.0373, -0.1174, -0.1492, -0.0766, -0.2037, -0.1678,
        -0.0602, -0.0545, -0.0188, -0.0367, -0.1132, -0.0758, -0.0991, -0.1086,
        -0.0951, -0.0528,  0.0286, -0.0675,  0.0342, -0.0556, -0.0212,  0.1060,
        -0.0254, -0.0397,  0.0286, -0.1153, -0.0969, -0.0357, -0.2612, -0.1110,
         0.1035, -0.0457,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.4368, -0.2951,  0.1146, -0.0380, -0.1235,  0.1214, -0.0565, -0.0826,
        -0.0038,  0.0883, -0.0471, -0.0469,  0.0308, -0.0888, -0.2896,  0.0801,
        -0.0559,  0.0035, -0.1303, -0.0019,  0.0122, -0.0285, -0.0262,  0.0466,
        -0.1057, -0.2001, -0.1261, -0.1159, -0.1537, -0.0201, -0.0503, -0.0144,
         0.0132, -0.0590, -0.1992, -0.0015, -0.0734, -0.0633, -0.0023, -0.0613,
        -0.0572,  0.0344, -0.0283, -0.0212, -0.0201, -0.0192, -0.0093,  0.0050,
        -0.0602, -0.0193,  0.0367, -0.0072], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1059, -0.1462,  0.0377,  0.0238, -0.1305,  0.0841, -0.1014,  0.1539,
        -0.0091,  0.1066, -0.0376, -0.0344, -0.0769, -0.2932, -0.1083, -0.1286,
        -0.1683, -0.1375, -0.1035, -0.0793, -0.0587, -0.0923, -0.0526, -0.1645,
        -0.0159, -0.0077,  0.0084, -0.0558, -0.1515,  0.0144, -0.1691,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1800,  0.4215, -0.1126,  0.0943,  0.0320,  0.0158,  0.0462, -0.0092,
         0.1221, -0.1370, -0.0024,  0.2539,  0.1883,  0.0160,  0.2048,  0.1839,
        -0.2829,  0.1497,  0.3462,  0.3645,  0.1042,  0.1688,  0.3576,  0.0513,
        -0.4541,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9432,  0.1085, -0.2823, -0.2015, -0.1117, -0.2082, -1.4340, -0.2503,
         0.1166, -0.1647,  0.2293,  0.0737,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1634, -1.3263, -0.0626, -0.1041, -0.2243,  0.0152,  0.0184, -0.0211,
        -0.0126,  0.0359, -0.0424,  0.0806, -0.0763, -0.2049, -0.1115, -0.0730,
         0.0389, -0.0375,  0.0768,  0.0140, -0.0462,  0.1092,  0.0665,  0.0768,
        -0.0484, -0.2571, -0.0729,  0.0400, -0.1205,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0394, -1.0221, -0.0230, -0.0934, -0.0787, -0.2738, -0.3968,  0.1799,
        -0.1815, -0.1224,  0.0767, -0.0881,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0743, -0.9697,  0.0731, -0.0499, -0.0168, -0.1640, -0.5283,  0.0735,
        -0.2173, -0.0334, -0.0868,  0.1785,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5300,  0.2364, -0.0951, -0.0364, -0.2984, -0.0492, -0.0495, -0.0436,
        -0.4215, -0.2093, -0.0016, -0.5623, -0.0515, -0.0923,  0.2654, -0.2032,
        -0.0208, -0.0646, -0.0676, -0.1511,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9062e-02, -9.9502e-01, -2.5227e-02, -2.6883e-02, -1.1696e-01,
        -4.9876e-02, -1.3023e-01,  2.6544e-02,  3.3035e-02, -2.7811e-02,
        -5.3984e-02, -1.3137e-01, -9.1209e-02,  4.4897e-02,  1.0071e-01,
         5.9969e-02,  2.5328e-02,  4.6573e-02, -5.9321e-04, -9.6687e-02,
        -2.9526e-02, -1.7560e-02,  6.4429e-02, -3.3133e-02,  5.6093e-02,
         2.8652e-02, -9.2447e-02, -5.5602e-02, -5.9538e-02,  1.3138e-02,
        -2.2554e-02, -7.1528e-03, -1.1780e-01, -4.6833e-01, -7.1881e-02,
         3.1823e-02,  8.2584e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5128,  1.5396, -0.0314, -0.1403, -0.1591, -0.1031,  0.2112, -0.2335,
         0.3099,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0155,  0.1646,  0.3046, -0.3050, -0.0687, -0.2157, -0.2150, -0.2254,
         0.0152, -0.1111,  0.0020, -0.1102, -0.1980, -0.2214,  0.0459,  0.0814,
        -0.0403, -0.0399, -0.1764, -0.1480, -0.0409,  0.0550, -0.0335, -0.0358,
        -0.0551,  0.0767, -0.0747,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8664, -0.9782,  0.0658, -0.0654, -0.1130, -0.0484,  0.0715, -0.0585,
        -0.2026, -0.2438,  0.1421, -0.1268, -0.1230,  0.0943, -0.1015, -0.0228,
        -0.1083,  0.0796,  0.1620, -0.1060,  0.3602, -0.0807, -0.1259,  0.0035,
        -0.0568, -0.1143,  0.0060, -0.0933, -0.4108,  0.0312,  0.0606, -0.0865,
         0.6784, -0.1800,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.1051, -0.0028,  0.0222,  0.0181, -0.0987, -0.0205,  0.0344,  0.0357,
        -0.0153, -0.0173, -0.0652, -0.2411, -0.1837, -0.0308,  0.0155, -0.1501,
        -0.2642,  0.0029,  0.0070,  0.0054,  0.0508,  0.0029, -0.1408, -0.0855,
         0.0020, -0.0447, -0.0627,  0.0292,  0.0357,  0.0460, -0.0333, -0.1381,
        -0.3835, -0.0832, -0.0309, -0.1030, -0.2599, -0.0364,  0.0764, -0.0029,
        -0.0134, -0.2535,  0.0360,  0.0467,  0.2350,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0909, -0.1736, -0.1596,  0.0272, -0.0431, -0.2400, -0.4288,  0.0175,
        -0.0131, -0.2208, -0.0607, -0.0571, -0.2804, -0.0918,  0.0312, -0.1673,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0765, -0.1056,  0.0217,  0.0198, -0.0292,  0.1218,  0.1747, -0.1143,
         0.1522,  0.1460,  0.1155,  0.3228,  0.0008,  0.0264,  0.1620, -0.0692,
         0.0697, -0.0256,  0.0258,  0.1839,  0.0164, -0.0031, -0.0040,  0.1100,
        -0.0585, -0.0023,  0.2433,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0446,  0.0603, -0.2456, -0.0404, -0.0795, -0.0223, -0.0909, -0.0811,
        -0.0643, -0.0084, -0.0241, -0.0280, -0.0468,  0.0062,  0.0063,  0.0105,
        -0.0285, -0.0049,  0.0381, -0.0723,  0.0985, -0.1086,  0.0361, -0.3494,
        -0.0048, -0.0517, -0.0830, -0.1478,  0.0180, -0.1844, -0.0817, -0.0452,
        -0.1622,  0.0781, -0.0567, -0.0591,  0.0041, -0.1260, -0.0522, -0.0196,
        -0.0284, -0.1241,  0.0103, -0.0546,  0.0941,  0.1183], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5850e-01,  2.6208e-02, -5.4583e-02, -1.1007e-01,  2.0192e-02,
        -1.9649e-01,  2.7879e-04, -1.9407e-01, -1.6219e-02, -3.3637e-02,
        -6.8759e-02,  1.2377e-01,  4.3185e-02, -7.3048e-02, -9.0183e-02,
        -1.3090e-02, -8.5765e-02, -4.2873e-02, -1.6194e-01, -5.8917e-01,
         9.3965e-03, -1.4383e-01, -4.8322e-02, -3.1649e-02, -6.0440e-02,
        -2.2392e-01, -1.3119e-01,  3.6085e-01,  4.3752e-02,  3.2960e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9941, -0.1350, -0.4521, -0.1474,  0.0268,  0.2127,  0.2126, -0.4215,
        -0.0116,  0.0834,  0.0068,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0640, -0.7491, -0.1351, -0.2568, -0.0805, -0.0522, -0.1506, -0.6392,
        -0.0500, -0.5225,  0.0057, -0.0256,  0.0130, -0.0598, -0.0690, -0.1212,
        -0.1526, -0.0797, -0.0546, -0.1921,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1835,  1.8705,  0.7461,  0.4431, -0.1499,  0.5013,  0.2986, -0.0496,
         0.1460,  0.1094, -0.2061,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4550, -0.5846, -0.2600, -0.3698, -0.1796,  0.0885, -0.0916, -0.1506,
        -0.3107, -0.1173, -0.0252,  0.0267, -0.0972, -0.2131, -0.1324, -0.1712,
        -0.0284, -0.0819,  0.0009, -0.2634,  0.4578,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0571,  0.2622,  0.2211, -0.0423,  0.0885,  0.8142, -0.6853, -0.0883,
         0.2250, -0.0446,  0.0681,  0.2058,  0.0114,  0.0207, -0.0070,  0.4016,
         0.0629,  0.0296,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1165, -0.1942, -0.5341,  0.0371,  0.0142, -0.0156, -0.0260,  0.0282,
        -0.0311,  0.0064, -0.0061,  0.0294, -0.0051, -0.0912, -0.1530,  0.0261,
        -0.0817, -0.1542, -0.0969, -0.0122, -0.0258, -0.1154, -0.2727, -0.0024,
        -0.0570, -0.0233, -0.0990, -0.0798, -0.0691, -0.2126, -0.0264, -0.0265,
        -0.0475,  0.0031, -0.0659, -0.0149, -0.0087, -0.0600,  0.0655,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1279, -1.5365,  0.0663, -0.0993, -0.0329, -0.0534,  0.1235,  0.0082,
         0.1322,  0.0609, -0.1011, -0.1645, -0.1248, -0.0433, -0.0937, -0.0982,
        -0.2581, -0.0298, -0.0129, -0.1667, -0.0292, -0.0183, -0.0140,  0.0347,
        -0.0139, -0.0230, -0.0932, -0.0049, -0.2493, -0.2957, -0.0172,  0.0132,
        -0.1424,  0.0777,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.2420, -0.8823, -0.3983, -0.5620,  0.0927, -0.1567,  0.2127, -0.0881,
        -0.2068,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6233e-02, -1.1092e+00, -2.5917e-01, -9.3100e-02,  8.9419e-02,
         6.6077e-02, -4.5519e-02,  4.9059e-04, -1.3911e-02, -1.3064e-01,
        -8.0782e-02, -5.7482e-02, -1.9587e-02, -3.3765e-02,  2.5975e-02,
        -3.6207e-01, -1.7225e-01, -4.0548e-01, -1.4514e-01, -1.4176e-01,
         1.2597e-01,  4.4855e-02, -2.7796e-01,  4.5161e-02, -2.2029e-01,
        -1.7443e-01, -1.6542e-01,  8.0898e-03,  8.1686e-02, -1.6919e-02,
        -4.7777e-02, -2.1480e-01,  2.9388e-02, -7.1263e-02,  2.4249e-02,
        -8.0825e-02,  9.5518e-02, -1.7162e-02,  2.4390e-02,  1.8937e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0620, -0.1518, -0.3694, -0.0700, -0.0713, -0.0165,  0.0339, -0.0318,
        -0.1009, -0.2097, -0.0089, -0.0127, -0.0214,  0.0103, -0.0034,  0.0392,
         0.0008, -0.0303, -0.0529, -0.0061, -0.0426, -0.0184,  0.1293, -0.0525,
         0.0410, -0.0611, -0.1141,  0.0144, -0.1082, -0.2969, -0.0297, -0.1062,
        -0.0030, -0.0077, -0.0312,  0.0307, -0.0926, -0.0666, -0.0048, -0.0151,
         0.0404, -0.0776,  0.0068, -0.0217, -0.0465,  0.0165,  0.0159, -0.0433,
         0.0533, -0.0454, -0.0116,  0.1255,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0600,  0.8195,  0.1892,  0.0579,  0.1784,  0.2645,  0.1399,  0.0653,
         0.0680, -0.0983,  0.2928,  0.1226,  0.2076,  0.0991,  0.1820,  0.1963,
         0.0850, -0.1331,  0.2421,  0.3083,  0.1335,  0.3539,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1273, -1.1953, -0.1354, -0.0311, -0.2179,  0.0682, -0.2472, -0.0404,
         0.1006,  0.0094, -0.1320, -0.0154, -0.1273, -0.0557,  0.0308, -0.2685,
        -0.1760,  0.1169, -0.0092, -0.0419, -0.0090,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5718e-02, -1.4719e-01, -2.1048e-02, -3.9069e-02,  2.4927e-02,
         3.1148e-02, -7.2231e-02, -7.2001e-03, -3.7751e-02, -2.8935e-02,
        -6.0591e-03, -3.2656e-02,  1.5740e-02,  5.8671e-02,  2.0679e-02,
        -2.5648e-04,  3.6753e-02, -1.7664e-02, -2.5499e-02,  1.5033e-02,
         8.7717e-03, -1.9461e-02, -3.4877e-03, -2.4086e-02, -2.6042e-02,
         5.1855e-02, -8.7058e-02, -1.1461e-01, -3.8684e-02, -5.2630e-02,
        -2.2180e-01, -1.2293e-01, -3.2032e-02, -5.8715e-02, -1.9894e-02,
        -2.5636e-02, -2.4154e-03,  6.0060e-02,  1.8039e-02, -5.1546e-02,
        -2.9308e-01, -6.3620e-02, -4.7051e-02, -1.8290e-03, -1.1419e-01,
        -3.7286e-02, -7.3195e-03, -6.7682e-02,  2.3732e-03, -1.0585e-02,
        -1.1258e-01, -8.9592e-03, -5.1088e-02, -1.5254e-01,  1.6502e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1562, -0.0328,  0.0805, -0.0997, -0.0066, -0.0838, -0.1152,  0.0063,
        -0.0949,  0.0173,  0.0443,  0.0229, -0.0063, -0.1034, -0.0255, -0.0693,
        -0.0894, -0.1110, -0.1092, -0.1439, -0.1723, -0.0782, -0.1391,  0.0255,
         0.0518,  0.0989, -0.1192, -0.4093, -0.1550, -0.1408, -0.1308,  0.0084,
        -0.1232, -0.4471, -0.0198, -0.1361, -0.0512, -0.1677, -0.1634, -0.0921,
        -0.0505,  0.0225,  0.0376, -0.1324, -0.0114,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4734, -0.7677, -0.2210, -0.2821,  0.1593, -0.1709, -0.0732, -0.0219,
        -0.2661,  0.2271, -0.0294, -0.1518, -0.0220, -0.2637, -0.2667,  0.2056,
        -0.1689, -0.3514,  0.1709, -0.0548, -0.0664,  0.1217, -0.0697,  0.1911,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2967, -0.0113, -0.0128,  0.0163, -0.0027,  0.0901, -0.0411, -0.0865,
        -0.0359,  0.0289,  0.0451,  0.0334, -0.0164, -0.2031, -0.1290,  0.0784,
        -0.2253, -0.1117, -0.0842, -0.1576,  0.0848, -0.0239, -0.1518, -0.2109,
         0.0542, -0.0863, -0.3044, -0.0734, -0.0829, -0.2707, -0.0155, -0.1324,
        -0.1222,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0518, -0.9912, -0.2123, -0.2252,  0.1051, -0.1073,  0.0903, -0.1105,
        -0.4051,  0.2572,  0.0425, -0.0036,  0.0091, -0.0854,  0.0919,  0.0436,
         0.0430, -0.1216,  0.0459,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0389, -0.0220,  0.0515, -0.1308, -0.1784, -0.0441, -0.2860, -0.0373,
        -0.2615, -0.0297,  0.0382,  0.1657, -0.0348, -0.0086, -0.0281, -0.0240,
        -0.0512, -0.1641, -0.0343, -0.0129, -0.1087, -0.4604, -0.0529, -0.0393,
        -0.0248,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1628, -1.0368,  0.0209,  0.0500,  0.0057, -0.4720, -0.3077, -0.0655,
        -0.3540, -0.1417, -0.1021, -0.0894, -0.0951,  0.0995, -0.2177, -0.1958,
         0.2503,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0397,  0.0672, -0.1422,  0.0367,  0.0078, -0.1244,  0.0297, -0.0323,
         0.0837, -0.1495,  0.0245, -0.0599,  0.0543,  0.0493, -0.0389, -0.0717,
        -0.0484, -0.2681,  0.0298, -0.1627, -0.0278, -0.3055, -0.4212,  0.0005,
         0.0989,  0.1160, -0.0205, -0.0437, -0.0549, -0.0304,  0.0015,  0.1407,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2131, -0.2908, -0.1275, -0.0859, -0.1097, -0.0651, -0.1212, -0.3977,
        -0.0383, -0.0235,  0.0008, -0.0260,  0.0730, -0.1490, -0.0945, -0.0123,
        -0.0851, -0.0824, -0.1691, -0.0300, -0.0069, -0.0017, -0.1735, -0.3485,
        -0.0389, -0.0689,  0.1846, -0.0849,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1151, -0.0471, -0.0224,  0.0959,  0.0339, -0.1177, -0.4232, -0.0172,
        -0.0354,  0.0032,  0.0282, -0.1856, -0.0072, -0.0469,  0.0030, -0.0499,
        -0.0282, -0.1663,  0.0222, -0.0054, -0.0343, -0.0072,  0.0206, -0.0958,
        -0.1379, -0.0177,  0.0521, -0.0089, -0.1185,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2662, -0.0450, -0.0274, -0.0952, -0.3703, -0.0569,  0.0408, -0.0337,
        -0.0006,  0.1033, -0.0561, -0.0748, -0.1282, -0.3283,  0.0416, -0.0989,
        -0.0777, -0.3925, -0.0621, -0.1118, -0.0160, -0.0028, -0.0257, -0.0362,
        -0.0960, -0.0418, -0.0543, -0.0248,  0.0289,  0.0056, -0.0274, -0.2778,
        -0.0009,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2397, -0.5002, -0.0184, -0.0956, -0.1247, -0.2198,  0.1570, -0.0184,
        -0.0877, -0.1355, -0.0921, -0.0466,  0.0039, -0.0761, -0.1692, -0.0073,
        -0.0668, -0.0120, -0.4966, -0.0741, -0.3373, -0.1007, -0.1154, -0.1865,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1237, -0.3422, -0.2603, -0.0632, -0.1582, -0.1668, -0.1802, -0.1590,
        -0.0068, -0.1160, -0.0584, -0.0690, -0.1186,  0.0063, -0.0455, -0.0178,
         0.0157, -0.0689, -0.0251, -0.2498, -0.0568,  0.1262, -0.0346,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1233, -0.3105,  0.0890,  0.1005,  0.0052, -0.1660,  0.0341, -0.1216,
         0.0041, -0.0461,  0.0244, -0.0327, -0.0584,  0.0550,  0.0206,  0.0026,
        -0.1881, -0.1556, -0.0834, -0.0631,  0.0686, -0.1490,  0.0214, -0.1053,
        -0.2343, -0.0737, -0.0603, -0.1239, -0.0787, -0.2645, -0.2693, -0.0315,
        -0.0687,  0.0689, -0.0820, -0.0710, -0.0540,  0.0330, -0.1710,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3786, -0.2800, -0.0649, -0.0708, -0.4831, -0.0320, -0.0170,  0.0158,
        -0.1640,  0.1229, -0.0205, -0.0861,  0.0089,  0.0623, -0.0452, -0.1319,
         0.0510, -0.0194,  0.0099, -0.0905, -0.2648, -0.0934, -0.0832, -0.1373,
        -0.2263, -0.2211, -0.0211, -0.0624,  0.0221, -0.1178, -0.0945, -0.1058,
        -0.0621, -0.0496, -0.0255, -0.0267, -0.0514, -0.2427, -0.0158, -0.0257,
        -0.0082, -0.1003, -0.0606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3014, -0.8023,  0.0196,  0.0681,  0.0766, -0.0512, -0.0949, -0.0559,
        -0.1945,  0.0365,  0.1061, -0.0109, -0.0749,  0.0257,  0.0241, -0.1009,
        -0.0672, -0.1711,  0.0397, -0.0354, -0.0137, -0.0878, -0.0951, -0.0375,
         0.0187, -0.0070,  0.1011,  0.0148, -0.0616,  0.0392,  0.0141, -0.0642,
        -0.1166,  0.1432,  0.0127, -0.0051, -0.1610, -0.0868, -0.1035, -0.0353,
        -0.1045, -0.0737, -0.0145, -0.0119,  0.0088, -0.0757, -0.0299, -0.0545,
         0.0364, -0.0482, -0.0178, -0.0243, -0.1763,  0.0086,  0.0976,  0.0740,
        -0.1194], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0464, -2.3092, -0.3451, -0.0258, -0.2102, -0.0762, -0.1162, -0.4822,
        -0.1135, -0.2861, -0.3957, -0.0228, -0.0803, -0.1061,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3870, -0.7112, -0.3809, -0.0900, -0.0647,  0.0370,  0.0763, -0.0451,
        -0.0029, -0.0176, -0.0633, -0.5515, -0.3442,  0.0950,  0.0060, -0.3325,
         0.0762, -0.2104, -0.0090, -0.1494, -0.3644, -0.1385, -0.1082, -0.0060,
         0.0645, -0.0218,  0.0434, -0.0479, -0.1311,  0.4418,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.3933, 0.5156, 0.1201, 0.4584, 0.2344, 0.0902, 0.1178, 0.0675, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2907, -0.3045, -0.0338, -0.1721, -0.1037, -0.4296, -0.0199,  0.0294,
         0.0080, -0.0418, -0.0957, -0.1053, -0.1490, -0.3322, -0.0051, -0.0042,
        -0.0581, -0.0045, -0.0453, -0.0235,  0.0026, -0.0328, -0.0302, -0.1134,
        -0.2254, -0.0630, -0.1728,  0.1610, -0.2525, -0.2552,  0.0398,  0.0210,
         0.1655,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2201e-02, -4.1758e-01, -2.1842e-01,  3.1192e-02, -1.8028e-02,
         5.0945e-02,  7.0765e-02, -8.7217e-02,  2.2236e-03, -1.5050e-02,
        -1.6049e-03, -1.7249e-02, -4.6144e-02, -1.8694e-02,  3.7826e-02,
         9.4731e-03,  8.5202e-03,  4.1878e-02, -7.1885e-03,  1.1869e-02,
         1.9872e-02, -1.7573e-02,  9.5531e-04, -4.0238e-02,  4.7060e-02,
        -5.6916e-02, -6.4448e-02, -1.2788e-01,  3.7484e-02, -8.5866e-03,
        -3.1648e-02,  1.1941e-02, -6.0032e-02, -3.1640e-01, -3.7270e-03,
        -4.5828e-02, -3.9113e-02,  1.5318e-03, -3.9470e-02, -3.8326e-02,
        -3.2539e-02, -1.6314e-02, -5.3304e-03, -8.9255e-02, -3.1362e-02,
         4.3267e-02, -1.2491e-01,  1.0733e-02, -1.1671e-01, -1.3312e-01,
        -2.7894e-02, -3.7631e-02, -1.6947e-02, -2.0019e-02,  2.0665e-02,
         5.1587e-02, -1.8560e-03, -2.1938e-02, -3.4857e-02, -6.0118e-02,
        -4.5407e-02, -2.9747e-02, -2.2439e-01, -3.9843e-02,  1.1219e-02,
        -7.8169e-03, -7.4909e-03,  5.4280e-02,  2.8573e-04, -3.7462e-02,
        -6.4373e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1686, -0.1112, -0.0406,  0.0320, -0.0963, -0.0032, -0.5007, -0.0651,
        -0.0570,  0.0072, -0.0333, -0.0673,  0.0255, -0.0461, -0.0247, -0.0049,
        -0.0356,  0.0616, -0.0301, -0.1094, -0.0404, -0.0713,  0.0584, -0.1650,
         0.0773, -0.1011, -0.1049, -0.0489, -0.0119, -0.0516, -0.0814,  0.0069,
        -0.1170, -0.1417, -0.0310, -0.0147, -0.0395,  0.0358, -0.0403,  0.0915,
         0.3084,  0.0611,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1368, -0.4228, -0.0077, -0.1129, -0.0087, -0.0198, -0.0225,  0.0099,
        -0.0429, -0.1171, -0.1129, -0.0701, -0.1600, -0.0219, -0.0369,  0.0729,
         0.0654, -0.1579, -0.1731, -0.1000, -0.0602, -0.0126,  0.0384, -0.0056,
        -0.0993, -0.1691, -0.0799, -0.0392, -0.2283, -0.0935, -0.0076, -0.1035,
         0.0306, -0.0366, -0.0881, -0.2485,  0.1904,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1297,  0.9904,  0.0455,  0.4119,  0.1327,  0.0276,  0.0779,  0.0163,
        -0.0657,  0.1496,  0.1481,  0.0764,  0.1653,  0.1670, -0.0373,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4532,  0.0527,  0.0487,  0.0658,  0.0994,  0.1849,  0.0590,  0.1236,
         0.4129,  0.0641,  0.0055,  0.1789,  0.4621, -0.1359, -0.1371, -0.0484,
         0.0558,  0.0070,  0.0893, -0.2617,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6021e-02,  1.8860e-03, -2.7225e-03, -4.4070e-02, -2.0124e-01,
         1.5872e-02, -6.2027e-02,  2.3433e-02,  3.3612e-02, -7.9295e-02,
        -4.4949e-02, -8.6808e-02, -1.2402e-02,  1.3929e-04, -4.0281e-02,
        -7.1753e-02,  4.0568e-02, -6.3643e-02,  3.3388e-02, -8.4773e-02,
         1.0961e-02,  2.0828e-04, -3.9684e-02, -3.5737e-02, -5.2506e-02,
        -5.7234e-02, -1.8123e-01, -4.4455e-03, -2.0063e-03, -5.6795e-02,
        -9.8918e-02, -1.1947e-02, -1.6665e-02, -6.9952e-02, -6.4609e-02,
        -9.8308e-02,  7.8867e-02, -9.8376e-03, -9.0455e-03,  2.7015e-02,
        -5.9520e-03,  1.2841e-02,  3.3087e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1077, -0.0376, -0.0073, -0.0355, -0.0889, -0.1245, -0.0655, -0.3897,
        -0.1116, -0.0697, -0.0905, -0.0175, -0.2010, -0.1299, -0.1783, -0.0708,
        -0.0060,  0.0454, -0.0354, -0.0075, -0.1499, -0.0444, -0.0454,  0.0745,
        -0.0103, -0.1319, -0.1327, -0.0032, -0.1310, -0.1012,  0.0082, -0.1226,
        -0.0423, -0.0918, -0.1013, -0.0639, -0.0369, -0.0310,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1521, -0.6937, -0.3251, -0.2838,  0.0527, -0.5302, -0.1579,  0.0303,
         0.0050, -0.3297, -0.0123, -0.0353, -0.2164, -0.0223, -0.0785, -0.3733,
        -0.0209, -0.1842,  0.1501,  0.0134, -0.4613, -0.0878,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0151, -0.1678,  0.1240,  0.0630,  0.0146, -0.0567, -0.0210,  0.1281,
         0.0780, -0.2919, -0.0027,  0.0041, -0.0351,  0.0321, -0.0503, -0.0921,
        -0.5635,  0.0017, -0.0496, -0.2620,  0.0207, -0.1767, -0.0843, -0.0799,
        -0.0447, -0.0375, -0.0507,  0.1033,  0.0480, -0.0907, -0.2318,  0.0456,
         0.0092,  0.0414, -0.0361, -0.4764,  0.1073,  0.0304, -0.0390, -0.1619,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1453, -0.1594,  0.0124, -0.0294, -0.0845, -0.1107,  0.0188,  0.0511,
        -0.1515, -0.0857, -0.0076, -0.0083, -0.0401, -0.4668, -0.2190, -0.0735,
        -0.1085, -0.0625, -0.1518, -0.0442, -0.0275, -0.0190, -0.0262, -0.0878,
        -0.1214, -0.3565, -0.0162, -0.1885, -0.2664, -0.0105,  0.0430,  0.2289,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7490e-01, -1.1922e+00, -1.3829e-01,  1.0207e-01, -1.2196e-01,
         4.6184e-02, -7.5113e-02, -1.5710e-01, -3.9372e-01, -3.3305e-02,
        -4.3260e-02, -1.1039e-01, -3.7943e-04,  1.1357e-02,  9.9145e-03,
        -5.6176e-03, -2.2230e-01, -4.9463e-02, -4.4206e-02, -2.8599e-02,
        -9.4160e-03, -2.9388e-02,  5.2401e-02, -1.1236e-01,  2.0885e-02,
         1.6982e-01, -4.1066e-02, -4.5768e-02,  9.9488e-02,  1.4778e-02,
         4.9340e-02, -8.0644e-04, -2.4790e-02,  1.0195e-01, -7.2476e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.1540,  0.1583,  0.4321,  0.4300,  0.0874,  0.0450,  0.5237,  0.2635,
         0.3537,  0.1968,  0.1159,  0.1570,  0.0183,  0.2007,  0.1110,  0.0750,
         0.1247,  0.0617, -0.0152,  0.0333,  0.0817,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5787, -1.1654, -0.0697, -0.1201, -0.2061, -0.1677, -0.2412, -0.3385,
        -0.6700, -0.0213,  0.0261,  0.0775,  0.0618,  0.1605, -0.1779, -0.0078,
        -0.0922, -0.0249,  0.2312,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5560, -0.5602, -0.3081, -0.1678, -0.1438, -0.0426,  0.0311,  0.0616,
        -0.0904, -0.3496, -0.0429,  0.2139, -0.0781, -0.7033,  0.1157, -0.1079,
         0.1399, -0.0404,  0.0396,  0.2266,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3362, -0.6395, -0.1404, -0.2668, -0.0697, -0.0232, -0.1463, -0.1602,
        -0.0066, -0.0671, -0.0418, -0.0668, -0.0080, -0.1250, -0.0164, -0.1386,
        -0.2607,  0.0215, -0.0688, -0.1142, -0.0497,  0.1956, -0.0761,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1313,  0.8743,  0.0121, -0.0975, -0.0227,  0.1468,  0.0921,  0.1427,
         0.0783,  0.4837,  0.1711,  0.1863,  0.1471,  0.4135,  0.0446, -0.0233,
        -0.1676, -0.0332,  0.0663,  0.0645,  0.0215, -0.0472,  0.1824,  0.1862,
        -0.1428,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5467e-01,  9.3897e-01,  4.4313e-02,  3.4842e-01,  6.9944e-02,
         1.3391e-01, -1.0866e-01,  7.4442e-02, -1.5474e-01, -2.3067e-02,
         1.3359e-01, -2.0758e-02, -2.4327e-03,  1.0856e-02,  9.7235e-04,
         4.0328e-02,  7.1461e-02,  2.3406e-02, -3.3741e-02,  6.4840e-02,
         5.9838e-02,  6.9586e-02,  9.5505e-02,  1.2199e-01, -2.1453e-02,
         1.3593e-01,  1.4463e-01, -2.1098e-04, -2.3764e-02, -7.2782e-02,
        -6.1832e-02,  1.6358e-02, -1.5608e-02,  9.2994e-02,  8.2098e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4129,  0.0771, -0.0310, -0.2106, -0.0365,  0.0007, -0.1669, -0.1273,
        -0.3884, -0.1043,  0.0499,  0.0454,  0.0447, -0.1459, -0.1169, -0.1788,
        -0.2054, -0.3110, -0.0925,  0.1418, -0.0457,  0.0380,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1553,  0.2508, -0.1045,  0.1040, -0.0207,  0.3331,  0.0012, -0.0462,
         0.0818,  0.4128,  0.2858, -0.0946,  0.1481,  0.0300,  0.1623,  0.1732,
         0.2071, -0.0386,  0.0593, -0.2897, -0.0731,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0184e-01, -1.0904e+00, -2.0955e-02, -9.2219e-02,  8.5453e-02,
         5.9663e-02, -1.3648e-01, -4.8813e-01, -6.9257e-02,  7.6942e-02,
        -7.7940e-02,  2.5371e-02, -6.3982e-03,  3.5974e-02, -6.7087e-02,
        -1.3017e-01,  3.4031e-01, -1.1750e-04, -1.2514e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1430, -0.1203, -0.0880, -0.0005, -0.0656, -0.1809,  0.0806,  0.0054,
         0.0838,  0.0006, -0.0350, -0.0389, -0.0337, -0.1155, -0.0079,  0.0528,
        -0.0711, -0.1760, -0.0130, -0.0878, -0.0012, -0.0415, -0.1102, -0.0211,
        -0.0441, -0.0501,  0.0561, -0.1471, -0.0669, -0.2800,  0.0816, -0.0435,
        -0.1206, -0.2728, -0.0472, -0.0093,  0.0594,  0.0014,  0.0159, -0.0243,
         0.0675,  0.0116,  0.0296, -0.0115,  0.0775,  0.0399,  0.1846],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0770, -0.2592, -0.1712, -0.0236,  0.0380, -0.0181, -0.0158,  0.0110,
        -0.0507,  0.0388, -0.0059,  0.0677, -0.0204, -0.0432, -0.0157, -0.0168,
        -0.3642,  0.0282, -0.0836, -0.0041, -0.0986, -0.1353, -0.1754, -0.5322,
        -0.0953,  0.1152, -0.2345, -0.1137,  0.0475,  0.0363,  0.0038,  0.0744,
        -0.0257,  0.0242,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5986e-01, -1.5616e+00,  1.7704e-01,  9.4790e-03, -8.8689e-03,
        -8.7870e-02, -3.1606e-02, -5.9958e-02, -1.4409e-02, -2.2540e-02,
         1.0850e-01, -2.1685e-02, -1.0536e-01, -3.5496e-01,  8.6471e-02,
         1.0849e-01, -2.8343e-02,  1.4810e-02, -5.3636e-03, -1.4721e-01,
        -3.9703e-02, -8.4834e-03, -3.7792e-02, -9.2737e-03,  1.1437e-02,
        -3.8411e-02,  3.3301e-04, -2.4572e-02, -1.2080e-02, -2.7993e-02,
        -7.6605e-02, -5.0427e-03,  1.7404e-02, -3.5808e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.1758,  1.1471, -0.5480, -0.0952,  0.1060,  0.1143, -0.0569, -0.1616,
         0.1389,  0.1228,  0.1502, -0.0328,  0.2995,  0.5452,  0.3120,  0.1500,
        -0.0288,  0.2147,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7057, -0.0401,  0.1229, -0.2535,  0.1038, -0.0798,  0.0692, -0.0289,
         0.1112,  1.0323,  0.0693,  0.0413,  0.1444,  0.1165,  0.0466,  0.0398,
         0.2457, -0.0980,  0.1516,  0.5912,  0.0497, -0.1821, -0.0145, -0.0936,
        -0.0015, -0.2368, -0.5048,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4674e-01, -2.6503e-01, -9.3049e-02,  9.6612e-02,  8.4625e-02,
         1.5064e-01, -1.2434e-01, -1.0070e-01,  5.8229e-01, -3.6023e-02,
         8.0731e-02,  1.5432e-01,  6.9403e-01,  4.6421e-02,  2.1698e-02,
        -8.1609e-02,  7.3920e-02, -9.2890e-05, -6.5442e-02,  8.3861e-01,
         9.5446e-02,  1.8739e-03, -2.7740e-02,  1.2058e-03,  9.6604e-02,
         5.5114e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6677,  0.7486,  0.0159,  0.1976,  0.8176,  0.1873, -0.1271,  0.0746,
         0.1594,  0.2320,  0.2969,  0.0664,  0.0436, -0.0572,  0.1477,  0.0798,
         0.0414,  0.0715, -0.0843, -0.1890,  0.1127,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1829,  1.6244,  0.5667,  1.1101, -0.1237,  0.1091,  0.4919, -0.0152,
         0.1087, -0.4201,  0.0966,  0.2412,  0.0787, -0.1439,  0.3422,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4071,  0.0299,  0.1250, -0.0030,  0.0130,  0.1900,  0.1034,  0.0155,
         0.0265, -0.0583, -0.0136,  0.1215,  0.5209,  0.1761,  0.1819, -0.0703,
        -0.1630, -0.0897,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0284, -0.8385, -0.3112, -0.8036, -0.0767, -0.0378, -0.1023,  0.0759,
        -0.2220, -0.2258, -0.2301, -0.0268,  0.0853, -0.1651, -0.0734,  0.0034,
         0.0612,  0.2184,  0.3541,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1973, -0.3206,  0.0950, -0.0019,  0.0456, -0.0519,  0.0464, -0.0026,
        -0.0736, -0.0182, -0.0443,  0.0072, -0.0312,  0.0203, -0.0421, -0.1085,
        -0.2561, -0.0284,  0.1307,  0.0890, -0.3348, -0.0329, -0.1985, -0.1006,
        -0.1268, -0.1080, -0.1184, -0.0089, -0.0231, -0.1379, -0.0102, -0.0113,
        -0.1211, -0.0308, -0.0042, -0.0239, -0.1640, -0.0154, -0.0639,  0.0370,
         0.0658,  0.0638, -0.0956,  0.0746], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9201e-01, -5.8181e-01, -1.6587e-01, -1.9204e-01, -7.6251e-02,
        -5.6757e-02, -1.7715e-02, -1.8055e-02, -1.2992e-01,  7.2508e-02,
        -9.8747e-02,  4.4712e-02,  8.8206e-02, -1.4640e-02,  1.2542e-01,
        -6.9407e-04, -1.8850e-02, -1.3010e-01,  3.7239e-03, -7.4557e-02,
        -1.7179e-01,  2.4262e-02,  3.5541e-02,  1.8728e-02,  4.0601e-02,
        -4.7047e-03,  4.1358e-04, -2.1587e-01,  8.4029e-02, -2.5493e-02,
        -4.6020e-02, -1.3009e-01,  2.9436e-02, -2.7250e-01,  3.8809e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2703, -0.6878, -0.0087, -0.1181, -0.0571, -0.0042, -0.2234, -0.2380,
        -0.0252,  0.0105,  0.0065, -0.0438, -0.4234,  0.0015,  0.0008, -0.2910,
        -0.1748,  0.0202, -0.0228,  0.0785,  0.0306, -0.0364,  0.0986,  0.1204,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5055, -1.3997, -0.1855,  0.1606,  0.0256, -0.1035,  0.0182,  0.0884,
        -0.2711,  0.0247,  0.0926,  0.0220,  0.0152, -0.1784, -0.3276,  0.1274,
         0.0600,  0.0889, -0.2903, -0.0178, -0.0616, -0.1490, -0.0336, -0.1016,
         0.0417, -0.3040, -0.0528, -0.1268, -0.0225,  0.1330, -0.1503, -0.1133,
        -0.0936,  0.2342,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0690, -0.0204,  0.0237, -0.1681,  0.1083,  0.1052, -0.3672, -0.0784,
        -0.0505, -0.0826, -0.0479, -0.0096, -0.2218, -0.1418, -0.2498, -0.6756,
        -0.0708,  0.1073, -0.0656, -0.0204,  0.0490, -0.0249, -0.0116, -0.1995,
        -0.0047, -0.0523, -0.0916, -0.0794, -0.0329, -0.1311,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.3394, -0.1507, -0.2862, -1.3221, -0.0660,  0.0525, -0.0816, -0.1616,
        -0.2805, -0.6697, -0.0031,  0.0228, -0.2915,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2618, -0.2244, -0.0393,  0.0060, -0.0601, -0.1718,  0.1213, -0.0615,
        -0.6364, -0.0863, -0.0948, -0.1418, -0.2034, -0.0382, -0.2322, -0.0242,
         0.0404,  0.0217,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3077,  0.6379,  0.1693, -0.1349,  0.4445, -0.0136,  0.2098,  0.4894,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1263, -0.5701, -0.0608,  0.0422, -0.0955, -0.0650, -0.0258,  0.0068,
         0.0126, -0.0147,  0.0214,  0.0138, -0.0102, -0.0249, -0.0032, -0.0126,
         0.0177,  0.0044, -0.0388,  0.0214, -0.0533, -0.0284, -0.0996, -0.0110,
        -0.0105, -0.0794, -0.0830, -0.0508, -0.4208, -0.0854,  0.0670, -0.2175,
        -0.0564, -0.3097, -0.0217, -0.1842, -0.1191, -0.0570,  0.0646,  0.0221,
        -0.0171,  0.0289, -0.0350,  0.0122,  0.1101,  0.0087, -0.0125],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5265,  1.0812,  2.3838,  0.7628,  0.7729,  0.5049,  0.3488,  0.8613,
         2.4032,  0.0147, -1.5476,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2159,  0.6959,  0.2076,  0.3018,  0.0750,  0.0225,  0.1031,  0.7629,
         0.2723,  0.1708,  0.2082,  0.0711,  0.3110,  0.0926,  0.0215, -0.1096,
        -0.1048,  0.1018,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2396, -0.0293,  0.6290, -0.1777, -0.0367, -0.0758,  0.1891,  0.1296,
         0.2762,  0.1533, -0.1413, -0.0659,  0.3138,  0.0708,  0.3428,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7077, -1.3804, -0.1635, -0.0980,  0.1927, -0.1747, -0.4501,  0.1653,
         0.0365, -0.1940, -0.3133, -0.0368, -0.0333,  0.2261,  0.0036,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4228, -0.1416,  0.0142,  0.0854,  0.0542,  0.0395,  0.0544, -0.0483,
        -0.2663, -0.1815, -0.1473, -0.2251, -0.2524,  0.0070,  0.0595, -0.0377,
        -0.1962,  0.0513,  0.0254,  0.0280, -0.0561, -0.2670,  0.0112,  0.0979,
        -0.0351, -0.2083,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9723e-01,  4.1861e-03,  2.3062e-02, -1.9014e-01,  1.4393e-01,
         2.1086e-01, -3.4573e-02,  2.4280e-01,  1.2570e+00,  1.0982e-01,
         3.0109e-04,  1.1277e-02,  4.1682e-02, -3.8794e-01, -2.8155e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1050e-01, -1.3802e+00, -2.5371e-02, -8.0250e-03, -3.3708e-01,
         5.5269e-03, -1.6690e-01, -6.2149e-03, -1.3701e-03, -5.7768e-01,
         9.5535e-02,  4.4781e-02, -6.4853e-02, -5.4739e-02, -3.2952e-02,
         3.9755e-02, -4.9217e-02,  1.0738e-01, -6.6035e-02,  7.1984e-02,
        -5.3951e-02, -1.4424e-01,  4.0644e-02,  1.4048e-01,  8.9964e-02,
         8.4309e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1160,  0.0491,  0.1428, -0.0099, -0.1313, -0.0848, -0.0336, -0.0856,
        -0.0665, -0.1626, -0.6347, -0.1146, -0.0872, -0.3787, -0.2330, -0.0715,
         0.0621, -0.1227,  0.0417, -0.0989, -0.1108,  0.0078, -0.0264,  0.0142,
         0.3922, -0.2540,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.3339, -1.0852, -0.2206, -0.0386, -0.1005,  0.0831, -0.1004, -0.1799,
         0.0592, -0.0850, -0.0624, -0.0442, -0.1848, -0.0887, -0.0662, -0.1464,
        -0.0540, -0.0178, -0.0753, -0.0353, -0.0345, -0.0739, -0.0303, -0.0904,
        -0.1829, -0.1548, -0.0892, -0.0142,  0.0459,  0.0071, -0.0368, -0.0402,
        -0.0188,  0.0258, -0.0355, -0.0728,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1107, -0.5182, -0.0699, -0.1430, -0.0963, -0.0788, -0.1038, -0.2530,
         0.0008,  0.1848, -0.0568,  0.0307,  0.0966,  0.2115, -0.0459, -0.0401,
         0.0371,  0.0045,  0.0473,  0.0377, -0.0337, -0.1092, -0.0504, -0.0961,
        -0.0968, -0.2455,  0.1053, -0.0456, -0.2413, -0.0480,  0.1061, -0.0732,
        -0.1879,  0.1034, -0.0688, -0.2102,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0157,  0.5610, -0.1274,  0.1670, -0.0138,  0.3120,  0.1132,  0.0675,
        -0.0068,  0.0953,  0.2078,  0.0901,  0.2436,  0.1341,  0.0964,  0.0930,
         0.0549,  0.0244, -0.0536, -0.2295,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1096, -0.3742, -0.2144, -0.0145,  0.0134, -0.0368,  0.0203, -0.0524,
        -0.0295, -0.0619,  0.0372, -0.1131,  0.0289, -0.1094, -0.2056, -0.0571,
        -0.0203, -0.1282, -0.0647, -0.0237, -0.0581, -0.0312,  0.0025, -0.0065,
        -0.0131, -0.0250,  0.0159, -0.1602,  0.0276, -0.0881,  0.0016, -0.1023,
        -0.0570, -0.1513,  0.0178,  0.0507,  0.0028,  0.1273,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0401, -0.2151, -0.0685, -0.3616, -0.0111, -0.0034,  0.0372, -0.0069,
         0.0294, -0.0192, -0.1588, -0.1454, -0.0080,  0.0512,  0.1493, -0.2309,
        -0.0581, -0.2023,  0.0408,  0.0505,  0.0536,  0.0171,  0.0542, -0.0097,
        -0.1177,  0.0555, -0.0356, -0.0720, -0.1988,  0.0146, -0.0689, -0.0267,
         0.0416,  0.0251, -0.0633,  0.0268,  0.0474,  0.0676,  0.0498,  0.0077,
        -0.2850], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0303, -2.4867, -0.2043,  0.4938, -0.0591,  0.0438, -0.1207, -0.3050,
        -0.1514,  0.0261, -0.0703, -0.1632, -0.1955, -0.1187, -0.1124, -0.2028,
         0.0072, -0.2061, -0.0635,  0.1632, -0.0951, -0.1704,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2386, -0.0039,  0.0131, -0.0284, -0.0872, -0.0568,  0.0321, -0.0037,
        -0.0643,  0.0354, -0.0377, -0.1989, -0.0336, -0.1073, -0.0846, -0.1315,
         0.0134, -0.0597, -0.0026, -0.2020, -0.3660,  0.0272,  0.1352, -0.0970,
        -0.0568,  0.0379,  0.0188, -0.0956, -0.4362, -0.0193, -0.0194, -0.1378,
        -0.1925,  0.2331,  0.2235,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3619, -0.2089, -0.3765, -0.1555, -0.3635,  0.0535,  0.0191,  0.0577,
        -0.0677,  0.0413, -0.0594, -0.0161, -0.0281, -0.0491,  0.0605, -0.1271,
         0.0246, -0.0810, -0.2777, -0.0833, -0.0598, -0.0625, -0.0299, -0.0761,
        -0.0207,  0.0318, -0.1496, -0.0766,  0.1708,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3199,  1.3896,  0.3603,  0.2742,  0.1819,  0.3540,  0.1439, -0.0785,
        -0.0318, -0.1264,  0.0506,  0.1594, -0.0820,  0.0157, -0.0513,  0.1370,
         0.0654,  0.0534,  0.1283, -0.0439,  0.1859,  0.1636,  0.1192, -0.0578,
        -0.5915,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6029,  0.0647, -0.1412,  0.0115, -0.8437, -0.0899, -0.0233, -0.1702,
        -0.1305, -0.1281,  0.0601, -0.0810, -0.0246,  0.1410, -0.2937, -0.1673,
        -0.0288, -0.0347, -0.1064, -0.0300, -0.0911,  0.1517, -0.0083,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0998, -0.0554, -0.0078, -0.0326,  0.0174,  0.0439, -0.0591, -0.0440,
        -0.0317, -0.0499, -0.0058, -0.0565, -0.0637, -0.5940,  0.0446, -0.0336,
         0.0034, -0.0376,  0.0336, -0.0072,  0.0418, -0.1845,  0.0487,  0.0137,
        -0.0395, -0.0742, -0.0513, -0.0934, -0.2037, -0.0684, -0.0466, -0.0340,
        -0.1309, -0.0832,  0.0258, -0.0067, -0.0451,  0.0449, -0.0641,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4698,  0.3171, -0.2180,  0.5531,  1.0322, -0.0250,  0.0829,  0.2985,
         0.5629, -0.1026, -0.0261, -0.0050,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.2105,  0.2462,  0.3494,  0.0285,  0.0126, -0.0013,  0.1513, -0.1302,
         0.0017,  0.0056, -0.0229, -0.0044, -0.0134, -0.0632,  0.0581,  0.0286,
         0.0307,  0.1104, -0.0132,  0.1266,  0.0655,  0.0687, -0.0357,  0.0446,
        -0.1139,  0.0724,  0.2576,  0.3493, -0.0338,  0.1357, -0.0271,  0.0555,
         0.1599, -0.0047,  0.0928,  0.1170,  0.1160,  0.0570, -0.0060,  0.0410,
        -0.0833, -0.0810, -0.1972, -0.1445,  0.1493,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1965,  1.7851,  0.3900,  0.5282,  0.1746,  0.0808, -0.0682,  0.3804,
         0.4124, -0.3086,  0.0432,  0.0596,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0489,  1.8326,  0.1162,  0.0392, -0.1148,  0.1605,  0.2371, -0.1020,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1719, -0.7404, -0.1623,  0.0392, -0.0419, -0.0299, -0.0578, -0.0021,
        -0.0399, -0.0755, -0.1125, -0.0108, -0.0145,  0.0485, -0.0091,  0.0304,
         0.0239, -0.1571,  0.0199, -0.0102, -0.0099,  0.0365,  0.0182, -0.0571,
         0.0027, -0.0273,  0.0498, -0.0752, -0.0614,  0.1012, -0.0269, -0.0467,
        -0.0094, -0.0171, -0.0267,  0.0171, -0.0127, -0.0570,  0.0095,  0.0075,
        -0.0491, -0.0103,  0.0214, -0.0382, -0.1049,  0.3188, -0.2484,  0.0571,
        -0.1172, -0.2594, -0.1414,  0.0688, -0.0650,  0.0963, -0.0802,  0.0295,
        -0.0670,  0.0151,  0.0132,  0.1969, -0.0260], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0069, -0.1310,  0.0372, -0.0771, -0.2544,  0.0215, -0.0932,  0.0248,
        -0.0039, -0.0404, -0.0184, -0.1546,  0.0022, -0.1457, -0.3552, -0.0810,
         0.0846,  0.0447, -0.2320, -0.0310, -0.2609, -0.0940, -0.0672,  0.0550,
        -0.1664, -0.0714,  0.0088, -0.0329, -0.0667, -0.1784, -0.1156, -0.2445,
         0.0401,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1285, -0.2773, -0.3912, -0.1543, -0.0078, -0.1405, -0.0948,  0.0783,
         0.0419, -0.1982,  0.1414, -0.0162, -0.0732, -0.0716, -0.0644,  0.5131,
        -0.0482, -0.2886, -0.7947, -0.0297,  0.0474,  0.0272,  0.2736,  0.1390,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8470, -0.2844, -0.3024, -0.0605, -0.1593, -0.2132, -0.0528, -0.1960,
         0.0512, -0.2311, -0.2418, -0.2776, -0.5408, -0.1192, -0.0449, -0.1130,
         0.0195, -0.0746,  0.0759,  0.0858, -0.1213,  0.0295, -0.1820,  0.2528,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4342, -0.2257, -0.2915, -0.2916,  0.0195, -0.1201,  0.2404, -0.1863,
         0.0220,  0.1649, -0.0097, -0.0284, -0.0105, -0.0145, -0.1006, -0.2994,
        -0.1972, -0.0921, -0.0354,  0.0246, -0.3204, -0.0554, -0.1270, -0.3966,
        -0.0566, -0.0080, -0.0196, -0.0714,  0.0931, -0.0688, -0.0763, -0.0379,
        -0.2230, -0.2567,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6275, -0.3756, -0.9404,  0.0277, -0.0946,  0.4496, -0.1007, -0.1277,
        -0.0313, -0.2749,  0.0071, -0.6260, -0.0050, -1.0176, -0.5192,  0.6757,
        -0.5260, -0.1736, -0.0483,  0.1557, -0.2038, -0.6465,  0.2884, -0.5112,
         0.0693,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1902, -0.1347,  0.1338,  0.1009,  0.1280,  0.0523,  0.2007,  0.0899,
         0.2050,  1.1365,  0.0320,  0.0655,  0.1331, -0.0026, -0.0524,  0.2016,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0805,  0.4635,  0.0340,  0.1063,  0.1419,  0.1678,  0.7819,  0.0573,
        -0.0216, -0.0956, -0.0008,  0.0171,  0.0882, -0.0129,  0.2263, -0.1662,
         0.0174,  0.0539,  0.0926, -0.0255, -0.0077, -0.0699, -0.1230,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0859,  0.2872,  0.2184,  0.2722,  0.4294,  0.0973,  0.0883,  0.0226,
         0.2249,  0.0840,  0.1181, -0.0292, -0.1249,  0.1648,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.3520,  0.2794,  0.2318,  0.5384,  0.0762,  0.0289,  0.1910, -0.1267,
         0.2031,  0.1221,  0.2367,  0.3240,  0.0683,  0.0148,  0.0808,  0.0236,
        -0.0007,  0.4443,  0.0490,  0.3081,  0.0889,  0.0408,  0.0345, -0.0044,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3923e-03, -1.1720e-01, -3.5143e-01, -2.5685e-02, -2.8385e-01,
        -3.5027e-01, -6.6415e-01, -2.3159e-01, -1.6614e-01, -1.5662e-01,
         4.1568e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0914, -0.1568,  0.0101, -0.1076, -0.0963, -0.1681, -0.3563, -0.0676,
        -0.2269, -0.0374, -0.0420, -0.0423, -0.1228, -0.0733, -0.0551,  0.0185,
        -0.0281, -0.1625,  0.0861,  0.0682, -0.0716,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1109, -0.2008, -0.0035, -0.1761,  0.0801, -0.0859,  0.0998, -0.3701,
        -0.4176, -0.5456, -0.1710,  0.1231, -0.0997, -0.3768,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2054, -0.0890, -0.0008, -0.1078, -0.1277, -0.1739,  0.0486,  0.2320,
         0.1449, -0.0499, -0.2847, -0.2584, -0.7829, -0.0171, -0.0399,  0.0700,
        -0.0744,  0.0261, -0.1221,  0.2381,  0.0163,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1001, -1.1916, -0.2002, -0.1078, -0.1214, -0.4405, -0.1601, -0.6451,
         0.0863, -0.1788,  0.0146, -0.1517,  0.1600,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0328, -0.0947, -0.0347, -0.0126,  0.0474,  0.0330, -0.0586, -0.1824,
         0.0562, -0.0080, -0.0366, -0.0331, -0.0743,  0.0129, -0.0292,  0.0764,
         0.0641,  0.0418, -0.0149, -0.0762, -0.0186, -0.0171,  0.0207, -0.0335,
        -0.6859,  0.0103, -0.2434, -0.2328, -0.1303,  0.0816, -0.0895, -0.0621,
        -0.0362, -0.0018, -0.0186, -0.0063, -0.0881, -0.0140,  0.0052, -0.0447,
        -0.0526, -0.0210,  0.0494, -0.2311, -0.1272], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0280, -0.4833, -0.0233,  0.0215,  0.0010, -0.0394, -0.0429, -0.0346,
         0.0119, -0.0315, -0.1706, -0.0801,  0.0728,  0.0989, -0.1365, -0.0275,
        -0.5822,  0.0084, -0.0574,  0.0803, -0.0053, -0.2728,  0.0767, -0.2892,
         0.0234, -0.0743, -0.0592, -0.0806, -0.2290, -0.0654,  0.0634, -0.1260,
         0.1155,  0.0946,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1477,  0.0009,  0.1144,  0.2266,  0.0431,  0.0275,  0.0241,  0.0849,
        -0.3361,  0.0808,  0.0565, -0.0277, -0.0115,  0.0444, -0.0256, -0.2084,
        -0.0794, -0.1021,  0.0581,  0.4262,  0.5046,  0.0148,  0.0074, -0.1750,
         0.1502,  0.0175,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0801, -0.7654, -0.1034, -0.0919, -0.0537, -0.2198,  0.0346, -0.1926,
        -0.1043,  0.0184, -0.0264,  0.0014,  0.0139, -0.0539,  0.1062,  0.0091,
        -0.7574, -0.0266, -0.0760,  0.0650, -0.0048,  0.1921, -0.1289,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1338, -0.2508,  0.0094, -0.0089,  0.0305,  0.0628, -0.0177, -0.0836,
        -0.1331, -0.0408, -0.0011, -0.0010, -0.0612, -0.1298, -0.8395,  0.2830,
         0.1144, -0.0862, -0.0575, -0.2161,  0.0184,  0.0504,  0.0364,  0.1306,
        -0.0268,  0.0705, -0.1829, -0.2674, -0.0187,  0.0361,  0.1227,  0.1568,
         0.0127,  0.1205,  0.0806, -0.0488,  0.0164,  0.0429,  0.1753, -0.0542,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0991, -0.2585, -0.0736, -0.1120, -0.2132, -0.0502,  0.0172, -0.1045,
        -0.1119, -0.0880, -0.0793, -0.0543, -0.0871, -0.0837, -0.1522, -0.1342,
        -0.0946, -0.0427,  0.0301, -0.0531,  0.0687, -0.1358, -0.1005,  0.0041,
        -0.0049,  0.0027,  0.0522, -0.1874, -0.0035, -0.0752, -0.3068,  0.0772,
        -0.2438, -0.3488,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.2556e-01, -1.2174e-01, -5.2981e-02,  7.0144e-03, -7.4801e-02,
         1.1310e-01,  6.0626e-02, -5.5819e-02,  2.1321e-02,  1.6205e-02,
        -8.8606e-02, -3.5635e-02, -1.1671e-01, -5.4119e-02, -6.2593e-01,
        -1.0474e-01, -1.2241e-01,  9.7615e-03, -2.7279e-01, -6.8827e-03,
        -2.5451e-02, -8.3500e-04, -1.0729e-02,  1.5470e-02, -2.1391e-01,
        -3.8690e-01, -1.6278e-02, -6.5583e-02,  5.6066e-04, -1.6328e-01,
        -4.8077e-02,  1.5041e-02,  5.8228e-02,  4.9890e-02, -1.7497e-01,
        -4.6937e-02, -3.0239e-02, -1.0076e-01, -3.9232e-02,  1.1923e-03,
        -3.4521e-02,  3.1286e-02, -3.7949e-02,  1.5316e-02,  1.9182e-04,
         4.2641e-02,  5.8830e-02,  2.4521e-02, -4.4884e-02, -4.0620e-02,
        -1.2987e-01, -2.0139e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4773, -0.0553, -0.1523, -0.1590, -0.4941, -0.0817,  0.0247,  0.0863,
        -0.0760, -0.1537, -0.0117, -0.0332, -0.0517, -0.5048, -0.1298, -0.3070,
        -0.2137, -0.1716, -0.1617, -0.0383, -0.0808, -0.1099, -0.1285, -0.2308,
         0.0580, -0.0481,  0.0327, -0.0019, -0.0156, -0.0474, -0.0340,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1448, -0.4882,  0.0197,  0.0568,  0.0390,  0.0510,  0.0436,  0.0638,
        -0.0836,  0.0574, -0.0096, -0.1144, -0.0421, -0.0345, -0.0864, -0.2055,
         0.0484, -0.0424, -0.3068, -0.3280, -0.1142, -0.0246, -0.2606,  0.0493,
         0.2886,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0448, -0.1875, -0.0318,  0.0340, -0.2728, -0.7234, -1.0693, -0.0132,
        -0.1868, -0.2069, -0.2318, -0.0250,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2898, -0.9629, -0.0914, -0.1586, -0.3175, -0.0996,  0.0499, -0.0422,
        -0.0331, -0.2039,  0.2480, -0.0127, -0.1354, -0.3930, -0.0348, -0.1607,
        -0.0167,  0.0549,  0.1164,  0.0238, -0.0773, -0.0183, -0.1296, -0.0765,
         0.0724, -0.3904,  0.0062,  0.0031,  0.1192,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3462,  0.9573, -0.1896,  0.0220,  0.0089,  0.6769,  0.7983, -0.1500,
        -0.4093, -0.1158,  0.1813,  0.4103,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6262, -0.4620, -0.2406, -0.2658, -0.0322, -0.0723, -1.0485, -0.0888,
        -0.1397, -0.1276,  0.0261,  0.1180,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3354e+00,  3.0692e-01, -2.4007e-01, -1.5501e-01, -3.4756e-01,
        -1.9170e-02, -1.6600e-01, -3.8007e-01, -7.0017e-01, -1.7470e-01,
         2.7674e-02, -4.2650e-01, -7.9274e-02, -6.8015e-04, -9.2090e-02,
        -6.0644e-01,  6.0533e-02, -7.2690e-02,  1.8237e-01,  7.2456e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2026,  1.4883,  0.1327,  0.2519,  0.5275,  0.2519,  0.0528, -0.0956,
         0.0831,  0.0444,  0.0796,  0.2048,  0.1066, -0.0318,  0.1015, -0.0496,
         0.0965, -0.0147,  0.0294,  0.1137,  0.0454, -0.0718,  0.0761,  0.0712,
        -0.0271, -0.0541, -0.0151,  0.1305,  0.0564, -0.0102, -0.1200,  0.0114,
         0.0571,  0.1824,  0.1971, -0.1017,  0.3004,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6158,  1.8817, -0.0432, -0.1488, -0.0364, -0.1073,  0.0892, -0.3455,
         0.0541,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1398, -0.0932,  1.0765, -0.2765, -0.0901, -0.2670, -0.2347, -0.2414,
         0.0606,  0.0035, -0.0736, -0.1464, -0.2995, -0.3843, -0.0724,  0.0096,
        -0.0141,  0.1494, -0.0771, -0.1964,  0.0387,  0.0138, -0.0585,  0.0293,
         0.1344,  0.5036, -0.1050,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2678, -1.5363, -0.1953, -0.1908, -0.2320, -0.1593,  0.0194,  0.0398,
        -0.0147, -0.1941, -0.0896, -0.1425, -0.0336, -0.0091, -0.0624, -0.0656,
        -0.3041,  0.0627,  0.1581,  0.0175,  0.3304, -0.1134, -0.1070,  0.0477,
         0.0419, -0.1215, -0.0267, -0.1001, -0.4995, -0.0313, -0.0487, -0.0478,
        -0.1497,  0.0345,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.1769,  0.0712, -0.0097,  0.0084, -0.0779,  0.0551, -0.0049, -0.0425,
         0.0395,  0.1490, -0.1794, -0.5017, -0.1355, -0.0188, -0.0695, -0.1873,
        -0.4000,  0.0928, -0.0304,  0.0225,  0.0937, -0.0396, -0.1742, -0.0833,
         0.0232,  0.0061,  0.0263, -0.0184,  0.0686,  0.0413, -0.0243, -0.2209,
        -0.3983,  0.1135, -0.0690, -0.1155, -0.2212, -0.0505,  0.0523, -0.0101,
        -0.0504, -0.1864,  0.0498,  0.0446,  0.0943,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2755,  0.0815,  0.0618,  0.1636,  0.2202,  0.4369,  1.0736,  0.0931,
         0.0617,  0.3484,  0.1194,  0.1239,  0.1622, -0.0966, -0.0383, -0.2129,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1802, -0.0656,  0.2434,  0.0576,  0.0171,  0.0317,  0.0082,  0.1652,
         0.2756,  0.3552,  0.0619,  0.1543, -0.0090,  0.0440,  0.0636, -0.0083,
         0.0482,  0.0389,  0.0109,  0.0589,  0.0252,  0.0346, -0.0089, -0.0522,
         0.0413,  0.0288,  0.1235,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3083, -0.0479, -0.1303,  0.0102, -0.0219,  0.0282, -0.0189,  0.0076,
        -0.0797, -0.0301,  0.0161, -0.0109, -0.0441, -0.0023, -0.0289, -0.0200,
        -0.0353, -0.0321,  0.0865, -0.0856,  0.0019, -0.0484, -0.0124, -0.3932,
         0.0291,  0.0030, -0.1281, -0.2473, -0.0047, -0.2173,  0.0979, -0.1810,
        -0.2153, -0.0274, -0.0729, -0.0305, -0.0448, -0.2341, -0.0133,  0.0922,
        -0.1016, -0.2242, -0.0156,  0.0245,  0.2832, -0.0653], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3897,  0.1068, -0.0842,  0.1321, -0.0376, -0.0336, -0.1477, -0.1866,
         0.0344, -0.0409, -0.0764, -0.0679, -0.0315,  0.0227, -0.0506, -0.0186,
        -0.0649, -0.0310, -0.1265, -0.4990, -0.0752, -0.1696, -0.0401, -0.0809,
         0.0299,  0.0452, -0.2869,  0.0762, -0.3043,  0.1618,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2708,  0.2082,  0.2814,  0.1160,  0.0991, -0.0366, -0.3388,  0.4343,
        -0.1443, -0.4870, -0.0090,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0906, -1.2242, -0.5852, -0.1566, -0.1495, -0.0894, -0.0995, -0.4218,
         0.0934, -0.0957,  0.0777,  0.0013, -0.1814, -0.0039, -0.1624, -0.2330,
        -0.0791, -0.1241, -0.0207, -0.1570,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5698,  1.2754,  0.5747,  0.4077,  0.0240,  0.1103,  0.0918, -0.0405,
         0.1032,  0.3101, -0.0170,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3428,  0.7941,  0.1747,  0.4804, -0.1392, -0.1033,  0.1075,  0.2372,
         0.3590, -0.0171,  0.0344,  0.0490,  0.0239,  0.0156,  0.0953,  0.2906,
        -0.0308,  0.0014, -0.0683,  0.0257, -0.1932,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0128e-01,  4.3498e-01,  2.0237e-01,  1.3127e-02,  3.9580e-01,
         7.5502e-01, -6.7726e-01, -2.3227e-01,  6.5326e-02, -9.2643e-02,
         1.2170e-01, -3.0276e-02, -3.5985e-02,  3.9883e-04, -5.5132e-03,
         8.7991e-02,  1.5135e-02, -7.5338e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7909e-02, -2.0221e-01, -6.9843e-01,  3.4057e-02,  3.2211e-02,
        -5.8915e-03,  6.4407e-02, -2.4981e-04, -1.2012e-02, -3.1396e-02,
         1.4398e-02,  3.9946e-02,  1.2225e-01, -1.3981e-01, -1.1301e-01,
         3.9110e-02, -9.7853e-02, -2.3533e-01, -7.2676e-02, -1.5444e-01,
        -8.4737e-02, -2.3389e-01, -2.8250e-01, -8.5509e-02, -1.4848e-01,
        -3.0671e-02, -5.1629e-02, -4.7422e-02, -3.4444e-02, -2.5571e-01,
         1.2172e-02, -1.6675e-02, -9.5514e-03,  3.2495e-02, -5.2864e-02,
         5.5245e-04, -4.9090e-02,  1.8039e-01, -1.9884e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0841,  0.7166,  0.0338,  0.1258,  0.0478,  0.0742,  0.0060, -0.1348,
        -0.0415, -0.0203, -0.0465,  0.1916,  0.0191,  0.0610, -0.0983,  0.0779,
         0.1196,  0.0349, -0.0018,  0.0808, -0.0196,  0.0857, -0.0018, -0.0243,
         0.0031,  0.0720,  0.0906,  0.0656,  0.2560,  0.2780,  0.0389,  0.0665,
         0.2858,  0.0855,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.7675, -0.5795, -0.3506, -0.4886,  0.1861, -0.4380, -0.1020,  0.2663,
        -0.4491,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5287, -0.6125, -0.1402, -0.2839, -0.0089, -0.3107, -0.0230, -0.0438,
         0.1029,  0.0756, -0.1353, -0.0324, -0.0490, -0.0036,  0.0495, -0.3505,
        -0.0290, -0.8578, -0.1377, -0.1269,  0.0565, -0.0113,  0.0999,  0.0058,
        -0.1174, -0.0700, -0.0920,  0.0018,  0.0216, -0.0649, -0.0097, -0.1712,
         0.0192, -0.0625,  0.2198,  0.0132,  0.0468, -0.0916,  0.1907, -0.1975,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2748e-01, -2.8707e-01, -3.3675e-01, -7.8227e-02,  9.4658e-03,
        -3.4466e-02,  1.1133e-01, -7.2894e-03, -4.5522e-02, -1.8732e-01,
         8.9207e-03,  2.4803e-02, -2.5842e-02, -3.2881e-02, -1.3654e-01,
         1.0965e-04, -4.5734e-02, -6.7831e-02, -1.0509e-01,  1.9005e-01,
         7.0754e-02, -2.3729e-03,  2.1448e-02,  4.5341e-02, -5.7759e-02,
        -5.7226e-02, -1.5272e-01, -1.5666e-03, -1.6814e-01, -2.7744e-01,
         3.1297e-02,  2.8917e-02, -1.6053e-02, -3.4332e-02,  2.2028e-02,
         3.1981e-03, -9.7415e-02, -2.0294e-01,  4.3483e-02,  4.9423e-02,
         7.3625e-02,  8.7439e-02, -1.8680e-02, -6.4309e-02,  1.2710e-03,
         6.1181e-02,  2.3890e-03,  1.4350e-01,  5.4209e-02, -8.1524e-02,
         2.4662e-01,  7.7282e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0733,  0.8027,  0.0634,  0.0537, -0.0074,  0.0611,  0.2029,  0.0277,
         0.0072,  0.0129, -0.2340,  0.3279,  0.3597,  0.0641,  0.1867,  0.2595,
         0.1733, -0.1970,  0.3079,  0.0968,  0.0551,  0.2320,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1034,  0.8540, -0.0146,  0.0922, -0.0344,  0.0189, -0.0019,  0.1143,
         0.1177,  0.0997,  0.2751,  0.0312,  0.0798,  0.0746, -0.0345,  0.1241,
        -0.1298,  0.1420, -0.0043,  0.0236,  0.1504,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3775, -0.4887, -0.0256, -0.0592,  0.0434, -0.0240, -0.0159,  0.0915,
        -0.0041,  0.0339, -0.0094, -0.0038,  0.0409,  0.0240, -0.0016, -0.0180,
         0.0429,  0.0343,  0.0215, -0.0016,  0.0208,  0.0328,  0.0406,  0.0834,
        -0.0029,  0.0232, -0.1647, -0.2147, -0.0230, -0.0174, -0.0972, -0.1231,
        -0.0149, -0.0092, -0.0506,  0.0021,  0.0496,  0.0194,  0.0629, -0.0270,
        -0.4614,  0.0087,  0.0430, -0.0560, -0.1340, -0.0006, -0.0851, -0.0764,
         0.0628,  0.0681, -0.0241,  0.0036,  0.0186, -0.0697, -0.0993],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2897, -0.0736,  0.0780, -0.1014,  0.0385, -0.0289, -0.2614, -0.1155,
        -0.0885,  0.0113,  0.0565, -0.0495, -0.0064, -0.0882, -0.0939, -0.0475,
        -0.0571, -0.1722,  0.0128, -0.1065, -0.0368, -0.0416, -0.0991,  0.0337,
         0.0715,  0.0045, -0.0084, -0.2110,  0.0132, -0.0744, -0.1439, -0.0070,
        -0.0009, -0.2836,  0.0927, -0.0777, -0.0082, -0.1684, -0.1312, -0.1459,
        -0.0581,  0.1472,  0.0231,  0.1873, -0.1575,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3107,  1.0619,  0.2158,  0.4342,  0.0095,  0.0054,  0.1002, -0.0398,
         0.1367, -0.1561, -0.0819,  0.1712,  0.0157,  0.1283,  0.1836, -0.2921,
         0.0477,  0.0548, -0.0924,  0.0448,  0.1709, -0.1027, -0.1204,  0.1071,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4348,  0.0641,  0.0480,  0.1413, -0.0173, -0.0344, -0.0867, -0.1827,
         0.0456,  0.0061,  0.0397,  0.0061, -0.0006, -0.1801, -0.1246, -0.0964,
        -0.2575, -0.1540, -0.1306, -0.2103, -0.1257, -0.0740, -0.1407, -0.0976,
        -0.1311, -0.2287, -0.3950, -0.0717, -0.1143, -0.1671, -0.0059, -0.1981,
         0.0446,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6438, -0.9124, -0.2537, -0.2259,  0.0904,  0.1844,  0.0562, -0.0643,
        -0.7843, -0.0976,  0.1186, -0.0407,  0.2515, -0.1802, -0.1387, -0.1721,
        -0.0539,  0.1937, -0.3214,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7380, -0.0358,  0.1004, -0.1329, -0.2356, -0.0605, -0.2260, -0.1610,
        -0.4075, -0.0141,  0.0079, -0.0695, -0.0931, -0.0305, -0.0038,  0.0015,
        -0.0042, -0.1074, -0.0319,  0.0100, -0.1641, -0.4562, -0.0131, -0.0708,
         0.1364,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0834, -0.8747, -0.1811, -0.1724, -0.3613, -0.2247, -0.9405, -0.2230,
        -0.0735, -0.1422, -0.0103, -0.0349, -0.1345, -0.0892, -0.1889,  0.0869,
         0.0766,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.4949, -0.0803,  0.1359, -0.0200,  0.0041,  0.0556, -0.0176,  0.1523,
        -0.0085,  0.2582, -0.0011,  0.0794,  0.0549,  0.1009,  0.0601,  0.0627,
         0.1560,  0.3701, -0.0662,  0.1394, -0.0215,  0.2091,  0.5122,  0.0111,
         0.0175,  0.0293,  0.0379,  0.0540, -0.0493, -0.0453, -0.1737, -0.0137,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9504e-01,  5.1313e-01,  1.9387e-01,  9.2983e-02,  5.8072e-02,
         8.6208e-02,  1.5749e-01,  2.8804e-01,  2.9716e-02,  3.9576e-03,
         5.9637e-02, -5.4004e-02,  5.7933e-01,  8.9851e-02,  1.1501e-01,
        -1.0277e-01,  8.4112e-02,  6.7410e-02,  2.0119e-01,  3.0515e-02,
         5.0366e-02, -3.0695e-02,  2.5385e-01,  2.2054e-01,  3.9792e-02,
        -6.0593e-02,  7.1205e-02, -3.0930e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5990, -0.2542, -0.1327, -0.1143, -0.0842, -0.1016, -0.4085, -0.0215,
        -0.1120, -0.2795, -0.0307, -0.2334, -0.0341,  0.0175, -0.0152, -0.1849,
        -0.0384, -0.2368, -0.0905, -0.0319, -0.0144,  0.0317,  0.0161, -0.0964,
        -0.0847,  0.0610, -0.0676,  0.1328,  0.2415,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0837, -0.1184,  0.0106,  0.1306,  0.5308,  0.0462,  0.2129,  0.0735,
        -0.0111,  0.0207, -0.0972,  0.1234,  0.2206,  0.4269, -0.1296,  0.1813,
         0.2784,  0.6847,  0.1662,  0.0543,  0.0749, -0.0130,  0.0960,  0.0145,
         0.0631,  0.0587,  0.0553,  0.1372,  0.1644, -0.0887,  0.0419, -0.0211,
        -0.1147,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6367, -0.8956, -0.0235, -0.1894,  0.0249, -0.0392, -0.0580, -0.1815,
        -0.0651, -0.0729, -0.0311, -0.0326, -0.0635, -0.0346, -0.0775, -0.0400,
        -0.0034, -0.0641, -0.5266,  0.0786, -0.3172, -0.1132,  0.5401, -0.4875,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3592, -0.7432, -0.5704,  0.0179, -0.1160, -0.1831, -0.2846, -0.4114,
         0.0384, -0.1912, -0.1300, -0.0657, -0.0802, -0.0603,  0.0039, -0.0075,
         0.0210, -0.1119, -0.0446, -0.5013,  0.0376,  0.0201,  0.1319,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1905, -0.4290,  0.0361, -0.0909, -0.0465, -0.3287, -0.0200, -0.0025,
        -0.0278, -0.1312,  0.1323, -0.0377, -0.0899, -0.0352, -0.0929, -0.0369,
        -0.2627, -0.2125, -0.1349, -0.1135,  0.1279, -0.2342,  0.0957, -0.0469,
        -0.1481, -0.1140, -0.0773, -0.1144, -0.1133, -0.3702,  0.0231, -0.0235,
        -0.0697, -0.0711, -0.0798, -0.1412, -0.1002, -0.0209, -0.1195,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0379, -0.0958,  0.0732, -0.0025, -0.3025, -0.0879,  0.0575, -0.0082,
        -0.0725,  0.0131,  0.0101,  0.0139, -0.0310, -0.0809, -0.0227, -0.2094,
        -0.0663, -0.1431,  0.0541,  0.0071, -0.1741, -0.0779, -0.0220, -0.0503,
        -0.0926, -0.1914, -0.0770, -0.0288,  0.0108, -0.0444, -0.0487, -0.0692,
        -0.0256, -0.0095, -0.0476,  0.0213, -0.0044, -0.2077,  0.0232, -0.0472,
        -0.0329,  0.0638, -0.0351,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2817e-02, -7.6722e-01,  2.6709e-02,  2.8388e-02, -3.6507e-02,
        -1.0408e-02, -7.5657e-03, -3.3170e-02, -1.0017e-01,  7.7884e-02,
         8.2528e-02,  1.1884e-02, -6.2428e-02,  1.2917e-02, -9.5275e-03,
        -6.8843e-02, -9.4314e-02, -1.1274e-01,  1.5858e-02, -3.4651e-03,
         2.5701e-02, -2.0745e-02, -2.4005e-02, -5.8387e-03,  1.3158e-03,
         5.5024e-03,  9.9018e-02, -1.4628e-02,  3.0885e-02, -1.9088e-02,
         3.3929e-02, -3.7097e-02, -7.4033e-02,  1.2888e-01, -1.7875e-02,
         3.9090e-04,  1.1964e-02, -1.2342e-02, -1.8743e-01,  6.3572e-02,
        -1.0569e-01, -2.7184e-01, -3.1506e-02, -4.6091e-02, -1.7232e-02,
        -5.9239e-02, -1.9800e-02, -6.4981e-03,  2.1402e-02,  4.7663e-02,
        -3.9652e-02, -7.9104e-02, -1.3244e-01,  2.2057e-02,  6.1889e-02,
        -1.1322e-01, -6.5299e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0295, -1.1509, -0.3115, -0.0217, -0.2200, -0.1493, -0.2369, -0.4084,
        -0.0809,  0.0205, -0.1466,  0.0549, -0.1861,  0.1886,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3248,  0.6131, -0.1260, -0.1825, -0.0469,  0.0429,  0.0396,  0.0190,
        -0.0335,  0.0465,  0.1278,  0.6189,  0.1146,  0.1048,  0.1480,  0.0289,
         0.0305,  0.1547, -0.0308,  0.2237,  0.4112,  0.0886,  0.0454,  0.0141,
        -0.0342,  0.0126, -0.0306,  0.0365, -0.3303,  0.0829,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1305,  0.3449,  0.0470,  0.3786,  0.0458, -0.1292,  0.1553,  0.3129,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.1940, -0.9210, -0.2128,  0.1062, -0.0935, -0.2664, -0.0347,  0.0788,
         0.0104, -0.0624,  0.0174, -0.0979, -0.1456, -0.1164, -0.0230, -0.0642,
         0.0216, -0.0534, -0.0127, -0.0463, -0.0128,  0.0358, -0.0472,  0.1074,
        -0.1351,  0.0672, -0.1078,  0.0495, -0.1973, -0.1093, -0.0821,  0.2250,
         0.1207,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3328e-02,  1.8205e+00,  3.5549e-01,  3.2839e-01, -1.4064e-01,
         2.2156e-01,  1.3334e-01,  3.0089e-01, -2.8428e-02,  1.0792e-01,
        -2.4344e-02,  3.1709e-02,  1.7520e-02,  4.5107e-02, -1.0781e-01,
         8.1811e-02, -9.1391e-03, -6.3507e-02,  6.2377e-02, -3.2579e-02,
        -2.2697e-02, -7.3614e-03,  7.3488e-02,  1.4144e-03, -1.1625e-01,
         1.0343e-01,  1.8061e-01,  4.1497e-01, -4.1396e-02, -2.1586e-02,
         5.6963e-02,  2.3140e-02,  9.9153e-02,  4.3860e-01,  9.8451e-02,
         1.1877e-01,  3.8055e-02,  3.1802e-02,  2.1043e-02,  5.1675e-03,
        -1.8021e-02, -1.0134e-02,  3.6412e-02,  1.5885e-01, -2.6532e-02,
        -5.1079e-02,  8.1712e-02,  1.6519e-02,  4.0759e-02, -3.6320e-02,
         6.8251e-02,  5.2159e-02, -2.6396e-04,  1.9504e-02, -1.2321e-02,
        -7.9939e-02,  1.2442e-02,  6.6783e-03,  1.7615e-02,  9.9475e-02,
         5.5155e-02,  3.8592e-02,  4.2439e-01, -1.9854e-02,  1.4619e-02,
        -1.0245e-02,  3.5786e-02, -1.1759e-01,  1.4902e-02,  7.1171e-02,
        -7.1039e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2265, -0.1396, -0.0637,  0.0136, -0.1331, -0.0412, -0.5938, -0.0342,
         0.0123,  0.0712,  0.0523, -0.0914, -0.0449,  0.0324, -0.0353, -0.1101,
        -0.0123, -0.0062, -0.0145, -0.2725, -0.0395,  0.0412,  0.0420, -0.2800,
         0.2040, -0.1040, -0.1427, -0.0058,  0.0627, -0.0299, -0.0110, -0.0888,
        -0.1576, -0.2832, -0.0731, -0.1701, -0.1948, -0.0947,  0.0127,  0.0408,
        -0.0236, -0.0582,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4008, -0.5739, -0.0350, -0.0865,  0.0060,  0.0309,  0.0779, -0.0248,
        -0.1183, -0.2936, -0.0888, -0.1280, -0.2719, -0.0066,  0.0183, -0.0147,
         0.0068, -0.1443, -0.3849, -0.0749, -0.0694, -0.0241,  0.0075, -0.0384,
        -0.0972, -0.2259, -0.1130, -0.0547, -0.1749, -0.0572, -0.0945, -0.0445,
         0.0746, -0.0395, -0.0312, -0.0789,  0.1655,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6677, -1.5593, -0.2924, -0.2845, -0.1096,  0.3472, -0.1118,  0.0824,
        -0.0575, -0.2213, -0.0683, -0.1311, -0.2536, -0.0342,  0.0082,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0541,  0.4468, -0.0203, -0.2281,  0.2246,  0.3970,  0.2689,  0.1295,
         0.7011,  0.0658, -0.0678, -0.0130,  0.3366, -0.0110, -0.3366,  0.0644,
         0.0707, -0.2097,  0.0581, -0.1248,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0455, -0.0768, -0.0285,  0.0055, -0.5382, -0.0821, -0.0600, -0.0521,
        -0.1535, -0.0640, -0.0882, -0.0580, -0.2509, -0.0516, -0.1078, -0.0516,
         0.0105, -0.0734, -0.0206, -0.0840, -0.0300, -0.0013, -0.0295, -0.0245,
         0.0420, -0.1266, -0.2397, -0.0422, -0.1082, -0.0283, -0.1510, -0.1390,
        -0.0365, -0.1307, -0.0917, -0.0815, -0.1292, -0.0128, -0.0490, -0.1940,
         0.2653,  0.1895, -0.0145,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4329, -0.0142,  0.1485, -0.1044, -0.0721, -0.1440, -0.0856, -0.5757,
        -0.0822, -0.0267, -0.0914, -0.0150, -0.1432, -0.2618, -0.2723, -0.1050,
         0.0348,  0.0517,  0.0355, -0.0487, -0.0568, -0.0320,  0.1206,  0.0512,
         0.0511,  0.1266,  0.0357, -0.0042, -0.3349, -0.0473, -0.0353, -0.1113,
        -0.0646, -0.0907, -0.0919, -0.0036,  0.0396,  0.1136,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2454,  0.6984,  0.2677,  0.3788, -0.0809,  0.3745,  0.0495, -0.2837,
         0.1069,  0.4683,  0.0555,  0.0474,  0.1967,  0.0457,  0.0441,  0.0848,
         0.1746,  0.1510, -0.0054,  0.1024, -0.2463,  0.0492,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3292, -0.6182, -0.0917, -0.1213, -0.0068, -0.0292,  0.0359, -0.0365,
         0.0305, -0.3063, -0.1336, -0.0544, -0.0470, -0.0066, -0.0643, -0.1006,
        -0.4203, -0.1254, -0.1294, -0.1895, -0.1511,  0.0064, -0.1557, -0.1087,
        -0.0915, -0.0739, -0.0185,  0.0564, -0.0031, -0.1857, -0.0582,  0.0175,
        -0.0606, -0.0735, -0.0738, -0.2713, -0.0186,  0.0150, -0.0440, -0.1808,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1224,  0.2816,  0.0488, -0.3613,  0.0968, -0.0387,  0.1356, -0.0500,
        -0.1619, -0.0802,  0.0094,  0.0551, -0.0936, -0.4886, -0.1390, -0.0617,
         0.0312, -0.0693, -0.1333, -0.0508, -0.0857, -0.0531, -0.1081, -0.0957,
        -0.0367, -0.2701,  0.0320, -0.1383, -0.1494,  0.0074, -0.0318,  0.0793,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7970e-01, -1.5100e+00, -4.1153e-02, -8.0941e-02, -8.0862e-02,
         5.9941e-02,  5.4204e-02, -1.8306e-01, -3.2963e-01, -3.5115e-02,
        -9.9257e-04, -1.2669e-01, -7.1458e-02,  1.6379e-02,  7.6055e-02,
         6.4916e-02, -2.7882e-01,  7.4521e-02, -2.0619e-02,  1.0795e-01,
        -1.6767e-02, -5.5418e-02,  2.0388e-02, -9.6857e-02, -1.2201e-02,
         1.3451e-02, -5.8110e-02, -2.5085e-03,  2.0776e-02, -4.8352e-02,
         3.2739e-03,  9.5476e-03, -9.9251e-03, -2.4853e-01,  2.7144e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.3441, -0.4906, -0.3120, -0.1542, -0.2022, -0.1345, -0.3889, -0.2872,
        -0.0954, -0.3848,  0.0454, -0.0944, -0.1238, -0.1795, -0.1061, -0.0069,
         0.0018, -0.0217, -0.0234, -0.0289, -0.0727,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4135, -1.0526,  0.2922, -0.2982, -0.2336, -0.1190, -0.0850, -0.1924,
        -0.4506,  0.0599,  0.0450,  0.0188,  0.0143,  0.2084, -0.0044,  0.0447,
        -0.0488, -0.0961,  0.0039,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6973, -0.4562, -0.2056, -0.0463, -0.1654, -0.0157,  0.0013,  0.0815,
        -0.1410, -0.2909, -0.4031,  0.0649, -0.1115, -0.4702,  0.0419, -0.1276,
        -0.0421, -0.0659, -0.1129,  0.1013,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0049, -1.0099, -0.1167, -0.4334,  0.0686, -0.0190, -0.2763, -0.1092,
        -0.2459, -0.1780, -0.0551,  0.0184,  0.0167, -0.0387,  0.0159, -0.0367,
        -0.3825, -0.0300, -0.0655, -0.2070,  0.0550,  0.1294,  0.2410,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1111, -0.8225, -0.2840,  0.0468, -0.0069, -0.1268, -0.1614, -0.1214,
        -0.1101, -0.5221, -0.0699, -0.1804, -0.1374, -0.2996,  0.0361, -0.0470,
         0.0117,  0.0654, -0.0477, -0.0157, -0.0801, -0.0183, -0.0248,  0.0233,
        -0.0593,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1903,  0.6793,  0.0655,  0.0077,  0.0235, -0.0095, -0.0646,  0.2003,
         0.0507,  0.0539,  0.1893,  0.0614, -0.0147, -0.0204,  0.0331, -0.2296,
         0.0181, -0.0384, -0.0148,  0.1229,  0.0839,  0.3667,  0.1593,  0.2103,
        -0.0266,  0.0420,  0.0064,  0.1135,  0.0232,  0.0153, -0.0157, -0.1530,
        -0.0504,  0.3500, -0.2505,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1346, -0.0011,  0.1029, -0.1662, -0.0163, -0.1190, -0.0716, -0.0748,
        -0.4482, -0.0370, -0.0894,  0.0192,  0.0208, -0.1271, -0.0050, -0.3472,
        -0.0800, -0.2188, -0.1308, -0.0578,  0.0914, -0.0627,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7671, -0.2267,  0.0193,  0.0151, -0.2190, -0.2835,  0.0466,  0.0061,
        -0.1600, -0.5782, -0.0438, -0.1255, -0.0476, -0.1512, -0.0876, -0.2838,
        -0.0726, -0.0698,  0.1513,  0.1147, -0.3730,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0024, -0.8803, -0.1508,  0.1840, -0.1166, -0.0907, -0.2417, -0.5074,
         0.1470, -0.1115, -0.0294, -0.0391, -0.1116, -0.2131, -0.0706, -0.4409,
         0.1221, -0.0763,  0.1963,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8422e-01, -1.1934e-01, -1.3913e-01, -1.6181e-02, -7.7542e-03,
        -2.3440e-01,  1.4889e-02, -8.2025e-03, -1.3547e-02,  1.1502e-02,
        -5.0260e-04, -3.3487e-03, -1.7145e-02, -7.9557e-02, -7.5974e-03,
        -5.3995e-03, -4.2821e-03, -2.1367e-02, -2.7102e-02, -4.7793e-02,
        -2.8897e-02,  7.5294e-02, -6.3285e-02,  5.6283e-04, -1.3579e-01,
        -5.0147e-02, -2.2092e-04, -1.1862e-01, -7.2412e-02, -1.7503e-01,
         3.8957e-02, -1.3546e-01, -1.3238e-01, -4.0079e-01,  3.1510e-02,
        -1.3431e-02,  3.5517e-02,  2.1262e-02, -4.7557e-02, -6.7071e-02,
        -1.5297e-02,  3.6472e-02, -8.0948e-03,  2.9928e-02, -5.9542e-02,
         8.9901e-02,  9.0732e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1350e-01, -5.7143e-01, -9.3931e-02, -2.4681e-02,  5.5347e-02,
         6.5428e-02,  2.8763e-02, -3.0215e-02, -9.7270e-02,  1.7099e-02,
         5.7411e-02,  4.1782e-02,  8.8530e-03,  4.0087e-02,  1.0250e-02,
        -5.5088e-02, -1.0584e-01,  4.5386e-02, -4.0817e-02, -1.6019e-02,
        -8.7453e-02,  1.1542e-02, -2.2148e-01, -5.5374e-01, -2.7176e-02,
         1.8896e-01, -9.2783e-02, -7.5703e-02, -7.4960e-05,  8.0021e-02,
        -1.8026e-02, -1.1480e-02, -1.3403e-02, -2.8904e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4493, -1.1193, -0.0463,  0.1248, -0.0977, -0.1049, -0.0896, -0.0020,
        -0.0220, -0.0271, -0.0028,  0.0574, -0.1488, -0.3671, -0.0189,  0.0784,
         0.0420,  0.1915, -0.0301, -0.2151, -0.0953, -0.0563, -0.0710,  0.0969,
        -0.0342,  0.0126, -0.0388, -0.0695, -0.0164, -0.1229, -0.0445,  0.0488,
         0.0054, -0.2353,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.1349, -1.4031,  0.0596,  0.0919, -0.1277, -0.2322,  0.0417, -0.0260,
        -0.0641, -0.4132, -0.3010, -0.1059, -0.1327, -0.2833, -0.0333, -0.0885,
        -0.1568,  0.0413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0842,  0.0005, -0.1840,  0.2483, -0.0997,  0.0552, -0.0472,  0.0270,
        -0.3809, -0.4115,  0.0535, -0.0198,  0.1351, -0.0673, -0.0852, -0.0463,
        -0.0457,  0.1616, -0.0085, -0.3303,  0.0653,  0.0145, -0.1590,  0.0279,
         0.0576, -0.1770,  0.1033,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2750e-02,  2.1362e-02,  1.7933e-02, -4.6270e-02, -6.7716e-02,
        -1.1410e-01,  6.8872e-04, -5.0911e-02, -4.2862e-01,  2.7239e-01,
         1.5948e-01, -6.2843e-02, -8.3712e-01, -1.8483e-01, -3.8789e-02,
         1.0968e-01,  1.4933e-02, -1.4653e-01,  6.1548e-03, -6.4102e-01,
        -1.5906e-02, -4.5107e-03, -6.0113e-02, -4.4730e-02,  9.6210e-02,
         2.2527e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2839, -0.9417,  0.1192, -0.1589, -0.1519,  0.0722,  0.0250, -0.2100,
        -0.1194, -0.1807, -0.2945, -0.0138, -0.0648, -0.0689, -0.0928, -0.1474,
        -0.0434, -0.0427, -0.0460, -0.0706,  0.1242,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0840,  0.9756,  0.5598,  0.5543,  0.1465,  0.0269,  0.4963,  0.0999,
         0.0655, -0.4481,  0.0720,  0.1457, -0.0389, -0.1877,  0.0253,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2999,  0.0019,  0.2531,  0.0492, -0.2090,  0.2500,  0.0082,  0.0612,
         0.1151, -0.0170,  0.1462,  0.1487,  0.5321,  0.1406, -0.1262, -0.0569,
        -0.0149,  0.0611,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2807, -0.6564, -0.1040, -0.4858, -0.1326,  0.0311, -0.0355, -0.1615,
        -0.0582, -0.3508, -0.2971, -0.1751,  0.0384, -0.2536,  0.0859,  0.0070,
         0.0602, -0.1357,  0.2789,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1807, -0.5091, -0.2282,  0.1239,  0.0252, -0.0780,  0.0798,  0.1314,
        -0.0151,  0.0238, -0.0038,  0.0068, -0.0732,  0.0236, -0.0214, -0.0047,
        -0.0185,  0.0243,  0.0432, -0.0170, -0.3540,  0.0028, -0.2349, -0.1007,
        -0.1235, -0.1442, -0.1603,  0.0885, -0.0261, -0.1189, -0.0425, -0.1052,
        -0.1884, -0.0933, -0.0545, -0.0519, -0.2134,  0.0293, -0.0770,  0.0452,
         0.0732,  0.1277,  0.1012, -0.0601], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4385e-01, -1.1173e+00, -1.9190e-01, -1.2673e-01,  3.3779e-02,
         1.2231e-03, -6.0651e-02,  9.1172e-02, -2.4435e-01, -3.1352e-02,
        -2.1955e-01, -6.3577e-02,  8.0144e-02, -2.3742e-02,  7.9731e-02,
        -1.6872e-02, -1.1742e-01, -2.3395e-01,  4.8133e-02,  2.8258e-02,
        -2.2095e-01,  1.7332e-02,  3.3012e-02, -4.0073e-04,  7.9553e-02,
         1.0804e-02,  2.5292e-02, -2.8760e-01,  1.4335e-01, -2.8776e-02,
        -8.3205e-02, -2.8669e-01, -3.8419e-03, -3.5204e-01, -2.3186e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3194,  1.5984, -0.0065,  0.1333,  0.1808,  0.0945,  0.4293,  0.3564,
         0.0826, -0.0030, -0.0087,  0.1649,  0.3864, -0.0032,  0.0213,  0.2182,
        -0.0420, -0.0162, -0.1076, -0.0173, -0.0207,  0.1081, -0.1382, -0.0581,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9204e-01,  1.1794e+00,  1.1069e-01, -1.2215e-01,  7.8017e-02,
         6.2662e-02, -3.4049e-02,  2.7970e-02,  3.0557e-01,  6.5119e-02,
         1.1687e-01, -3.6976e-02, -3.6934e-02,  3.0573e-01,  2.6200e-01,
         1.0743e-03, -9.1847e-03,  5.0306e-02,  5.8317e-02,  1.1578e-02,
        -2.5453e-02,  2.3156e-01,  2.2188e-02,  3.6458e-03,  5.8866e-02,
         1.5123e-01,  1.6280e-01,  1.1674e-01,  8.6388e-02, -1.3404e-02,
         3.6399e-01,  8.3051e-02,  5.1871e-02,  3.8111e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9192e-01,  1.0125e-04, -2.9078e-02, -2.3999e-01,  7.2387e-02,
         9.5242e-02, -4.8145e-01, -7.9700e-02, -1.4097e-02, -5.9148e-02,
        -4.3595e-02,  7.3295e-02, -1.4826e-01, -7.6452e-02, -8.4813e-02,
        -5.5861e-01, -7.2294e-04,  2.3108e-02, -3.0064e-02,  5.7962e-03,
         9.0612e-02, -7.3352e-02, -3.8322e-02, -7.2107e-02, -1.2653e-01,
        -8.8483e-02, -1.1988e-01,  3.2252e-02, -1.6863e-01,  3.1474e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.1707, -0.5564, -0.1267, -0.8082, -0.0796, -0.0057, -0.0834, -0.0583,
        -0.0240, -0.6532, -0.2028,  0.0248, -0.2847,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1250, -0.0756, -0.1213,  0.0396, -0.0681, -0.0037, -0.1259, -0.1185,
        -0.4470, -0.1008, -0.1079, -0.1401,  0.0555, -0.0765, -0.3011, -0.1487,
         0.2156,  0.0813,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2261, -0.3681, -0.6414, -0.1157, -0.4539, -0.1512,  0.0488, -0.3346,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1798, -0.7411, -0.1668,  0.0177, -0.1440, -0.1756, -0.0313, -0.0594,
         0.0333,  0.0235, -0.0292,  0.0488, -0.0316,  0.0561, -0.0105, -0.0017,
         0.0252, -0.0071,  0.0220,  0.0024,  0.1118,  0.0983, -0.0869, -0.1299,
        -0.0097, -0.1272, -0.0313,  0.0229, -0.4103, -0.0073,  0.0743, -0.2920,
        -0.1029, -0.2482, -0.1779, -0.1921, -0.1075, -0.0439, -0.0879, -0.0329,
        -0.0467, -0.0857, -0.0098,  0.0580,  0.0502,  0.3972,  0.1556],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8168e-01,  5.7401e-01,  6.6203e-01,  7.5804e-01,  1.2856e-01,
        -3.8383e-01,  3.2370e-02, -1.3631e-01, -1.0780e-01,  1.0338e-01,
        -3.0656e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1931, -0.1934, -0.1403, -0.1856, -0.0694, -0.2094, -0.1940, -0.7911,
        -0.2628, -0.3340, -0.0706, -0.0825, -0.4048, -0.0525,  0.1405,  0.0027,
        -0.0157, -0.1762,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1660, -0.1869,  0.3830,  0.0572,  0.0117,  0.0846,  0.1325, -0.0095,
         0.1111,  0.1534, -0.0678,  0.2018,  0.3802, -0.3691,  0.0503,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6944, -1.0630,  0.0981,  0.0109,  0.1390, -0.1295, -0.3894,  0.0086,
         0.1966, -0.2059, -0.2417,  0.0230,  0.1264, -0.0477, -0.2484,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3392,  0.0170,  0.0714,  0.1224,  0.0467, -0.0106,  0.1166, -0.0680,
        -0.1111, -0.0891, -0.0512, -0.1938, -0.3113,  0.0385, -0.0866, -0.0843,
        -0.1950,  0.2671,  0.1110, -0.0767, -0.2194, -0.2173,  0.0336,  0.1701,
        -0.0117, -0.2642,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5118, -0.2994, -0.0542, -0.1281, -0.2474, -0.4019,  0.0040, -0.4711,
        -0.5324, -0.1100,  0.0413, -0.1574, -0.2085,  0.2198,  0.0958,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3481, -1.4849, -0.2091, -0.0246, -0.4991, -0.1931, -0.1307, -0.0038,
        -0.0628, -0.3954,  0.0475, -0.0116, -0.0473, -0.0771,  0.0305, -0.0077,
        -0.0249, -0.1069, -0.1902,  0.0093, -0.0628,  0.1137, -0.1064,  0.0558,
         0.0050,  0.0210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3932, -0.0201,  0.0570,  0.0585, -0.0809, -0.0637, -0.0360, -0.0653,
        -0.0579, -0.1212, -0.6315, -0.1000, -0.0616, -0.1648, -0.2507, -0.0863,
        -0.0942, -0.1604, -0.0460, -0.0886, -0.0246, -0.0106,  0.0091, -0.0557,
         0.2758,  0.1394,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.2787, -1.2539, -0.0299,  0.1767,  0.0950,  0.0165, -0.0477, -0.0157,
        -0.0317, -0.1053, -0.0033, -0.0702, -0.4533, -0.0089, -0.1147, -0.1089,
        -0.0347,  0.2579, -0.1455, -0.0874,  0.0846, -0.1081,  0.0180, -0.3178,
        -0.1056, -0.0013, -0.2178, -0.0701,  0.0690, -0.0063, -0.0268, -0.0628,
         0.0927, -0.0280, -0.0430,  0.2711,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0399, -0.3944, -0.1806, -0.0925, -0.1525, -0.0084, -0.2349, -0.3166,
        -0.0056,  0.0681, -0.1383,  0.0056, -0.0876,  0.0742, -0.0057, -0.0216,
         0.0440, -0.0360,  0.0041,  0.1388, -0.0310, -0.0695, -0.0604, -0.0125,
        -0.1645, -0.2766,  0.0497, -0.0701, -0.2402, -0.0124, -0.0102, -0.1023,
        -0.2200,  0.0309, -0.0395,  0.0316,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5272,  0.8022,  0.2185,  0.0345,  0.0209, -0.0097,  0.0866,  0.0345,
         0.1191, -0.0637,  0.0416, -0.0303,  0.0733, -0.0423, -0.0704,  0.0329,
         0.1103,  0.0459, -0.0714, -0.1777,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3484, -0.0745,  0.0215,  0.0091,  0.0543, -0.0977, -0.0787, -0.0261,
        -0.0498, -0.0764,  0.0586, -0.0619, -0.0527,  0.0016, -0.2361, -0.0045,
         0.0187, -0.1133, -0.0156, -0.0765, -0.1699, -0.0098, -0.0376,  0.0270,
        -0.1085, -0.1144, -0.0575, -0.0009, -0.0117, -0.1462, -0.0721, -0.0606,
         0.0639, -0.3333, -0.0374,  0.0645,  0.4384, -0.0057,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0586, -0.2529, -0.0958, -0.3064, -0.0621,  0.0315, -0.0217, -0.0011,
         0.0857, -0.0016, -0.2256, -0.2544, -0.0384,  0.0425, -0.0361, -0.2092,
         0.1782, -0.1694,  0.0136,  0.0865,  0.1182,  0.0778,  0.1271,  0.1442,
         0.0235, -0.0095,  0.0270, -0.0993, -0.3261,  0.0460, -0.0388, -0.0143,
         0.1092, -0.0112, -0.1010,  0.0082, -0.0344,  0.1066,  0.0786,  0.0631,
        -0.0461], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6188,  1.8135, -0.0704,  0.1676,  0.1284,  0.0954,  0.2057,  0.3774,
         0.1122,  0.0811,  0.1443,  0.3181,  0.2114,  0.0921,  0.0091,  0.2909,
         0.0838,  0.2490, -0.0089,  0.0630,  0.2038,  0.0373,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3232,  0.0054, -0.0336,  0.0180,  0.0171, -0.2391, -0.0635,  0.0103,
        -0.1116,  0.0332, -0.0976, -0.0764, -0.0437, -0.2122, -0.0851, -0.2332,
        -0.0764, -0.0942,  0.0359, -0.2068, -0.3392,  0.0598,  0.0570, -0.1722,
        -0.0444, -0.0062, -0.0725, -0.1305, -0.1547, -0.0103, -0.0842, -0.0804,
        -0.0908, -0.6522,  0.3996,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1103, -0.0795, -0.2394, -0.1110, -0.2591, -0.0904,  0.0265, -0.0709,
        -0.0656, -0.0557, -0.1447, -0.0126, -0.0309, -0.0339, -0.0100, -0.0980,
         0.1176, -0.0404, -0.2349,  0.0166, -0.1299, -0.0416, -0.0167,  0.0198,
        -0.0229, -0.0090, -0.1264, -0.1606,  0.3893,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0404,  0.7843,  0.2668,  0.1892,  0.0442,  0.1302,  0.0908,  0.0755,
         0.0479,  0.1036,  0.0480,  0.1374, -0.0747, -0.1194,  0.0164,  0.0669,
         0.0450,  0.0421,  0.0781, -0.0278,  0.0101, -0.0039, -0.0499, -0.0459,
         0.3020,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3980,  0.1641,  0.0462, -0.1328, -0.6794, -0.1561, -0.1622, -0.4587,
        -0.3693, -0.1438,  0.0048,  0.0397,  0.0092,  0.0094, -0.2357, -0.5003,
        -0.1021, -0.0714,  0.0289, -0.0285, -0.1300,  0.0212,  0.1781,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0609e-01,  1.0100e-01, -2.0537e-02, -3.5420e-02, -8.1455e-02,
         7.1736e-02,  5.4542e-02, -4.1108e-02, -1.8885e-03, -4.0930e-04,
         4.9600e-02, -3.8354e-02, -1.0783e-01, -2.4583e-02, -2.6970e-02,
        -9.5156e-02, -2.1370e-02, -7.9138e-02, -4.7843e-02, -4.6489e-02,
        -3.2483e-02,  6.6015e-02, -1.4147e-02, -2.5009e-02, -1.3310e-01,
        -2.0886e-01, -5.6757e-01,  1.1415e-02, -2.6807e-01, -1.1485e-01,
        -6.5875e-02,  2.6033e-02, -1.7474e-01, -1.2765e-01, -2.1906e-02,
         8.4717e-02,  8.6978e-02,  1.1790e-02,  1.7344e-01,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1469,  0.2693, -0.0079,  0.1230,  0.6871,  0.1843,  0.0510,  0.2718,
         0.5936,  0.0943,  0.0011,  0.0447,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.2326, -0.3252, -0.3408, -0.0011, -0.0302, -0.1948, -0.1162,  0.0994,
        -0.0382, -0.0655, -0.0042,  0.0285, -0.0194,  0.0310, -0.0433,  0.0441,
        -0.0501, -0.1347,  0.0301, -0.0860, -0.0224, -0.0757,  0.0930, -0.0845,
         0.0169, -0.0300,  0.0815, -0.1103,  0.0386,  0.0116,  0.0374, -0.1619,
        -0.1190,  0.0005, -0.0878, -0.0109, -0.1414,  0.0024,  0.0168, -0.0128,
         0.0669,  0.0109,  0.1346,  0.0096, -0.0388,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2356, -1.6753, -0.0804, -0.0689,  0.1166, -0.1588, -0.0917, -0.1943,
        -0.4846, -0.1429, -0.1128,  0.1212,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3949, -1.8336,  0.3231, -0.6218, -0.2034, -0.4622, -0.1840, -0.1861,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3845e-01,  6.8278e-01,  9.7897e-02, -6.4643e-02,  7.2990e-02,
         6.9202e-02, -2.0662e-02,  3.2703e-03,  2.9845e-02,  2.8682e-01,
         2.3725e-02, -3.4649e-02,  1.3832e-02,  5.1425e-02,  1.0129e-02,
         1.7850e-03, -1.7800e-04,  5.9572e-02,  6.5178e-02,  2.8035e-02,
         8.9035e-02, -2.0371e-02,  3.3034e-02,  7.5775e-02, -2.0128e-02,
         9.2851e-02,  3.2906e-02,  4.2420e-03,  7.3718e-02, -5.5929e-02,
         9.1276e-02,  4.3547e-02,  8.6408e-02,  1.4675e-03, -1.2058e-02,
         6.6441e-03,  1.1455e-01, -4.7218e-02,  3.9375e-03,  7.8902e-02,
         2.6859e-03,  4.9315e-02,  6.4590e-02,  1.4860e-02,  4.3930e-02,
        -3.9915e-01,  1.8331e-01,  2.3423e-02,  2.3953e-01,  2.4181e-01,
         4.5186e-02, -2.5808e-02,  8.5876e-02, -5.3910e-02,  8.2795e-03,
        -1.0235e-02,  6.4541e-02, -5.2833e-02, -2.9717e-02,  5.6854e-02,
         3.6277e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8688, -0.1016,  0.0896, -0.0860, -0.3883, -0.1024, -0.0384, -0.0771,
        -0.0417,  0.0498,  0.0081, -0.0700, -0.0395, -0.1034, -0.3101,  0.0103,
        -0.0854, -0.0553, -0.1177, -0.0675, -0.2813, -0.0959, -0.0228,  0.1135,
        -0.0262, -0.1070,  0.0019, -0.0357, -0.0425, -0.1757, -0.0978,  0.3990,
        -0.4149,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5894e-04, -5.7378e-01, -5.7042e-01, -2.2209e-01, -1.5993e-01,
        -8.6628e-02, -2.4322e-01,  8.5375e-02, -1.1267e-02,  7.5746e-02,
         6.6528e-02, -2.5321e-02, -1.1352e-02, -5.3143e-02, -9.4922e-02,
         2.6874e-01,  4.7117e-02, -2.4140e-01, -8.6565e-01,  3.3470e-02,
         2.7848e-01, -1.6998e-02,  4.0611e-01, -2.2769e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0636, -0.5116, -0.2567, -0.1836, -0.1872, -0.4148, -0.1506, -0.0734,
         0.0903, -0.5541, -0.1917, -0.2543, -0.4910, -0.1871, -0.0790, -0.0737,
         0.0599, -0.0469, -0.0311,  0.0759, -0.0433, -0.0368,  0.2153,  0.1018,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5830, -0.0334, -0.0800, -0.0708, -0.0730, -0.0143, -0.1255, -0.0121,
        -0.0903, -0.0980,  0.0728, -0.0952, -0.0545, -0.2558, -0.1930, -0.2641,
        -0.1172, -0.0353, -0.1838, -0.0577, -0.1196, -0.0039, -0.1221, -0.1240,
        -0.1195,  0.0222, -0.0546, -0.0409, -0.0306,  0.0450, -0.0934,  0.0040,
        -0.1139,  1.2657,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0511e-01,  9.9973e-03,  6.1165e-02,  7.1696e-02, -1.8817e-01,
         2.8669e-02,  1.0568e-02,  1.1693e-01, -1.4680e-01, -3.2270e-02,
         3.6182e-03, -6.0142e-02,  2.9755e-02, -6.0721e-01, -8.7028e-02,
         4.4670e-02, -2.3696e-01, -4.3266e-04, -7.5481e-02, -5.4138e-02,
        -3.4307e-02, -3.8517e-01,  2.5620e-02,  7.2866e-02,  4.7252e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1981, -0.0317, -0.0878, -0.0988, -0.0978,  0.0831, -0.3562, -0.1179,
        -0.2070, -0.8833, -0.0279, -0.0729, -0.0857, -0.0727, -0.2112, -0.1470,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0757,  0.5453,  0.0531,  0.2322,  0.1202,  0.1259,  0.6679, -0.0166,
        -0.1871, -0.0244, -0.0381,  0.0192,  0.1430,  0.0705,  0.2025,  0.0613,
         0.1001,  0.0492,  0.0643,  0.0188,  0.0699, -0.0919,  0.2294,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1352,  0.1077,  0.1270,  0.1812,  0.4163,  0.1216, -0.1484, -0.0637,
         0.0403, -0.0448, -0.0084, -0.0391, -0.2963,  0.0809,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.2861,  0.2596,  0.2562,  0.6414,  0.1248, -0.1338,  0.2065, -0.0417,
         0.1616, -0.0043,  0.1429,  0.2690,  0.0816, -0.0064, -0.0310,  0.1142,
         0.0268,  0.3136,  0.0310,  0.2615,  0.0401, -0.0072,  0.0362, -0.0649,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4457, -0.3685, -0.6040, -0.0689, -0.4699, -0.2158, -0.6977, -0.0096,
        -0.4078, -0.0479, -0.2342,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0691,  0.0093,  0.0100, -0.0506, -0.0015, -0.1194, -0.6874, -0.2045,
        -0.6026, -0.0918, -0.1246,  0.0105, -0.1215, -0.1962, -0.1556, -0.0512,
        -0.0587, -0.2730,  0.1854,  0.1383,  0.3033,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2095, -0.2471, -0.1620, -0.5303, -0.1672,  0.1365,  0.0423, -0.7174,
        -0.2002, -0.3092,  0.1204,  0.2790,  0.1387, -0.1898,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0836,  0.1439,  0.0791,  0.0930,  0.1339, -0.0818,  0.0028,  0.0958,
        -0.0831,  0.0950,  0.1576,  0.2814,  0.5878,  0.1149,  0.1646,  0.0858,
        -0.0060, -0.1004, -0.0048, -0.0338, -0.0704,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0911,  0.8793, -0.0581,  0.4508,  0.1684,  0.8624,  0.2776,  0.7975,
         0.3601,  0.3273,  0.2565, -0.1551, -0.1390,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1320e-01, -1.6119e-01,  1.3549e-02, -3.6974e-02, -3.4896e-02,
        -5.7532e-02, -8.6230e-02, -1.7352e-01, -1.1885e-01, -4.4280e-03,
        -8.4546e-02, -1.0628e-01,  1.9540e-02,  3.8285e-05, -2.2627e-02,
         2.3034e-02,  1.1932e-02,  2.0492e-02, -9.6636e-03, -8.5479e-02,
        -1.7846e-01,  1.0781e-02,  4.8324e-02, -1.8197e-01, -6.4603e-01,
         5.2589e-02, -7.1174e-01, -3.5236e-01, -3.0047e-01, -3.5922e-02,
        -2.9765e-02, -4.6771e-02,  7.9360e-02,  5.3787e-02, -1.1637e-02,
         5.6028e-02, -8.0560e-02,  1.5048e-02, -2.3309e-02, -1.2607e-02,
        -6.0337e-02, -7.1714e-04, -7.2988e-03,  2.0660e-01,  2.2518e-03],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2818, -1.0210, -0.1745,  0.0487, -0.0354,  0.1015,  0.0253, -0.1020,
        -0.1040,  0.0280,  0.0480,  0.1302,  0.0036,  0.0652, -0.1821,  0.0341,
        -0.4659,  0.0058, -0.1278, -0.0525, -0.0084, -0.3963,  0.0249, -0.1794,
         0.0698,  0.0654, -0.1157, -0.0167, -0.2745,  0.0714, -0.0244,  0.0101,
        -0.5603,  0.0791,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4493,  0.6216,  0.3043, -0.0221,  0.0501,  0.0393,  0.0324,  0.0832,
         0.2743,  0.0121,  0.0160,  0.0580, -0.0311, -0.1286,  0.0225,  0.0111,
         0.0287,  0.0575,  0.1645,  0.4373,  0.6529, -0.0875,  0.0297, -0.1561,
        -0.2657, -0.5523,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1249, -1.4065, -0.0337, -0.1370, -0.0265, -0.0932,  0.0904, -0.1215,
        -0.1396,  0.0197, -0.0801,  0.0105,  0.0516,  0.0081, -0.1945, -0.1114,
        -0.4339,  0.0124,  0.0037,  0.0069, -0.0386,  0.0710, -0.0535,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5026, -0.5175,  0.0682, -0.0947,  0.0317,  0.0775, -0.0975, -0.1660,
        -0.2753, -0.0582,  0.0767,  0.0568, -0.0546, -0.0516, -0.5853,  0.0895,
         0.0535, -0.1692, -0.1334, -0.0604,  0.0359,  0.0416,  0.0248,  0.0735,
        -0.0213, -0.0266, -0.1032, -0.3519,  0.0621,  0.0084,  0.0620,  0.0343,
         0.0052,  0.0270,  0.0284, -0.0478, -0.0258,  0.0313,  0.0413, -0.0589,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1193, -0.2862, -0.0821, -0.0944, -0.1900, -0.0573,  0.0286, -0.2535,
         0.0535, -0.0392, -0.0320, -0.0781, -0.1152,  0.0014, -0.1020, -0.1581,
        -0.0366, -0.0011, -0.0574, -0.0998,  0.0698, -0.1288, -0.1165,  0.0690,
        -0.0759,  0.0398,  0.0039, -0.2282, -0.0103, -0.0665, -0.0471, -0.0082,
         0.2506, -0.0714,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.2229, -0.2647, -0.0203, -0.0908, -0.0291,  0.0827, -0.0092, -0.0990,
        -0.0072,  0.0744, -0.0549, -0.0487,  0.0564, -0.0918, -0.3510, -0.0168,
        -0.2722, -0.0212, -0.1583,  0.0076, -0.0330,  0.0043, -0.1004,  0.0437,
        -0.1140, -0.3289,  0.1736, -0.0170,  0.1285,  0.0614, -0.0748,  0.0439,
         0.0472, -0.2620, -0.1375,  0.0014, -0.0919, -0.1168, -0.0726, -0.0525,
        -0.0424, -0.0366, -0.0354, -0.0390,  0.0105,  0.0718,  0.0558,  0.0273,
        -0.0177, -0.0658, -0.1902, -0.0601], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1643, -0.0538, -0.0761,  0.0839, -0.1840, -0.0166,  0.1645,  0.1563,
        -0.0364,  0.0203,  0.0154, -0.0072, -0.0360, -0.4734, -0.1728, -0.2467,
        -0.3865, -0.4921, -0.1522, -0.0787, -0.0846, -0.0692, -0.1980, -0.3220,
         0.0033,  0.0244,  0.1068, -0.0788,  0.1816,  0.0999, -0.2685,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0537, -0.6157,  0.1061, -0.0541,  0.0615,  0.0925, -0.0554,  0.1229,
        -0.0748,  0.0299, -0.1053, -0.1761, -0.0058, -0.2842, -0.2088, -0.1356,
         0.2176, -0.1969, -0.4116, -0.5556, -0.2425, -0.1352, -0.2026,  0.0813,
         0.0608,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3721,  0.0896,  0.2228,  0.0068,  0.0655,  0.7332,  1.2060, -0.2107,
         0.2549,  0.1662,  0.3574, -0.0787,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5876, -1.7069,  0.3633,  0.1518, -0.1782, -0.0343, -0.0234, -0.0207,
        -0.0238, -0.3116,  0.3577, -0.0554, -0.0508, -0.3935, -0.0663, -0.0246,
         0.0081,  0.0141,  0.0257,  0.0569, -0.0742,  0.0605, -0.0314,  0.0578,
         0.1485, -0.2444, -0.0520,  0.1579, -0.4728,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0203, -1.2971,  0.4520,  0.1326,  0.2647, -0.1848, -0.9186,  0.1163,
         0.1850, -0.0103, -0.0105,  0.0652,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2514, -1.0854, -0.3081, -0.1982,  0.0040, -0.3563, -0.5990,  0.1608,
        -0.1519, -0.1538, -0.3184, -0.2520,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1402, -0.5635,  0.5354,  0.2720,  0.4697, -0.0902,  0.1890,  0.1013,
         0.3420,  0.0957, -0.0743,  0.3023,  0.0507,  0.0378,  0.0076,  0.4999,
         0.0096, -0.0941,  0.1303,  0.0373,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2496,  1.5129, -0.0475,  0.0801,  0.2216,  0.0782,  0.1790, -0.2052,
        -0.0059,  0.0186,  0.0637,  0.1979,  0.1523,  0.1131, -0.1220, -0.0551,
         0.1336,  0.0083, -0.0731,  0.1838,  0.0960,  0.0180, -0.1022, -0.1050,
        -0.0762, -0.0117, -0.0455,  0.0713,  0.1023, -0.0636, -0.0397, -0.0039,
         0.0462,  0.3585,  0.1986, -0.0937,  0.2127,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4113,  2.4227,  0.2238, -0.2741,  0.2289, -0.1684, -0.1137, -0.2884,
         0.0986,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1653, -0.0958, -0.1287, -0.2730, -0.0971, -0.3242, -0.1720, -0.4181,
         0.0461, -0.0393,  0.1086, -0.0094, -0.1441, -0.5758, -0.0743, -0.0029,
        -0.0556, -0.1289, -0.1499, -0.2056,  0.0330,  0.0468,  0.0226, -0.0187,
        -0.1595,  0.1731,  0.1267,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4021, -1.2554,  0.1581, -0.1311, -0.0391, -0.1774, -0.0035, -0.0267,
        -0.0735, -0.0668,  0.0437, -0.1587, -0.0095,  0.0786,  0.0241, -0.1017,
        -0.1718,  0.0414, -0.0387,  0.0756,  0.0028, -0.2850, -0.2715,  0.0314,
        -0.0776,  0.0844, -0.0155, -0.1211, -1.0405, -0.0675,  0.0183, -0.1600,
         0.0679,  0.1663,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.1329, -0.0249,  0.0451, -0.0640, -0.1273, -0.0410, -0.0053,  0.0673,
         0.1148,  0.2861, -0.1781, -0.2746, -0.2453, -0.0937,  0.0843, -0.1838,
        -0.3531,  0.1702, -0.0807, -0.0935,  0.1374,  0.0183, -0.1510, -0.0984,
         0.1015,  0.0629,  0.0278,  0.1055,  0.0229,  0.1257,  0.2167, -0.2686,
        -0.5741, -0.0219, -0.1276, -0.0302, -0.3516, -0.1413,  0.0627, -0.0372,
        -0.0339, -0.2798, -0.0989,  0.0563, -0.1346,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0812, -0.0565, -0.2342, -0.1662, -0.0441, -0.2192, -0.3304,  0.0896,
        -0.0486, -0.3113, -0.1698, -0.1206, -0.2785, -0.0335, -0.1100, -0.0421,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1770, -0.1150,  0.0787, -0.0947, -0.0990,  0.0209,  0.0250,  0.1587,
         0.2641,  0.1594,  0.0329,  0.3534,  0.0292,  0.0342,  0.1263, -0.0484,
         0.0188,  0.0519, -0.0233,  0.0614,  0.0063, -0.0947,  0.0642,  0.0654,
         0.0772,  0.3702,  0.0721,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2470,  0.2129, -0.0759, -0.0480, -0.0744,  0.0052, -0.0392, -0.0327,
        -0.0683,  0.0197, -0.0081, -0.0344, -0.0518, -0.0007, -0.0301,  0.0118,
        -0.0331,  0.0358,  0.0478, -0.1391, -0.0929, -0.0835, -0.0676, -0.3423,
         0.0026, -0.0797, -0.1278, -0.1810, -0.0641, -0.1477,  0.0338, -0.0811,
        -0.1135, -0.0286, -0.0093, -0.0306, -0.0052, -0.0979, -0.0037,  0.0801,
        -0.0250, -0.1395,  0.0220, -0.0565,  0.0906,  0.0727], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1136,  0.2349,  0.0454, -0.0534,  0.0453, -0.1636,  0.0389, -0.1988,
         0.0058, -0.0838,  0.0375, -0.0247, -0.0451, -0.0189,  0.1533,  0.0283,
        -0.0511, -0.0802, -0.2185, -0.8783, -0.0686,  0.1160, -0.0608, -0.0343,
         0.0023, -0.0735, -0.2295, -0.0997,  0.0163, -0.1082,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5290, -0.0492,  0.3849,  0.2747,  0.3199,  0.1510,  0.4184,  0.5725,
         0.2610,  0.4385,  0.3338,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1337, -0.8790, -0.2649, -0.0950, -0.3032, -0.0723, -0.0896, -0.6828,
        -0.0629, -0.1906,  0.0220, -0.0768, -0.0738, -0.1371, -0.0346, -0.3179,
        -0.0682, -0.1594,  0.1279, -0.6193,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6962, -1.1602, -0.6809, -0.2619,  0.2887, -0.1513, -0.6891,  0.2293,
        -0.1128, -0.2373, -0.3078,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3993e-01, -8.9709e-01, -3.8775e-01, -6.1193e-01,  1.4165e-01,
         5.3277e-02, -5.8599e-02, -1.1139e-01, -2.7162e-01,  8.3233e-02,
        -6.1308e-02, -1.2216e-01, -2.5883e-02,  3.1543e-02, -1.0054e-02,
        -3.0774e-01,  5.8496e-02, -3.9005e-03, -2.2295e-02, -8.2575e-05,
         3.6434e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7831, -0.4911, -0.1999, -0.1490, -0.0949, -1.2210,  0.3472, -0.1250,
        -0.1045,  0.3973, -0.0284, -0.1533, -0.0174,  0.0071, -0.0253, -0.1238,
        -0.1174, -0.2009,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3108, -0.2188, -0.3041, -0.0959, -0.1083, -0.0286,  0.0123,  0.0064,
        -0.0362, -0.0624,  0.0606,  0.0103, -0.1131, -0.2007, -0.1688, -0.0931,
        -0.0681, -0.1817, -0.0839, -0.0067, -0.0868, -0.1148, -0.1424, -0.0007,
        -0.1475,  0.0464, -0.1212, -0.0546, -0.0533, -0.0817,  0.0116, -0.0336,
         0.0073, -0.0529, -0.0430,  0.0060,  0.0216, -0.1058,  0.2356,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1894, -1.2259,  0.0034, -0.1299, -0.0227, -0.0229,  0.0198, -0.0816,
        -0.1092,  0.0234, -0.0188, -0.3482, -0.1290, -0.0606,  0.0608, -0.0699,
        -0.1118, -0.0833,  0.0258, -0.0350,  0.0648, -0.1223, -0.0404,  0.0150,
        -0.0719, -0.0023, -0.0857, -0.0953, -0.1029, -0.2645, -0.0258, -0.0189,
         0.1452, -0.2198,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.6867,  1.8177,  0.3676,  0.5166, -0.0999,  0.1596, -0.3138, -0.1415,
         0.4014,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7703e-01, -1.0423e+00, -1.9890e-01, -2.1600e-01, -9.3740e-02,
        -1.6972e-01, -3.3046e-02,  3.8408e-02, -4.5561e-03,  1.1242e-01,
        -8.7492e-02, -1.5524e-02,  8.7345e-02,  5.7493e-03,  1.9711e-02,
        -1.6399e-01,  3.3670e-02, -7.6889e-01, -1.0324e-01,  8.3947e-04,
        -2.9175e-02,  2.4935e-02,  5.7854e-02,  3.9712e-02, -1.1059e-01,
        -8.7292e-02,  2.4567e-02,  8.4233e-02,  2.2121e-02, -2.0526e-02,
        -4.5658e-02, -4.7337e-02,  4.1813e-02, -5.1734e-03, -4.7360e-02,
         7.1841e-02,  3.0712e-02, -1.5439e-03,  3.7579e-01, -5.6900e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1164, -0.0986, -0.7453, -0.1646, -0.0737, -0.0844,  0.0291, -0.0715,
        -0.1263, -0.3699, -0.0248,  0.0207,  0.0161, -0.0095,  0.0237, -0.0083,
        -0.0620,  0.0546, -0.1858, -0.0126, -0.0379, -0.0103,  0.0916,  0.1000,
         0.0551, -0.0272, -0.2879,  0.0455, -0.2764, -0.2873, -0.1165, -0.0116,
        -0.0151, -0.0321, -0.0229, -0.0212, -0.1364, -0.1951,  0.0119,  0.0046,
         0.0710,  0.0289, -0.0033,  0.0158, -0.0500,  0.0498,  0.0271,  0.0587,
        -0.0275,  0.0407,  0.0842,  0.0166,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5224, -0.7831, -0.2346, -0.0257, -0.3217, -0.0493, -0.1570, -0.0160,
        -0.0640, -0.1042, -0.1162, -0.1577, -0.1528, -0.0507, -0.1209, -0.1802,
        -0.1088,  0.0495, -0.6088, -0.1887, -0.0164, -0.2405,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7827, -2.1961, -0.3051, -0.0382,  0.0575, -0.0854, -0.1002, -0.0819,
         0.0535,  0.0443, -0.0941, -0.2008, -0.1324, -0.1748,  0.0143, -0.1316,
        -0.1985,  0.0213,  0.3199, -0.0031, -0.0499,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0524, -0.4246, -0.0221,  0.0513, -0.0068, -0.0070, -0.0313,  0.0425,
        -0.0283, -0.0523,  0.0111, -0.0208,  0.0355,  0.0348, -0.0210, -0.0207,
         0.0280, -0.0261, -0.0901, -0.0079,  0.0022, -0.0035, -0.0128,  0.0162,
        -0.0068,  0.0494, -0.0963, -0.0706,  0.0381,  0.0089, -0.1805, -0.1346,
        -0.0263, -0.0342, -0.0237, -0.0019,  0.0578,  0.0007,  0.0057, -0.0030,
        -0.3114, -0.0133, -0.0033, -0.0621, -0.0801, -0.0150, -0.0790, -0.0893,
         0.0126,  0.0445, -0.0345,  0.0322, -0.0596,  0.0414, -0.0709],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1903, -0.0034,  0.1028, -0.0283,  0.0395, -0.0089, -0.2235, -0.0372,
        -0.0293,  0.0095, -0.0174, -0.0166, -0.0459, -0.0296, -0.1047, -0.0185,
        -0.0783, -0.0632, -0.2036, -0.2024, -0.1492, -0.0690, -0.0742, -0.0422,
         0.0442,  0.0085, -0.0629, -0.1785, -0.0610, -0.0336, -0.0816, -0.0097,
        -0.0577, -0.1964, -0.0297, -0.0736, -0.0391, -0.1023, -0.0951, -0.1355,
        -0.0460, -0.0156,  0.0704,  0.0546, -0.0970,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1935,  1.2354,  0.5400,  0.5018, -0.3490,  0.1909,  0.0984, -0.1123,
         0.2335,  0.1258,  0.0229,  0.2290,  0.0978,  0.2314,  0.1919,  0.1706,
        -0.0291,  0.1942,  0.0101,  0.0646,  0.2711,  0.2947,  0.2801,  0.0731,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1258, -0.0199, -0.0013,  0.0572,  0.0351,  0.0764, -0.0345, -0.0335,
         0.0224, -0.0233,  0.0113,  0.0149, -0.1726, -0.1546,  0.0093, -0.0106,
        -0.2888, -0.0966, -0.1181, -0.1869, -0.1067,  0.0495, -0.1273, -0.2210,
         0.0915, -0.1134, -0.1258, -0.0029, -0.0703, -0.1915, -0.0420, -0.0410,
         0.0191,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0773, -0.9621, -0.0336, -0.2457, -0.0774, -0.0082, -0.1634,  0.0645,
        -0.8986, -0.0943, -0.0236,  0.0644,  0.0965, -0.1878,  0.0095, -0.1419,
        -0.1112,  0.0418, -0.0126,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2367,  0.0581,  0.0767, -0.0598, -0.2981, -0.0194, -0.2738, -0.1830,
        -0.4140,  0.0297, -0.0657, -0.0828, -0.0404,  0.0444,  0.0314, -0.0613,
        -0.0182, -0.1699, -0.0515,  0.0349, -0.0973, -0.5878, -0.1878,  0.0831,
        -0.0842,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0870, -1.4280, -0.2225, -0.2231, -0.3937, -0.2485, -0.6366, -0.0089,
        -0.2804, -0.2250,  0.1051, -0.0801, -0.1064, -0.0429, -0.1000,  0.2129,
        -0.0864,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 3.9898e-01, -1.1645e-02, -1.0137e-01,  2.4937e-03, -2.0896e-02,
         3.7805e-02,  5.1522e-03, -7.5776e-02,  2.6418e-02, -2.0439e-01,
        -1.3083e-01, -4.6091e-02,  1.0518e-02, -2.3185e-02,  4.1953e-03,
         2.9342e-02, -1.9645e-01, -6.3924e-01, -1.4823e-02, -2.0751e-01,
        -9.9066e-02, -3.3839e-01, -5.6393e-01, -4.2576e-03,  6.1310e-04,
        -2.9137e-02, -2.5215e-01, -2.0230e-01, -1.7181e-01, -8.3875e-02,
        -1.9465e-02,  2.2510e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0280,  0.3648,  0.2725, -0.0171,  0.0637, -0.0056,  0.1668,  0.8901,
        -0.0345,  0.0082, -0.0039,  0.0054, -0.1206, -0.0865,  0.1338, -0.1076,
         0.0848,  0.0265,  0.3446,  0.2139,  0.0600,  0.0214,  0.3130,  0.3605,
         0.0534,  0.0528, -0.1078,  0.3382,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4069e-01,  3.5132e-01,  3.2352e-04,  1.1523e-01,  1.3715e-02,
         1.3137e-01,  7.1009e-01,  1.2828e-01,  1.6445e-01,  3.3375e-01,
        -4.9827e-02,  4.9067e-02,  4.8128e-02,  2.0566e-01,  3.0112e-02,
         2.6019e-01,  5.7229e-02,  2.8099e-01,  4.3445e-03,  5.7421e-03,
         6.0047e-02,  1.5069e-02,  4.6580e-02, -3.9032e-03,  1.3244e-01,
         3.3386e-02, -4.6816e-04, -1.6437e-01, -1.1738e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1593, -0.1142, -0.0409, -0.1128, -0.4364,  0.0521, -0.0618,  0.0919,
        -0.0282,  0.0222,  0.0382, -0.0864, -0.0550, -0.3608,  0.0959, -0.1552,
         0.0327, -0.5513, -0.0471, -0.1733,  0.0974,  0.0205, -0.0867, -0.0578,
        -0.0206, -0.0083, -0.0547, -0.0063, -0.0075,  0.0026, -0.0008,  0.0614,
        -0.2146,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2324, -1.4398, -0.0787, -0.0298, -0.2183, -0.2243, -0.1346, -0.1828,
        -0.0138, -0.2359, -0.0385, -0.0122, -0.0377, -0.1126, -0.0782,  0.0442,
        -0.2611, -0.0681, -0.7738, -0.0334, -0.4169, -0.2718,  0.1720, -0.3149,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1445, -0.5919, -0.5573, -0.0427, -0.1027, -0.0617, -0.1456, -0.2192,
         0.0222, -0.2002, -0.0925, -0.1374, -0.0620, -0.0966, -0.0660,  0.0076,
         0.0029, -0.0931, -0.0327, -0.6436, -0.2040, -0.0210,  0.1818,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1296, -0.3831,  0.0009, -0.0144, -0.0355, -0.1165,  0.0583,  0.0094,
        -0.0961, -0.1259, -0.0579, -0.0482, -0.0885, -0.0189,  0.0280, -0.0937,
        -0.1690, -0.0826, -0.0233,  0.1406, -0.0098, -0.1081, -0.0036, -0.0187,
        -0.2062, -0.1060, -0.0317, -0.0488, -0.0840, -0.1889, -0.2217, -0.0447,
        -0.0763, -0.0621, -0.0691, -0.0180,  0.0075, -0.1954, -0.0901,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2369, -0.4097, -0.1368, -0.0357, -0.7437, -0.0421, -0.0220, -0.0152,
        -0.0189,  0.0697,  0.0068, -0.0065,  0.0367, -0.0168,  0.0713, -0.0976,
        -0.0513,  0.0442,  0.0138, -0.0833, -0.1174, -0.0519,  0.0262, -0.0405,
        -0.5093, -0.3429,  0.0251,  0.0078, -0.0063, -0.0323, -0.0682, -0.0956,
        -0.1513, -0.0288, -0.0212,  0.0218, -0.0543, -0.3206, -0.0476,  0.0444,
        -0.0151, -0.0162, -0.2194,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4324e-01, -6.1534e-01, -3.9949e-03, -3.5608e-03, -1.6964e-02,
        -3.7885e-02, -1.0069e-01, -4.9671e-02, -2.1587e-01, -4.2347e-02,
         8.9479e-03, -1.0336e-02, -4.5053e-02,  1.1622e-02, -1.3520e-02,
        -2.7314e-02, -6.0272e-02, -1.6190e-01,  2.0155e-04, -6.4808e-03,
         3.5483e-02,  6.7412e-03, -4.2748e-02, -3.8516e-03, -2.1454e-02,
         3.1735e-02, -2.5148e-03,  7.3921e-05, -4.3900e-02, -4.8747e-02,
        -6.5270e-02, -3.2031e-02, -8.8699e-02,  3.4642e-02, -2.1774e-02,
        -2.1473e-02, -4.5803e-02, -1.4218e-02, -1.6295e-01, -7.3131e-02,
        -8.4924e-02, -2.0201e-01, -3.8704e-02, -3.1767e-02, -1.5506e-02,
         4.3732e-03, -2.4345e-02, -2.5896e-02, -5.7547e-02, -2.1136e-02,
        -4.3641e-02, -2.6143e-02,  6.5236e-02, -7.6416e-03, -2.9153e-02,
         6.4331e-02, -2.2277e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1423, -0.9755, -0.2263, -0.0843, -0.1790, -0.1178, -0.1489, -0.1229,
         0.2528, -0.1554, -0.2800, -0.1441, -0.1646, -0.0098,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7500,  0.9571,  0.2836,  0.1209, -0.0940, -0.0633,  0.0739,  0.0895,
         0.1434,  0.0450,  0.0693,  1.0903,  0.2220,  0.1235,  0.1192,  0.1787,
         0.0861,  0.2167, -0.0572,  0.3706,  0.3866,  0.1149,  0.1577, -0.0240,
        -0.0930, -0.1878, -0.0596, -0.1259,  0.4287, -0.2692,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0594, 0.3431, 0.5738, 0.8453, 0.7086, 0.2156, 0.2796, 0.0176, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0807, -0.2623, -0.0871, -0.1418, -0.1816, -0.0307, -0.0167,  0.0221,
         0.0063, -0.0556, -0.1626, -0.1690, -0.2941, -0.4020,  0.0115, -0.0192,
         0.0319,  0.0041,  0.0331, -0.0338, -0.0126,  0.0020, -0.0526,  0.0173,
        -0.1429, -0.0658, -0.1270, -0.0141, -0.1950, -0.5732, -0.0920, -0.0958,
         0.0383,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0441, -0.5347, -0.1759,  0.0382, -0.0105, -0.0136,  0.0013, -0.0933,
         0.0662, -0.0237,  0.0119,  0.0339, -0.0273,  0.0017,  0.0111,  0.0219,
         0.0172,  0.0267, -0.0215,  0.0091, -0.0025,  0.0306, -0.0135,  0.0155,
         0.0729, -0.0452, -0.0941, -0.2906,  0.0409, -0.0494, -0.0383, -0.0547,
        -0.0836, -0.0573, -0.0872,  0.0451, -0.1274,  0.0148, -0.0232,  0.0167,
         0.0108,  0.0293, -0.0137, -0.1639,  0.0614, -0.0362, -0.0167,  0.0014,
        -0.0466, -0.0170,  0.0040, -0.0444, -0.0160, -0.0439, -0.0419,  0.0225,
        -0.0222,  0.0381, -0.0369, -0.0717, -0.0344, -0.0221, -0.5531, -0.0671,
        -0.0149, -0.0258, -0.0329,  0.0161, -0.0102,  0.1090,  0.0991],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0936, -0.0137, -0.0160,  0.0039, -0.0586, -0.1607, -0.4200, -0.0861,
        -0.0162,  0.0867,  0.0071, -0.1042,  0.0146,  0.0041,  0.0133,  0.0384,
        -0.0052,  0.0032, -0.0568, -0.1778, -0.0006,  0.0788,  0.0791, -0.2165,
         0.0664, -0.0964, -0.0138,  0.0057,  0.0190, -0.0159, -0.0253, -0.0828,
        -0.1505, -0.2306,  0.0105,  0.0074, -0.2082, -0.0893, -0.0105, -0.0161,
         0.1050,  0.0220,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9816, -1.1531,  0.0693,  0.2740,  0.2245, -0.1112,  0.0577, -0.2730,
         0.1384, -0.2034, -0.2513, -0.1318, -0.1636, -0.1220,  0.1724,  0.0645,
         0.0161, -0.2155, -0.4288, -0.1107, -0.0358, -0.0411, -0.0057, -0.0286,
         0.0775, -0.2320, -0.1746, -0.0593, -0.1665,  0.0110, -0.0530,  0.0397,
        -0.0807, -0.1026, -0.1118,  0.4502,  0.0524,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1715,  1.8074, -0.0271,  0.2357,  0.1551, -0.0204,  0.0715,  0.0310,
        -0.2178,  0.0845, -0.0066,  0.1213,  0.1849, -0.0123, -0.2443,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2484, -0.2689,  0.1327,  0.0718, -0.0548, -0.3239, -0.1250, -0.0860,
        -0.4957, -0.1073,  0.0332, -0.0335, -0.0401, -0.0391,  0.0429,  0.0712,
        -0.0671, -0.3641,  0.0587, -0.0144,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0765,  0.0350, -0.1142, -0.0043, -0.2277, -0.0875, -0.0725, -0.0409,
        -0.1712, -0.1269, -0.1243, -0.2613, -0.0658, -0.0688, -0.1036, -0.0941,
        -0.0522, -0.1173,  0.0233, -0.1369, -0.0191, -0.0197,  0.0048,  0.0175,
         0.1066, -0.0808, -0.4061, -0.0193, -0.0504, -0.0403, -0.2517, -0.0684,
        -0.0382, -0.1019, -0.0633, -0.1847,  0.0433, -0.0135, -0.0506,  0.0148,
         0.1897,  0.0183,  0.0030,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0287, -0.0052,  0.0040, -0.0785, -0.0993, -0.1574, -0.1608, -0.5516,
        -0.1363, -0.0665, -0.1462,  0.0131, -0.1289, -0.1397, -0.3442, -0.0421,
        -0.0655,  0.0204, -0.0159, -0.1505, -0.2485, -0.0259, -0.0272,  0.1359,
        -0.0574, -0.0023,  0.0525, -0.0566, -0.1580, -0.0476,  0.0216, -0.0673,
        -0.0204, -0.0737, -0.1151, -0.0194, -0.0934,  0.0066,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4119, -0.7165, -0.4020, -0.8077, -0.1210, -0.4820,  0.1511,  0.1755,
        -0.0247, -0.5087, -0.0465, -0.0911, -0.2603, -0.1212,  0.0593,  0.0859,
        -0.0925, -0.1883,  0.0339,  0.1227,  0.1995,  0.1761,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0210,  0.6346,  0.0914, -0.0480,  0.3051,  0.0387,  0.0303,  0.0924,
         0.2088,  0.4147,  0.1094,  0.0565,  0.1048, -0.0624,  0.1413,  0.1410,
         0.5302,  0.1240,  0.1483,  0.2608,  0.1044, -0.0792,  0.1519,  0.1898,
        -0.0152,  0.0078,  0.0180,  0.0226, -0.0331,  0.1085,  0.1589, -0.1120,
         0.0387,  0.0745,  0.1254,  0.3834, -0.0888,  0.0329, -0.0997,  0.3115,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2403, -0.0784, -0.0241, -0.0005, -0.0267,  0.0593,  0.0117, -0.1444,
        -0.1395, -0.0728, -0.0275, -0.0310, -0.0029, -0.3390, -0.2051, -0.0199,
         0.0203,  0.0591, -0.1178, -0.0240, -0.0995,  0.0088, -0.1405,  0.0558,
         0.0420, -0.4023, -0.0237, -0.2545, -0.3880, -0.1038, -0.1615,  0.1097,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1150, -1.1697, -0.0167,  0.0957, -0.0420,  0.0457,  0.0138, -0.1190,
        -0.2691, -0.0642,  0.0769, -0.1287, -0.1579,  0.0081,  0.0064, -0.0204,
        -0.1771, -0.0057,  0.0336,  0.0404,  0.0183, -0.0925, -0.0056, -0.2281,
        -0.0151,  0.0750, -0.0976, -0.0156,  0.0172, -0.1163,  0.0088, -0.0877,
        -0.0254, -0.1337, -0.1094,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.2772, -0.1716, -0.4048, -0.2903, -0.1178, -0.0072, -0.4196, -0.4844,
         0.0027, -0.3426, -0.0643, -0.0977,  0.0728, -0.2122, -0.0931,  0.0846,
        -0.0280, -0.0700,  0.0577,  0.1441, -0.0668,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0033, -2.0110,  0.0530, -0.4979, -0.3909, -0.2957, -0.1695, -0.4873,
        -0.5726,  0.1088, -0.0240,  0.1786, -0.0406,  0.2126, -0.1585, -0.0769,
        -0.0633,  0.0465, -0.1616,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7057, -0.7920, -0.2401, -0.0496, -0.0633, -0.0439,  0.0618,  0.0752,
        -0.1123, -0.1018, -0.4489,  0.0014, -0.0815, -0.3964, -0.0545, -0.1870,
        -0.0166, -0.1151,  0.0038, -0.1290,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2093, -1.4414, -0.1613, -0.3128, -0.1336, -0.1603, -0.0297, -0.1536,
        -0.1513, -0.1786, -0.0563,  0.0036,  0.0892, -0.1241, -0.2086, -0.1115,
        -0.2404,  0.0201, -0.0468, -0.1634,  0.0268,  0.2511,  0.3147,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0523, -0.7633, -0.1435, -0.0940,  0.1278, -0.0519, -0.0143, -0.0696,
        -0.2149, -0.4252, -0.1246, -0.3238, -0.0811, -0.4453,  0.1012, -0.0238,
         0.0565,  0.0856, -0.0280,  0.2021, -0.1461,  0.0457, -0.0429, -0.0846,
        -0.0479,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3958e-01,  1.4811e+00, -1.4688e-02,  6.2863e-02, -4.0571e-03,
         6.0239e-02, -9.1772e-02, -1.7864e-02, -5.3748e-02, -9.7594e-02,
         2.7761e-01,  5.0440e-02,  5.8234e-03,  6.3400e-02, -1.3411e-02,
        -2.2416e-02,  1.2405e-02,  9.9339e-04, -9.8616e-02,  1.1186e-01,
         2.1212e-01,  2.4996e-01,  1.8880e-01,  1.0910e-01,  5.1653e-02,
         6.0525e-02,  7.6152e-02, -7.1120e-02, -1.2363e-02, -9.6819e-02,
        -2.2281e-02, -1.1480e-01, -2.2652e-02, -5.7012e-03, -2.3820e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1141,  0.1165,  0.1334, -0.4916,  0.0624,  0.0265, -0.0171, -0.1047,
        -0.4773,  0.0676,  0.3106,  0.1670,  0.3009, -0.1258, -0.1346, -0.3105,
        -0.0131, -0.4665, -0.2387,  0.0226, -0.0307, -0.2890,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2110, -0.1776,  0.0556, -0.0767,  0.2007, -0.1158,  0.0214, -0.0580,
        -0.1002, -0.3526, -0.0080, -0.1213,  0.0270, -0.0812,  0.1089, -0.1712,
        -0.3130,  0.0770, -0.1586,  0.0993, -0.1171,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0802, -1.1047, -0.1651,  0.0778,  0.0045, -0.0329, -0.2629, -0.5890,
        -0.0636,  0.0761, -0.2793, -0.0868, -0.0487, -0.2600, -0.0888, -0.2239,
        -0.0464,  0.0526,  0.1817,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3936,  0.0041, -0.0623, -0.0112,  0.0071, -0.2562, -0.0509, -0.0189,
         0.0409, -0.0154, -0.0471, -0.0326,  0.0067, -0.0534,  0.0269,  0.0044,
        -0.0334, -0.1263,  0.0047, -0.0529, -0.0500, -0.1109, -0.0529,  0.0106,
        -0.0813, -0.1154,  0.0696, -0.1920, -0.1527, -0.4202, -0.0656, -0.1060,
        -0.0536, -0.1780, -0.0015, -0.0289,  0.0621,  0.0162,  0.0927, -0.0312,
         0.0517,  0.0019,  0.0160,  0.0156, -0.0027, -0.0042,  0.0396],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4331e-01,  1.2621e+00,  1.7799e-01,  4.1244e-02, -7.4223e-04,
        -1.0185e-02, -2.3074e-03, -1.6888e-01,  1.8924e-01, -9.5869e-02,
         3.1526e-02, -2.5882e-02, -4.5188e-02,  4.9754e-02,  9.9167e-02,
         4.6515e-04,  3.7868e-01,  1.2853e-02,  2.4183e-02,  8.4991e-04,
         1.3381e-02,  1.8526e-01,  3.0992e-01,  3.8199e-01,  4.6259e-03,
        -1.4263e-01, -1.0370e-01,  1.6104e-02,  7.8068e-02,  1.1487e-01,
         1.2343e-01, -6.6583e-02,  1.7169e-01,  1.0762e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0046, -1.1421, -0.0933,  0.0614, -0.0529, -0.1477, -0.0563, -0.0049,
        -0.0391, -0.0464,  0.0402, -0.0356, -0.0912, -0.3280,  0.0259,  0.0137,
         0.0032,  0.1172,  0.1399, -0.4347,  0.0495, -0.0624, -0.0642, -0.1065,
        -0.0120, -0.0201, -0.0524, -0.0693,  0.0067, -0.0585,  0.0159,  0.0580,
        -0.1113, -0.0625,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.4753, -1.0498, -0.1582,  0.0614, -0.0047, -0.3122,  0.1083,  0.1541,
         0.1086, -0.2919, -0.2474, -0.0747, -0.1208, -0.4978, -0.0396, -0.1325,
         0.1373,  0.0318,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3608,  0.0352,  0.2253, -0.1365,  0.1009, -0.1030,  0.1110,  0.0916,
         0.2492,  0.8735, -0.0466, -0.0090,  0.0219, -0.0062, -0.0020,  0.1095,
         0.0253,  0.0725,  0.1426,  0.4919,  0.0083,  0.0473,  0.1442, -0.0483,
         0.0238, -0.0658, -0.2910,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2100,  0.2038,  0.0537,  0.0101,  0.1589, -0.0851,  0.0861,  0.1689,
         0.4418,  0.0748,  0.0721, -0.0823,  0.9212,  0.3178, -0.0076, -0.0142,
        -0.0762,  0.0957, -0.0332,  0.3163,  0.0710,  0.1254,  0.1520, -0.0015,
         0.2210,  0.1983,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0380,  1.4038,  0.1348,  0.0313,  0.5163,  0.1143,  0.0751,  0.1606,
         0.0739,  0.1952,  0.1259,  0.1167, -0.0094, -0.0101, -0.0510,  0.3402,
         0.1057,  0.0409,  0.1376,  0.2279, -0.3760,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3524, -1.0667, -0.4859, -0.4375,  0.0632, -0.2535, -0.4621, -0.0785,
        -0.1622,  0.1103, -0.1135, -0.2500, -0.1933,  0.1936, -0.1111,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4506, -0.0492,  0.0777, -0.0081, -0.1524,  0.4220,  0.1979,  0.2781,
        -0.0486,  0.1093,  0.2107,  0.0890,  0.6699,  0.0988, -0.0170, -0.1069,
         0.5139,  0.2624,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2153, -0.9715, -0.2254, -0.3387, -0.0208, -0.0882, -0.3789, -0.2395,
        -0.0124, -0.4254, -0.3917, -0.2498,  0.1037, -0.2663, -0.0132,  0.0394,
        -0.1060, -0.0859,  0.3039,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1345, -0.3004, -0.1785, -0.0261, -0.0429, -0.0324,  0.1177,  0.0464,
        -0.0156, -0.0172, -0.0068,  0.0354, -0.0431,  0.0255,  0.0020, -0.0310,
        -0.2803, -0.0084, -0.0386,  0.0893, -0.1649, -0.0263, -0.2016, -0.0863,
        -0.1451, -0.1412, -0.1659,  0.0464, -0.0874, -0.0520, -0.0275, -0.1170,
        -0.0896,  0.0388,  0.0031, -0.0128, -0.1599,  0.0214, -0.0094, -0.0485,
         0.0354,  0.0718, -0.0916, -0.1469], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5521e-01, -8.9619e-01, -2.0097e-01, -1.1810e-01, -5.1388e-02,
         7.7491e-02, -1.7674e-01,  1.0638e-02, -2.6192e-01,  7.3882e-02,
         4.5060e-03,  7.4848e-02,  5.3119e-02,  4.0605e-02,  1.3658e-01,
        -5.4323e-02, -3.1984e-02, -3.5039e-01,  1.1017e-01, -4.6964e-02,
        -1.0407e-01, -1.9778e-04,  6.0772e-02, -8.0317e-02, -1.2294e-02,
         5.3111e-02,  6.9116e-02, -2.6993e-01,  1.1017e-01, -2.0046e-02,
        -7.1587e-02, -4.5602e-02,  2.1339e-02, -1.1300e-01,  1.4907e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1157, -1.1330, -0.1222, -0.1316, -0.0225, -0.0508, -0.3445, -0.1697,
         0.0930, -0.0028,  0.1128, -0.1041, -0.1561, -0.0151,  0.1218, -0.2045,
        -0.0450, -0.1240, -0.1648,  0.0323, -0.0798, -0.0388,  0.1219, -0.2127,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6668e-01, -7.5397e-01, -1.1383e-02, -1.7602e-01, -8.2385e-03,
         6.3498e-02,  6.2483e-03, -1.8066e-01, -4.2879e-01, -1.3346e-01,
        -3.1238e-02, -9.0075e-02, -3.4253e-04, -1.1258e-01, -4.4528e-01,
        -5.2270e-02,  5.2082e-02, -1.7401e-02, -4.7960e-01,  7.5565e-02,
         3.3154e-02, -9.5835e-02,  9.3448e-02, -7.2577e-02, -4.1003e-02,
        -1.8439e-01, -1.0001e-01, -1.2293e-01, -1.0735e-01,  2.4565e-02,
        -3.9500e-01, -9.1583e-02,  7.1575e-02, -1.4702e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0131e-01,  3.5510e-02,  9.9568e-02, -1.9087e-01,  7.4293e-02,
         3.8529e-03, -4.4810e-01, -1.3234e-01,  6.3809e-02, -6.7140e-02,
        -8.6440e-03,  4.0317e-02, -3.3763e-01, -5.3870e-02, -2.7699e-01,
        -4.7026e-01, -6.9654e-02, -2.6648e-02,  3.3897e-02, -7.9169e-03,
        -4.3940e-04,  9.6765e-03,  4.9871e-02, -1.7854e-01, -2.4865e-02,
        -3.9465e-03, -7.4280e-02, -1.4588e-01, -2.2695e-01, -8.3644e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.2186,  0.1454,  0.5353,  0.9503,  0.1128,  0.3174,  0.0191,  0.1648,
         0.1019,  0.4578,  0.1139, -0.2831, -0.2754,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0600, -0.0686,  0.3000,  0.2425, -0.0641, -0.1527,  0.0246, -0.2959,
        -0.4266, -0.0583,  0.0087, -0.2155, -0.0314, -0.1933, -0.2400, -0.0930,
         0.1288,  0.0953,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1213,  0.5048,  0.6775,  0.4324,  0.5387, -0.1786,  0.1549, -0.0273,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0593e-01, -1.2702e+00, -5.7109e-02,  1.3799e-01, -9.5882e-02,
        -2.0560e-01, -7.0225e-02, -1.8211e-01,  1.5941e-01,  1.7433e-01,
         2.5790e-02,  1.6375e-01, -6.5882e-02,  5.4387e-02, -3.6258e-02,
        -2.6487e-03, -7.3624e-03,  3.2631e-03, -2.0531e-02, -3.1046e-03,
        -5.4579e-03, -9.2218e-02, -2.3231e-01, -4.7748e-02,  2.1012e-03,
        -1.7577e-01,  3.4656e-03, -5.3678e-02, -3.6114e-01, -1.5977e-01,
        -3.5970e-02, -2.4331e-01, -1.5963e-01, -1.1998e-01, -2.0638e-02,
        -1.8200e-01, -1.4779e-01, -1.1636e-01,  7.0355e-02,  4.9315e-03,
        -1.5210e-02, -7.9486e-04, -3.8849e-02,  3.0101e-02,  1.2263e-01,
        -1.5291e-01, -8.1681e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4769, -0.6699, -0.8774, -0.6053, -0.2457,  0.0946,  0.1775,  0.0683,
        -0.2890,  0.0060, -0.0700,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2194,  1.0566,  0.2031,  0.0390,  0.0915,  0.3473,  0.3197,  0.6383,
         0.2404,  0.2166,  0.1146,  0.0679,  0.2650, -0.1021,  0.2080, -0.1189,
        -0.0184, -0.4535,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1321,  0.1226,  1.1763, -0.0821,  0.0421,  0.0134,  0.1510,  0.1045,
         0.1767,  0.1509,  0.0419,  0.1107, -0.0859, -0.0963, -0.5201,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1536, -1.5909, -0.1947, -0.0229, -0.1187, -0.1331, -0.4623, -0.0718,
        -0.0589, -0.1216, -0.3463, -0.0480,  0.0040, -0.0658,  0.1657,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0404, -0.0427, -0.0597,  0.0244,  0.1293, -0.0774, -0.0835, -0.2149,
        -0.2068, -0.0510, -0.0507, -0.0653, -0.3066,  0.1001, -0.1191, -0.2093,
        -0.1981, -0.0503,  0.0059,  0.0372, -0.1216, -0.3307, -0.0701,  0.1029,
        -0.0879, -0.0338,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3237,  0.1677,  0.2308,  0.0042,  0.0409, -0.0581, -0.0380,  0.6082,
         0.9804,  0.2584, -0.1315, -0.1225,  0.2878, -0.3038,  0.0431,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2480, -1.0178, -0.1219,  0.0188, -0.3854, -0.1018, -0.1952,  0.1044,
        -0.0794, -0.8246,  0.1110, -0.0141, -0.1537,  0.0337, -0.0625, -0.0129,
         0.0164,  0.1110, -0.2278, -0.1538, -0.1005,  0.1213,  0.1283,  0.1650,
        -0.0390,  0.4325,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0238, -0.0295,  0.0743, -0.0531, -0.1454, -0.1330,  0.0482, -0.0596,
        -0.0490, -0.1542, -0.7062, -0.0580, -0.1053, -0.2402, -0.1995, -0.1471,
         0.0361, -0.1279,  0.1211, -0.1653, -0.0322, -0.1063, -0.0710, -0.0505,
         0.1550, -0.1182,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.2651, -1.2581, -0.0253,  0.0272,  0.0152,  0.1394, -0.0407,  0.0464,
        -0.0730, -0.1889, -0.1481, -0.0863, -0.2596, -0.0413, -0.0880,  0.0349,
        -0.0329, -0.0553, -0.1864,  0.0252, -0.0247, -0.1146, -0.0735, -0.2301,
        -0.2930, -0.0760, -0.1136,  0.0036, -0.0078,  0.0085, -0.0682, -0.1363,
         0.1165,  0.0557, -0.1798,  0.2244,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1400, -0.3726, -0.2508, -0.0667, -0.1705, -0.1047, -0.1912, -0.2741,
        -0.0095,  0.1242, -0.0359, -0.0124,  0.0252,  0.0718, -0.0296, -0.0219,
         0.0155, -0.0129,  0.0253,  0.0526, -0.0096, -0.1085, -0.0840, -0.1680,
        -0.1305, -0.1906, -0.0336, -0.0707, -0.1885,  0.0589,  0.0183, -0.1142,
        -0.1919,  0.0412,  0.0096,  0.1192,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4779,  0.9610, -0.0471, -0.0030, -0.0898,  0.0640,  0.1267, -0.2121,
        -0.0242, -0.1379,  0.0955,  0.0030,  0.1327, -0.0377, -0.3608, -0.1441,
         0.1226,  0.0839,  0.0023,  0.2687,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0626e+00, -2.8785e-01,  1.3734e-02, -9.2854e-02, -1.7297e-02,
         2.8999e-03, -2.2855e-01, -1.9656e-01, -3.7981e-02, -1.3303e-01,
         7.4517e-03, -1.3814e-01, -3.8412e-02, -6.3002e-02, -3.6443e-01,
        -1.3757e-01, -5.9445e-02, -1.6805e-01,  2.1671e-03, -2.4044e-03,
         7.8402e-02, -4.2875e-02,  1.5980e-02,  1.4155e-02, -2.6683e-01,
        -8.0297e-04,  3.6952e-02, -1.8896e-02,  3.8366e-02, -2.2393e-01,
        -7.3317e-02, -1.8684e-01, -4.0156e-02, -2.5914e-01,  2.7168e-02,
         6.0681e-02, -1.7429e-02, -1.9307e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3692, -0.2564,  0.0774, -0.2244, -0.0447, -0.0325, -0.0107, -0.0098,
        -0.1126, -0.0334, -0.1720, -0.4029,  0.0576,  0.0304,  0.2149, -0.3727,
        -0.0667, -0.3323, -0.0995, -0.0120, -0.0309,  0.0209,  0.0233,  0.0551,
         0.0641, -0.0606, -0.0128, -0.0373, -0.2672,  0.0235, -0.0277, -0.0117,
         0.1267,  0.0178, -0.0962,  0.0351, -0.0418,  0.0508, -0.0237,  0.0789,
         0.0140], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3370, -1.6848, -0.0311, -0.1803, -0.0902, -0.0781, -0.1297, -0.1223,
        -0.1158, -0.0317, -0.1013, -0.0801, -0.1950, -0.2692,  0.0882, -0.2816,
        -0.1334, -0.2012, -0.0329, -0.1209, -0.0188, -0.1315,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2213,  0.0140,  0.0261, -0.0191,  0.0170, -0.1150, -0.0682,  0.0098,
        -0.0706,  0.0133,  0.0128, -0.1879, -0.0603, -0.0702,  0.0513, -0.1436,
        -0.0071, -0.0797, -0.0264, -0.1651, -0.4383,  0.0783, -0.0281, -0.0820,
        -0.0404, -0.0029,  0.0126, -0.0419, -0.2696,  0.0210, -0.0244, -0.0484,
        -0.0983,  0.0258,  0.0461,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5495,  0.1663,  0.3144,  0.3547,  0.1725,  0.1058,  0.0400, -0.0730,
         0.3162,  0.0516,  0.1267,  0.0814,  0.0787,  0.0511, -0.0423,  0.2016,
        -0.1986,  0.2336,  0.3600,  0.0219, -0.0141,  0.0676, -0.0225,  0.1095,
         0.1011,  0.0357,  0.0873, -0.0347, -0.5336,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3762, -2.1256, -0.6105, -0.5539, -0.2645, -0.4581, -0.1626,  0.2658,
         0.2474,  0.0116, -0.0642, -0.0091, -0.0173, -0.0869,  0.0081, -0.4077,
        -0.0960,  0.1304,  0.0095, -0.0938,  0.3576, -0.1766, -0.1728, -0.0324,
         0.3672,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1816,  0.2129, -0.1900, -0.0356, -0.6642,  0.0511, -0.1514, -0.3008,
        -0.2564,  0.0815,  0.1594,  0.1019, -0.0227,  0.1797, -0.2815, -0.5479,
        -0.1183, -0.0831, -0.1236, -0.0812,  0.0292, -0.4519,  0.3501,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4258,  0.0193,  0.0280,  0.1039,  0.1764, -0.0288,  0.0252, -0.0350,
        -0.0377, -0.0049,  0.0009, -0.0414, -0.0908, -0.8652,  0.0055, -0.1360,
        -0.0125, -0.1018,  0.0217,  0.0377,  0.0398,  0.0581, -0.0503,  0.1055,
        -0.1071, -0.1191, -0.4278, -0.0510, -0.1805, -0.0861, -0.0749, -0.1207,
        -0.2173, -0.0724, -0.0829,  0.2238, -0.1791,  0.3217, -0.2595,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1195,  0.0261,  0.4204, -0.4081, -0.9986, -0.0391, -0.0143, -0.1465,
        -0.4484,  0.3283,  0.2506,  0.1786,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 5.8864e-01, -2.7741e-01, -8.5627e-01, -2.8537e-02, -7.8431e-03,
        -1.7130e-02, -7.8951e-02,  1.6813e-02,  9.3939e-03, -2.7086e-02,
         8.4286e-03, -2.4060e-03, -6.5197e-03,  2.5393e-02, -3.2126e-02,
         1.6205e-03,  1.2953e-02, -1.4918e-01,  2.5270e-03, -3.5109e-02,
         1.3965e-03, -1.3318e-01, -2.1452e-02, -2.4363e-02, -1.4538e-02,
         2.1213e-03,  1.2371e-01,  5.5982e-02, -2.7406e-01,  6.7385e-02,
        -8.4211e-04,  2.8145e-02, -1.2034e-01, -6.1913e-02, -5.8249e-02,
        -4.6527e-02, -9.3830e-02, -2.8260e-02, -3.7700e-03, -7.1205e-03,
         1.6499e-02,  2.6199e-02,  1.7908e-02, -1.5538e-02,  8.7912e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7114, -1.5314, -0.1994, -0.2946,  0.3577,  0.2708,  0.4874, -0.1397,
        -0.0017,  0.1462, -0.1819, -0.3583,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0339,  1.9893,  0.1396,  0.3223,  0.3740,  0.3954,  0.2510, -0.1066,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0795, -0.3643, -0.1424,  0.0008, -0.0851, -0.0456, -0.0482, -0.0616,
        -0.0269, -0.0878, -0.0840, -0.0689,  0.0017, -0.0260, -0.0373,  0.0052,
         0.0138, -0.1630, -0.1243, -0.0805, -0.0539,  0.0063, -0.0394, -0.0691,
        -0.0338, -0.0411,  0.0341, -0.0607, -0.1151,  0.1100, -0.0618, -0.0293,
         0.0008,  0.0029,  0.0361, -0.0282,  0.0271,  0.0343, -0.0106,  0.0391,
        -0.0305, -0.0483, -0.0493, -0.0066,  0.0583,  0.1164, -0.2851, -0.1028,
        -0.3999, -0.2012, -0.0172,  0.0089, -0.1034, -0.0556, -0.0605,  0.0310,
        -0.0414,  0.0169, -0.0777, -0.0317,  0.1384], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2614, -0.1717,  0.0606, -0.0992, -0.3278,  0.1900, -0.0812, -0.0735,
        -0.0583,  0.0257,  0.0124, -0.0209, -0.0063, -0.0536, -0.3013,  0.0084,
         0.0119,  0.0071, -0.1365, -0.1014, -0.2290, -0.1988, -0.0974, -0.0277,
        -0.1620, -0.0893, -0.0587, -0.0358, -0.0716, -0.0887,  0.0332, -0.0714,
         0.0265,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3049, -0.4898, -0.7591, -0.3188, -0.0811, -0.0856, -0.3300, -0.0331,
         0.0132, -0.1423,  0.0336, -0.0392, -0.0684, -0.0219, -0.0662,  0.1539,
        -0.0443, -0.2096, -0.8554, -0.0252,  0.2102,  0.0473,  0.2108, -0.0304,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2453, -0.2657, -0.1433, -0.0938, -0.0993, -0.5389, -0.0476, -0.1079,
         0.0842, -0.2974, -0.2875, -0.3183, -0.1225, -0.0176, -0.0976, -0.0178,
         0.0774, -0.0508,  0.1705,  0.0714, -0.0356,  0.0535,  0.3937, -0.0734,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0089, -0.1086, -0.1560, -0.1326, -0.0067, -0.0571,  0.0704, -0.3136,
        -0.0850,  0.0497,  0.0814, -0.0043,  0.0103,  0.0084, -0.0839, -0.2517,
        -0.1694,  0.0279,  0.0998, -0.0653, -0.1599,  0.0876, -0.1127, -0.3194,
        -0.0150, -0.0888, -0.1989, -0.0748,  0.0574,  0.0232, -0.0588,  0.0096,
        -0.3047,  0.1313,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0725,  0.0513,  0.1314,  0.0915, -0.2656,  0.1594,  0.0543,  0.0929,
        -0.0526,  0.0447, -0.1600, -0.0547,  0.0813, -0.6952, -0.1090,  0.0110,
        -0.3625,  0.1256, -0.1097, -0.1506, -0.0965, -0.4300,  0.0261,  0.0194,
         0.1489,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0810, -0.0286, -0.0544, -0.0297, -0.1691,  0.0128, -0.3628, -0.2378,
        -0.2656, -0.9574,  0.3149, -0.0892, -0.1088,  0.1470, -0.1671, -0.0670,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6710,  0.8493,  0.0678,  0.4936,  0.1491,  0.1422,  2.4134,  0.1155,
        -0.1828, -0.1603, -0.0204, -0.0141,  0.2937,  0.1114,  0.1577, -0.0774,
         0.0635,  0.1498,  0.0853, -0.0298, -0.0474, -0.2083, -0.3378,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3570,  0.4511,  0.7193,  0.6965,  0.8911,  0.1031,  0.0609, -0.1496,
         0.1419, -0.2254, -0.0576, -0.2510, -0.4847, -0.0949,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.1710, -0.1690, -0.2269, -0.5987,  0.0240, -0.0282, -0.1053,  0.0649,
        -0.1765, -0.1450, -0.2882, -0.2255, -0.2226, -0.0430,  0.0108, -0.0290,
        -0.2073, -0.6018, -0.3131, -0.4321, -0.1609,  0.0450, -0.1246, -0.0225,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2343e-01, -2.8628e-01, -3.4231e-01, -6.7492e-02,  1.1524e-01,
        -2.0537e-01, -7.1158e-01, -2.4471e-01, -2.2071e-01,  5.3450e-04,
        -8.0418e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0861,  0.0818, -0.0017, -0.0670, -0.0673, -0.1554, -0.3578,  0.0289,
        -0.2685, -0.1023, -0.1277,  0.1168, -0.0673, -0.0548, -0.0600,  0.0399,
        -0.0814, -0.1730,  0.1196, -0.0512,  0.2084,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4911, -0.3106, -0.2973, -0.0665, -0.2755, -0.1953, -0.2181, -0.9647,
        -0.1042, -0.5855, -0.3610,  0.0435,  0.0252, -0.0120,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3349,  0.1420, -0.0315, -0.0526, -0.0547, -0.0642, -0.0313, -0.2586,
        -0.0740,  0.1135,  0.0274,  0.2970,  0.8701,  0.1033,  0.1222,  0.3608,
        -0.4481,  0.0838, -0.0804, -0.0762,  0.3579,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2187, -1.1651,  0.0033, -0.0844,  0.1559, -0.5554,  0.0369, -0.6571,
        -0.0961, -0.0996, -0.1624,  0.2486,  0.1959,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0305, -0.1412,  0.0559, -0.0016,  0.0408,  0.0378, -0.0988, -0.1122,
         0.0941,  0.0051,  0.0519, -0.1153,  0.0895,  0.0028, -0.0637, -0.0190,
         0.0612,  0.0419,  0.0783, -0.0508,  0.0702,  0.0813,  0.1208,  0.0706,
        -0.4626,  0.0307, -0.6188, -0.3234, -0.2881,  0.0412, -0.1382, -0.1879,
         0.0728,  0.0557,  0.0754, -0.0713, -0.0644, -0.0319,  0.0334, -0.0787,
        -0.0283,  0.0531,  0.0437, -0.3135,  0.0677], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0425, -0.1096, -0.1649, -0.0479,  0.0941,  0.0288, -0.0226, -0.0419,
        -0.0122, -0.0174, -0.1821,  0.0120, -0.0138,  0.1183, -0.2999, -0.1056,
        -0.5401, -0.1710, -0.0675, -0.0433, -0.0343, -0.2305, -0.0048, -0.2302,
        -0.0504,  0.0350, -0.0500,  0.0208, -0.1403, -0.0756,  0.0009,  0.0263,
        -0.0263,  0.2328,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5771,  0.6028,  0.3387,  0.3448,  0.1797,  0.1457,  0.1120,  0.0361,
         0.0753,  0.0961,  0.0456,  0.0229,  0.0220,  0.0966,  0.0069, -0.0337,
        -0.0067,  0.1008,  0.0473,  0.5920,  1.1363, -0.0500,  0.1344, -0.0602,
        -0.1017,  0.0077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2590, -0.9013, -0.0365, -0.1837,  0.1322,  0.0032, -0.0927, -0.4026,
        -0.1860, -0.1430, -0.0779,  0.0313, -0.0523,  0.0103,  0.1162, -0.2442,
        -0.5122, -0.0196, -0.0702, -0.0062, -0.1653,  0.1412,  0.0864,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2617, -0.3496,  0.0724, -0.0449, -0.0011,  0.0020, -0.0692, -0.0671,
        -0.2042, -0.0114,  0.0640, -0.0336, -0.0051,  0.0527, -0.2966,  0.1697,
        -0.0079, -0.1669, -0.2876, -0.2201, -0.0553, -0.0016, -0.0112,  0.1652,
        -0.0153, -0.0421, -0.0816, -0.3784,  0.0138, -0.0250,  0.0710, -0.0170,
         0.0439,  0.0180, -0.0074, -0.0990,  0.0296,  0.0049, -0.1719,  0.3014,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0552, -0.2417, -0.1518, -0.0501, -0.1539,  0.0048, -0.0030, -0.1620,
         0.0362, -0.0205, -0.0598,  0.0077, -0.1430, -0.0707, -0.1451, -0.1448,
        -0.1125, -0.0383, -0.0341, -0.0808,  0.0101, -0.1270, -0.1030, -0.0220,
        -0.0479,  0.0079,  0.0266, -0.1606, -0.0342, -0.0183, -0.1331, -0.0220,
        -0.2355,  0.0828,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.8709e-01,  2.3331e-02, -9.2804e-04,  2.4578e-02, -1.8758e-01,
         1.3394e-01,  2.8985e-02, -1.1445e-04,  1.2625e-02,  2.5652e-02,
        -2.6059e-02, -3.4585e-02,  6.4001e-02, -7.1110e-02, -5.7498e-01,
         2.3744e-02, -5.4595e-02, -5.0670e-02, -1.9101e-01,  2.0764e-02,
        -6.0253e-02,  1.3996e-02,  1.3583e-02,  5.8982e-02,  2.1468e-02,
        -2.2063e-01,  1.4298e-02, -4.7149e-02, -2.2403e-02, -4.4901e-02,
        -4.8360e-02, -1.9263e-02,  2.0436e-02, -4.5799e-02, -1.2274e-01,
        -3.3297e-02, -1.3882e-02, -1.0624e-01, -7.3793e-02, -1.3812e-01,
        -1.1876e-01, -4.7051e-02, -8.8692e-02, -5.4314e-02, -5.1322e-02,
         9.2560e-03,  5.2651e-02,  3.8001e-02, -2.9731e-02,  1.5231e-02,
        -8.4999e-02, -8.0543e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4385, -0.1193, -0.2495, -0.1426, -0.3421, -0.1428,  0.0602,  0.0957,
         0.0141, -0.0876,  0.0164,  0.1119, -0.0097, -0.2531, -0.0413, -0.2798,
        -0.3717, -0.1179, -0.1288, -0.0461, -0.0970, -0.0699, -0.1031, -0.2386,
         0.0011,  0.0150,  0.0250,  0.0247, -0.0069,  0.0436,  0.1013,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7309, -0.3974,  0.2448,  0.1228, -0.1157,  0.0668, -0.0593,  0.0873,
        -0.1837,  0.0744, -0.1358, -0.0841,  0.0938, -0.0514, -0.1194, -0.3019,
         0.0702, -0.1622, -0.3020, -0.3409, -0.1191, -0.0349, -0.1468,  0.1606,
        -0.2302,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1138,  0.0109,  0.1388, -0.0276,  0.2561,  0.4541,  0.5832,  0.2317,
         0.1918,  0.2329,  0.1511, -0.1711,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2718, -0.8482,  0.0080, -0.3013, -0.2814, -0.0093,  0.0424, -0.0146,
         0.0498, -0.0576,  0.0174, -0.0783, -0.1515, -0.3674,  0.1258, -0.1162,
         0.0289, -0.0194,  0.0601,  0.0594, -0.0879,  0.0437, -0.0681, -0.0403,
         0.0929, -0.3765,  0.0212,  0.0097, -0.1199,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0922, -1.1722,  0.2222,  0.2264, -0.0207, -0.3381, -0.7257,  0.2443,
        -0.1366,  0.2376,  0.2170, -0.1847,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0773, -1.0902,  0.0731, -0.2383, -0.1120, -0.2601, -0.7094, -0.0311,
        -0.3082, -0.0974, -0.2461, -0.2173,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0395,  0.2281, -0.4979, -0.0161, -0.3307,  0.0615, -0.1085, -0.3441,
        -0.4533, -0.0418, -0.0439, -0.2466, -0.1003,  0.0560,  0.1641, -0.1994,
        -0.0797, -0.0281,  0.3670, -0.0187,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1037, -1.0950, -0.0632, -0.0761, -0.7021, -0.2072, -0.1464, -0.0097,
        -0.0705,  0.0089, -0.0058, -0.1870, -0.1571, -0.0415,  0.0938,  0.0427,
        -0.1041, -0.0455, -0.0266, -0.0799, -0.1030,  0.0107, -0.0459,  0.0670,
         0.0637,  0.1018,  0.0484, -0.0929, -0.0406,  0.0297,  0.0155, -0.0181,
        -0.1207, -0.3002, -0.0216,  0.1494, -0.0586,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1351,  2.4477, -0.0390, -0.2146,  0.1912, -0.1083, -0.1159,  0.2158,
         0.1026,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0875,  0.0661,  0.2766, -0.3520, -0.0782, -0.0284, -0.1609, -0.2234,
        -0.0186, -0.0069,  0.0260,  0.0369, -0.1999, -0.2109,  0.0016, -0.1221,
        -0.0811,  0.0623, -0.1378, -0.1901,  0.0520,  0.0386, -0.0218,  0.1088,
        -0.0109,  0.0522,  0.1071,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2674,  1.4941, -0.0855,  0.1874,  0.0728,  0.0989, -0.0563, -0.0475,
        -0.0084,  0.0317,  0.0959,  0.0256,  0.0535, -0.1109,  0.0388, -0.1436,
         0.3317, -0.0224,  0.0895,  0.0315, -0.1034,  0.2803,  0.1477,  0.0317,
        -0.0089, -0.0360,  0.0566,  0.0865,  0.3459,  0.0662,  0.0473, -0.1995,
         0.2155,  0.3607,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.8507e-01,  2.4660e-02,  5.6187e-03,  1.5448e-02, -8.3757e-02,
        -4.5835e-02, -1.2987e-02,  2.1213e-04,  5.4722e-02,  5.6011e-02,
        -4.7271e-02, -2.7343e-01, -1.3934e-01, -4.3466e-02,  5.7325e-04,
        -2.2520e-02, -2.4019e-01,  2.2266e-01, -4.5755e-03, -1.8868e-02,
         4.6601e-02,  1.0559e-02, -1.9117e-01, -1.0341e-01, -7.9110e-02,
         1.7405e-02, -2.5209e-02, -2.0793e-02,  2.7440e-02,  9.7407e-04,
         2.5262e-02, -1.8966e-01, -4.4917e-01, -1.8339e-02,  2.5057e-02,
        -8.4053e-02, -2.1474e-01,  2.3528e-02,  2.9476e-02,  2.5376e-02,
         1.8588e-02, -1.3832e-01,  6.0016e-02,  1.4361e-01,  4.1874e-01,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2295, -0.0157, -0.0659, -0.0699, -0.1890, -0.3065, -0.5254, -0.1342,
        -0.1038, -0.2966, -0.1095, -0.1385, -0.2719,  0.1408, -0.1112,  0.1850,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1005,  0.1202,  0.3144,  0.2495, -0.1469,  0.3627, -0.0821,  0.4264,
         0.4784,  0.3542,  0.2709,  0.0866,  0.1378,  0.2702,  0.1715, -0.0456,
         0.0690, -0.0649,  0.0556,  0.0885, -0.0477, -0.0332,  0.1464,  0.0783,
         0.1852, -0.3979, -0.1718,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0713,  0.0254, -0.1354, -0.0324, -0.0020, -0.0432, -0.1578, -0.0088,
        -0.1234, -0.0220,  0.0077, -0.0004, -0.0251, -0.0100, -0.0454, -0.0361,
         0.0110,  0.0337,  0.1086, -0.0986,  0.0151, -0.0844, -0.0092, -0.1984,
         0.0026,  0.0109, -0.1307, -0.1894,  0.0036, -0.2598, -0.0131, -0.1288,
        -0.1300, -0.0491, -0.0379, -0.0871, -0.0257, -0.1096, -0.0006,  0.0204,
        -0.0076, -0.0772, -0.0068, -0.0466, -0.1806,  0.0641], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5992,  0.0208, -0.0173, -0.0432, -0.0092, -0.1690, -0.0172, -0.2526,
         0.0384,  0.0759, -0.0007,  0.2034,  0.0456, -0.0131, -0.0611,  0.0358,
        -0.1124, -0.0796, -0.2191, -0.6361, -0.0965, -0.0761,  0.0232, -0.1284,
         0.0232, -0.1359, -0.4321, -0.3115,  0.1372, -0.0677,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5092,  0.3215, -1.1123, -0.1071,  0.2968,  0.5045, -0.0158,  0.8388,
         0.3983, -0.2930,  0.1461,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5385, -0.8992, -0.2560, -0.0356, -0.1630, -0.0652, -0.1712, -0.6692,
         0.0837, -0.2370,  0.0388, -0.1367, -0.0685, -0.0095,  0.0329, -0.2132,
        -0.0247, -0.1242, -0.0445, -0.0036,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7207,  2.8621,  0.5887,  0.5847, -0.2351,  0.4744,  0.4467,  0.0241,
         0.1071,  0.1407, -0.1419,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0848,  0.9091,  0.4638,  0.4357,  0.1540,  0.1422,  0.0116,  0.0617,
         0.2016, -0.0669,  0.0887, -0.0913, -0.0080,  0.1752,  0.0414,  0.0925,
        -0.0095,  0.0048, -0.0233,  0.1721, -0.2381,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9477,  0.5825,  0.3474,  0.1861, -0.1921,  1.2304, -0.3258, -0.1775,
         0.2413, -0.0887, -0.0568,  0.0503, -0.1052, -0.0534,  0.0865,  0.1987,
        -0.0679, -0.0717,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2314e-01, -2.6757e-01, -5.6306e-01, -1.7502e-01, -1.4566e-01,
        -7.2210e-03,  2.8151e-03,  2.1218e-02, -3.3153e-02, -2.1963e-02,
         6.8009e-02,  1.5770e-03, -4.6434e-04, -1.7194e-01, -1.5256e-01,
        -4.4936e-02, -9.0674e-02, -1.5006e-01, -1.3051e-01, -1.0937e-01,
        -7.8470e-02, -1.2011e-01, -2.1460e-01, -2.1480e-02, -7.9422e-02,
         3.8794e-02, -1.0579e-01, -7.2940e-02, -3.9887e-02, -1.7200e-01,
        -9.3738e-03, -3.6193e-02, -2.1123e-02,  1.3757e-02, -4.7089e-02,
        -1.6627e-02, -2.1348e-02, -1.2910e-02,  1.2269e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7715e-01, -1.3364e+00, -5.8065e-02, -7.2339e-02, -1.0114e-01,
         1.8684e-04, -1.8964e-02, -2.9481e-02,  1.4411e-01, -1.8694e-02,
        -7.0943e-02, -1.5204e-01, -1.8588e-03, -3.5648e-02,  5.0138e-02,
        -1.7431e-01, -3.0589e-01, -9.7694e-02, -2.4974e-02, -6.5095e-02,
        -5.9590e-02, -8.2883e-02,  6.6392e-02,  8.6299e-03, -1.8120e-02,
         1.2954e-02, -7.6170e-02,  3.5757e-02, -2.0323e-01, -2.2485e-01,
        -1.0732e-03, -4.1024e-02, -2.1519e-01,  1.7438e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.8044,  1.0845,  0.7471,  0.3598,  0.0125,  0.3986,  0.1069,  0.0440,
         0.1132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0303,  0.9906,  0.1741,  0.2973,  0.1940,  0.6284,  0.0708,  0.1478,
        -0.0644,  0.0814,  0.1743,  0.0473, -0.1471, -0.0187,  0.1011,  0.1839,
         0.0089,  0.4197, -0.0131,  0.1135,  0.0813, -0.0519,  0.0205,  0.0425,
         0.1708,  0.0276,  0.0811,  0.0231, -0.1279, -0.0158, -0.0106,  0.0629,
        -0.0308,  0.0320, -0.0237, -0.1062, -0.0868, -0.1029,  0.0834,  0.3916,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0956e-02, -2.3508e-01, -6.0632e-01, -8.1434e-02, -6.9290e-06,
         8.0832e-02, -2.8764e-02, -1.6190e-02, -5.6094e-02, -2.2076e-01,
        -6.2803e-03,  3.3366e-02,  1.8796e-02,  2.9619e-02,  4.7005e-02,
         1.2987e-02,  3.3853e-03,  5.0632e-02, -1.7514e-01,  3.5963e-02,
        -1.9934e-02, -2.5462e-02,  6.0377e-02, -7.5348e-02,  4.4222e-02,
        -4.4247e-02, -2.3878e-02,  2.1851e-02, -2.4298e-01, -2.2383e-01,
        -4.3445e-02, -1.4846e-01,  2.9094e-02, -2.3690e-02,  3.0694e-02,
         6.1346e-02, -8.6948e-02, -1.9321e-01,  3.0661e-02, -9.9064e-02,
         1.3195e-01, -2.1975e-02,  1.8625e-02,  2.5879e-02,  5.1441e-03,
         2.0709e-02,  3.8077e-02,  1.3411e-02,  4.6572e-02, -1.1954e-01,
        -1.3369e-01, -1.3409e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6535, -1.1237, -0.2299, -0.0425,  0.3062,  0.0107, -0.2253,  0.0176,
        -0.1249, -0.0343,  0.0739, -0.1869, -0.3959, -0.0255, -0.1652, -0.1234,
        -0.2626, -0.2463, -0.3245, -0.0153,  0.3669, -0.2076,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2532e-04,  2.2992e+00,  2.9174e-01,  1.5930e-02,  9.5554e-02,
        -3.4952e-03,  5.5906e-02,  2.3619e-01,  2.3580e-03, -8.1032e-03,
         2.3086e-01,  1.2742e-01,  1.1557e-01,  6.3181e-02,  1.1807e-01,
         2.1772e-01, -5.2327e-02, -3.1535e-03,  7.3481e-02, -2.4207e-01,
        -1.5364e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0176, -0.2426, -0.0764, -0.0828,  0.1211, -0.0087, -0.0847, -0.0495,
        -0.0557, -0.0066, -0.0152,  0.0376,  0.0078, -0.0054,  0.0101,  0.0052,
         0.0243,  0.0231, -0.0605, -0.0217, -0.0055,  0.0284,  0.0207,  0.0111,
         0.0278,  0.0261, -0.1116, -0.1036, -0.0382, -0.0443, -0.3156, -0.2325,
        -0.0612, -0.0716, -0.0550,  0.0035,  0.0537,  0.0159, -0.0060,  0.0315,
        -0.5160, -0.0201, -0.0525, -0.1055, -0.1690, -0.0201, -0.0914, -0.0901,
         0.0255,  0.1089, -0.0887, -0.0061,  0.0013, -0.0184, -0.2305],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0974, -0.0578,  0.0971, -0.0561, -0.0263, -0.0693, -0.1771,  0.0186,
         0.0598, -0.0110,  0.0804, -0.0151, -0.0334, -0.1099, -0.0357, -0.0550,
         0.0193, -0.0378, -0.0271, -0.0668, -0.1620, -0.0817, -0.1159, -0.0081,
         0.1098, -0.0073,  0.0144, -0.3858,  0.0069, -0.0699, -0.0402, -0.0751,
        -0.1013, -0.2384,  0.1480, -0.1154, -0.0478, -0.1545, -0.1175, -0.1713,
        -0.0304,  0.0251,  0.0033,  0.0692,  0.0163,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6843,  0.5898,  0.4019,  0.3139, -0.0300,  0.0636,  0.1156,  0.1366,
         0.2317,  0.0156,  0.1092,  0.2198,  0.0889,  0.2177,  0.2135, -0.3018,
         0.1290,  0.4734,  0.0678,  0.0398,  0.3230, -0.0315,  0.2455, -0.0191,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0116,  0.1478, -0.0424,  0.0443, -0.0632,  0.0245, -0.0895, -0.0989,
         0.0386,  0.0427, -0.0042,  0.0112, -0.0350, -0.1887, -0.0486,  0.1134,
        -0.2634, -0.1401, -0.1381, -0.1378, -0.0370, -0.0105, -0.1616, -0.1221,
         0.1747, -0.1761, -0.1215, -0.0792, -0.1374, -0.2213, -0.0813, -0.0449,
        -0.1440,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3317, -2.1316, -0.1711, -0.1709, -0.0353,  0.0097, -0.0306, -0.0478,
        -0.5648, -0.0586,  0.1328, -0.0425,  0.1350, -0.0635,  0.3772, -0.0268,
        -0.0190,  0.1155, -0.0055,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5323, -0.1004,  0.1163, -0.2029, -0.4298,  0.0475, -0.2405, -0.1846,
        -0.3947, -0.0899, -0.0411, -0.1107, -0.0475, -0.1375, -0.1188,  0.0499,
        -0.0729, -0.2413, -0.0457, -0.0577, -0.1462, -0.5148, -0.0767,  0.0463,
         0.0988,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7455, -0.9655, -0.2920, -0.1128, -0.2453, -0.2825, -0.6951,  0.1234,
        -0.0235, -0.1033, -0.0501, -0.0155, -0.1343,  0.0072, -0.0505,  0.5106,
         0.3357,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.1532,  0.0998, -0.1734,  0.1451, -0.1200, -0.0277,  0.0493, -0.1995,
        -0.1262, -0.2550, -0.0345, -0.1635,  0.0266, -0.0318, -0.0789, -0.0539,
        -0.1263, -0.4360, -0.0614, -0.2303,  0.0745, -0.4315, -0.4047, -0.0267,
        -0.0980, -0.1297, -0.0752, -0.2057, -0.0877, -0.0743,  0.0515, -0.0938,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1667,  0.5702,  0.4414,  0.1038, -0.1015,  0.0956,  0.2630,  0.3506,
         0.0598,  0.0046, -0.0453,  0.1492,  0.0044,  0.0415,  0.2337, -0.0835,
         0.0066,  0.0384,  0.2025,  0.0111,  0.1382, -0.0940,  0.1795,  0.1347,
         0.1915,  0.0413,  0.0792, -0.0115,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2192,  0.2233, -0.0180, -0.0426,  0.1252,  0.1861,  0.3710,  0.0057,
         0.1995,  0.1829,  0.1240,  0.3233,  0.0865,  0.0508,  0.0614,  0.3141,
         0.0895,  0.1179,  0.1538, -0.0283,  0.0730,  0.0489,  0.0417,  0.1160,
         0.1789, -0.1141,  0.0102,  0.2496, -0.2400,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1966, -0.1595, -0.0905, -0.1842, -0.5050, -0.0651,  0.1167,  0.0052,
        -0.0076,  0.1224,  0.0204, -0.0783, -0.1363, -0.3947,  0.0765, -0.1553,
        -0.1038, -0.5368, -0.0700, -0.1801,  0.0364,  0.0534, -0.1679, -0.1071,
        -0.0634, -0.0398, -0.0462, -0.0606, -0.0398,  0.1388,  0.0305,  0.2072,
        -0.1313,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2740, -0.9719, -0.1228, -0.0896, -0.1676, -0.2693,  0.1401, -0.2843,
        -0.0292, -0.2154, -0.1572,  0.0054, -0.0376, -0.0687, -0.0615,  0.0013,
        -0.2027, -0.0601, -0.8075,  0.1288,  0.0725, -0.0582,  0.0922, -0.0923,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2432, -0.7540, -0.3634, -0.0545, -0.0288,  0.3204, -0.1508, -0.2752,
         0.0233, -0.1400, -0.1586, -0.3078, -0.1195, -0.0812,  0.0008, -0.0385,
         0.0177, -0.1281, -0.0203, -0.4728, -0.1168, -0.0150, -0.1447,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6922e-02, -8.7122e-01, -1.0960e-01,  1.2453e-01, -6.4853e-02,
        -1.9699e-01,  2.8770e-02, -5.3422e-03,  5.9504e-02, -1.8728e-01,
         1.6645e-01, -4.2115e-02, -7.7169e-02,  6.4439e-03, -2.5119e-02,
        -2.7864e-01, -2.5774e-01, -1.9259e-01, -1.2132e-01, -6.9554e-02,
        -5.6282e-03, -1.4476e-01, -3.7852e-02,  4.2895e-02, -3.0125e-01,
        -9.3699e-02, -4.3925e-02, -6.7051e-02, -3.7359e-03, -2.6484e-01,
        -1.8506e-01, -7.9022e-02,  1.1272e-02, -2.6935e-02, -1.0115e-01,
        -1.2650e-01, -1.0854e-01, -1.6108e-01,  3.0635e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4777e-02, -3.4382e-01, -1.2857e-01, -1.8347e-02, -5.0298e-01,
        -7.6232e-03, -8.3771e-04, -7.0348e-04,  1.5800e-02,  7.3020e-02,
        -1.5510e-02, -2.5710e-02,  1.2266e-04,  1.7622e-02,  1.6130e-02,
        -2.5954e-01, -4.7222e-02,  1.6237e-02, -8.7991e-03, -3.1900e-02,
        -8.5429e-02,  9.7647e-03, -6.1276e-03,  1.1839e-02, -2.7347e-01,
        -2.1154e-01, -6.7821e-02, -6.8388e-03, -2.7198e-02, -1.5476e-01,
         4.4781e-03,  4.0855e-03, -2.3891e-02, -2.6005e-02, -3.0274e-02,
        -3.3776e-02, -5.6275e-03, -2.0704e-01,  1.7123e-02,  1.0573e-02,
        -5.6699e-02,  5.9674e-03,  1.2892e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.8044e-02, -7.8782e-01,  8.4297e-02, -1.0530e-02, -4.2961e-02,
        -8.4362e-02, -1.7525e-02,  7.6529e-02, -2.6220e-01,  9.5100e-02,
        -3.5171e-02, -2.8236e-02, -4.8988e-02, -1.7125e-02, -5.8097e-02,
        -1.3881e-01, -1.3834e-01, -3.8006e-01, -3.5757e-02, -3.2395e-02,
         2.3429e-02, -3.3193e-02, -2.5657e-02, -1.6158e-02, -1.6179e-02,
        -5.5488e-02, -2.0105e-02,  5.8899e-04, -6.6135e-02, -3.2353e-02,
        -3.6928e-03, -1.2944e-01, -2.4780e-01,  1.1208e-01,  8.4810e-02,
         7.1390e-02,  1.1032e-01, -6.3296e-02, -1.1856e-01, -1.1678e-01,
        -8.6927e-02, -2.5035e-01, -5.4608e-02,  6.4279e-02,  2.3325e-02,
        -1.7179e-02, -2.3890e-02,  1.5748e-02, -1.5728e-02, -7.6199e-02,
         1.2793e-01, -9.8424e-03, -1.1044e-01, -1.6142e-02,  8.0938e-02,
         1.8144e-02, -1.4440e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1048,  1.5062, -0.0022,  0.3380,  0.3533,  0.4340,  0.2804,  0.3408,
         0.1992,  0.0607,  0.4339, -0.0271,  0.0837,  0.0466,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0576,  0.7266,  0.1931,  0.0277,  0.0787,  0.1951,  0.0822,  0.0463,
         0.0098,  0.1325,  0.1775,  1.1382,  0.2436, -0.0057,  0.2333,  0.2037,
         0.0867,  0.1740,  0.0241,  0.2648,  0.2422,  0.0164,  0.2620, -0.0124,
        -0.0239, -0.0065, -0.0589,  0.0709, -0.0344, -0.0322,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5604,  0.4850, -0.0162,  0.2721,  0.3710,  0.2253, -0.0019, -0.1779,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-6.9825e-01,  9.4287e-01,  3.5936e-01,  1.9584e-01,  2.0930e-01,
         4.2418e-01, -6.8549e-02,  8.5802e-02, -8.7753e-02,  3.4374e-02,
         5.1923e-02,  2.2537e-01,  1.6565e-01,  3.2748e-01, -8.2312e-02,
         2.9998e-02,  2.2819e-05,  3.2169e-02,  2.6179e-03, -7.4131e-03,
        -1.7623e-01, -2.4094e-02,  5.3439e-02,  2.0158e-01,  1.6783e-01,
         2.2204e-02,  1.6572e-01, -4.1372e-02,  2.7958e-01,  5.3729e-01,
         1.7036e-02,  3.0447e-01, -3.2969e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2778e-01, -7.6741e-01, -3.5542e-01, -1.2812e-01,  2.7395e-02,
        -1.4927e-01, -8.0188e-02, -2.2885e-01,  2.1949e-01,  1.5657e-02,
        -1.5668e-02, -3.6986e-04, -6.4387e-02, -1.4353e-02,  1.3724e-02,
        -6.4842e-03, -7.5975e-03,  2.7558e-02, -6.9759e-02,  4.4856e-02,
         5.9319e-02, -1.4095e-02, -2.4919e-02, -1.6362e-03,  6.9579e-02,
        -7.4795e-02, -1.1408e-01, -1.4821e-01,  2.1543e-02, -3.3538e-02,
        -4.9955e-02,  2.1521e-02, -1.0092e-01, -2.6247e-01,  3.6162e-02,
        -9.7700e-03, -4.9173e-02, -1.1354e-02, -1.0057e-03,  4.4285e-02,
         5.8859e-03, -1.2808e-02, -2.2194e-02, -1.4171e-01,  3.9611e-05,
         2.6687e-04, -4.6270e-02,  5.5820e-02, -6.4703e-02, -5.6685e-02,
        -2.3306e-02, -7.2878e-02, -1.5498e-02, -6.6807e-03, -3.2982e-02,
         6.3776e-02,  4.7102e-02,  1.1302e-02, -3.7353e-02, -7.5474e-02,
        -4.9598e-02, -1.4009e-02, -3.5720e-01,  2.3419e-02, -2.2500e-02,
        -1.4762e-02,  1.4234e-03,  1.8098e-01,  4.6722e-02,  1.7130e-02,
        -1.6272e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2287, -0.0269, -0.0315, -0.0427, -0.0776, -0.0761, -0.5923, -0.0375,
         0.0421, -0.0177, -0.0426,  0.0599,  0.0394,  0.0771,  0.0164, -0.0225,
        -0.0372,  0.0098, -0.0861, -0.2088,  0.0195, -0.0013,  0.2218, -0.1468,
         0.0531, -0.0934, -0.0529, -0.0869, -0.0504, -0.0644,  0.0537,  0.0796,
        -0.2336, -0.2209, -0.0769, -0.1090, -0.1492, -0.0218, -0.0459,  0.0168,
         0.1854, -0.1343,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4575, -1.0632,  0.0179,  0.0335, -0.0975,  0.0196,  0.1203,  0.1084,
        -0.1267, -0.3286, -0.0633, -0.1152, -0.3122, -0.0322,  0.0354,  0.0248,
        -0.0063, -0.1946, -0.4758, -0.0528, -0.0705, -0.0020, -0.0229, -0.0269,
         0.0686, -0.1792, -0.1042, -0.0164, -0.3088, -0.1054, -0.0616, -0.0025,
         0.0585, -0.0147, -0.0246,  0.0986,  0.1124,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0221, -1.9624, -0.6141, -0.6310, -0.2516, -0.0780, -0.0640, -0.0134,
        -0.0541,  0.0138, -0.0848,  0.1294, -0.2957, -0.0050,  0.0968,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8407,  0.2415, -0.1342,  0.0159,  0.3370,  0.2634,  0.0923,  0.1368,
         0.5568,  0.1723, -0.1241,  0.1051,  0.2529,  0.2231, -0.1660, -0.1750,
         0.2700,  0.2303, -0.0237, -0.0723,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3988, -0.0147,  0.0166,  0.0180, -0.4917, -0.0675, -0.0693, -0.0723,
         0.0275, -0.0646, -0.1616, -0.2800, -0.1525, -0.0510, -0.1076, -0.0696,
         0.0516, -0.0604, -0.0185, -0.1807, -0.0139,  0.0062,  0.0271,  0.0070,
         0.0296, -0.1169, -0.1099,  0.0219, -0.0335, -0.0214, -0.1986, -0.0804,
         0.0106, -0.0715, -0.0971, -0.2134, -0.0593, -0.0097, -0.0628, -0.0627,
         0.0841,  0.0657, -0.0606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5224, -0.0362, -0.1387, -0.1067, -0.0929, -0.1198, -0.0736, -0.4542,
        -0.1283, -0.0466, -0.1119, -0.0302, -0.0402, -0.2647, -0.4706, -0.0971,
         0.0371,  0.0196,  0.0104,  0.0515, -0.1557, -0.0483, -0.0150, -0.0514,
         0.0168, -0.1609, -0.0557, -0.0296, -0.6038, -0.0100, -0.0921, -0.2100,
        -0.0323, -0.1214, -0.1499, -0.0201,  0.0687, -0.0193,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6431, -1.2464, -0.4532, -0.6667, -0.0422, -0.5089,  0.1601,  0.3165,
        -0.0423, -0.3322, -0.1192, -0.0805, -0.1263, -0.0365,  0.1034, -0.1013,
        -0.1228, -0.2192,  0.1040,  0.1145,  0.0443,  0.1241,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3102,  0.3798, -0.0699, -0.2060,  0.0543,  0.1618,  0.0549,  0.1120,
         0.1987,  0.6865, -0.0381, -0.0425,  0.1413, -0.0241,  0.1845,  0.2203,
         1.0545, -0.0315,  0.0761,  0.3013,  0.0897,  0.0100,  0.2444,  0.1191,
         0.4265,  0.0488,  0.0700, -0.0473,  0.0916,  0.0748, -0.2585, -0.2352,
        -0.0864, -0.1215,  0.0744,  0.7994, -0.0606,  0.0994, -0.0586,  0.0847,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1408, -0.0687, -0.0505, -0.2293,  0.0028, -0.1001, -0.0150, -0.0904,
        -0.3084, -0.0940, -0.0345, -0.0416, -0.0693, -0.4887, -0.2396,  0.0098,
        -0.0201, -0.0256, -0.2632, -0.0130, -0.0318,  0.0365,  0.0286, -0.0182,
        -0.0458, -0.3332, -0.0121, -0.2163, -0.2552,  0.0637, -0.0246, -0.2173,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0175,  2.1650,  0.0415, -0.0391,  0.0562, -0.1698, -0.0172,  0.3454,
         0.5278,  0.1148, -0.0910,  0.1199, -0.0434, -0.0619, -0.0372,  0.0439,
         0.1965,  0.0315, -0.0305,  0.0260,  0.0404,  0.0557, -0.0702,  0.4430,
         0.0398, -0.2009,  0.1328,  0.0828, -0.0197,  0.1014,  0.0469,  0.0801,
         0.1279,  0.0682,  0.0420,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.8801, -0.2956, -0.5269, -0.4368, -0.3894, -0.2524, -0.7671, -0.6655,
        -0.2568, -0.3632, -0.0740, -0.1158, -0.0589, -0.2566, -0.0604, -0.0932,
        -0.0802, -0.0195,  0.0481, -0.1749,  0.1904,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2582, -1.4043, -0.3061, -0.2200, -0.2433, -0.1642, -0.0273, -0.1355,
        -0.3553, -0.0302,  0.0146,  0.0053,  0.0220,  0.0680, -0.1622,  0.1298,
        -0.2167,  0.0287, -0.0073,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1361, -0.7083, -0.3675,  0.1316, -0.1838, -0.0854,  0.0455,  0.0099,
        -0.0235, -0.4348, -0.7549,  0.0877, -0.0715, -0.6286, -0.0737, -0.1707,
        -0.0924, -0.2168, -0.0280,  0.1675,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1672e-01, -1.5411e+00, -3.3006e-02, -4.0135e-01, -1.0025e-01,
         1.5369e-01, -5.4357e-01, -3.1116e-01,  5.3660e-02,  5.1683e-02,
        -8.4134e-02, -6.4933e-02,  5.7466e-04, -1.1859e-01,  3.0041e-02,
        -1.2195e-01, -2.9955e-01,  1.1432e-02, -1.6800e-01, -2.5325e-01,
         4.3672e-02, -6.3660e-02,  3.7123e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0596e-01,  1.2373e+00,  2.0118e-01,  2.1259e-01, -9.2248e-02,
         4.7121e-02,  3.8399e-01,  7.3190e-02,  1.1929e-01,  7.3516e-01,
         8.1890e-02,  8.1051e-02,  1.4478e-01,  4.8506e-01,  1.7678e-02,
        -6.8292e-04, -1.2019e-01,  4.9769e-02,  8.7235e-02, -7.6502e-02,
         5.4589e-02, -7.0628e-02, -7.4024e-02,  2.5430e-01, -4.0500e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5545,  1.5467,  0.1717,  0.1165, -0.1276, -0.0383, -0.0026, -0.1404,
        -0.1909,  0.0837,  0.2986, -0.0045,  0.0087, -0.1183,  0.0776, -0.0989,
         0.0905,  0.0257, -0.0784,  0.0206,  0.3270,  0.1733,  0.1304,  0.1420,
         0.0916,  0.1238,  0.0886, -0.0714,  0.1978,  0.1220,  0.1107, -0.1263,
         0.0125,  0.0900, -0.1550,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3643,  0.0776, -0.1535, -0.2482, -0.0692,  0.0090, -0.2075, -0.0594,
        -0.4659, -0.0173,  0.0160,  0.0875,  0.1410, -0.1551, -0.1964, -0.4063,
        -0.0774, -0.4681, -0.0363, -0.1080, -0.0275,  0.0473,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1102, -0.1711,  0.1256, -0.1240, -0.1072, -0.1528, -0.0540,  0.0022,
        -0.0702, -0.4842, -0.0992, -0.1021,  0.0363,  0.0087, -0.2328, -0.2576,
        -0.1213, -0.1266,  0.0966,  0.2349,  0.0755,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2874, -1.4527, -0.1900, -0.1897, -0.0666,  0.0545, -0.1641, -0.4938,
        -0.0451,  0.1222,  0.0427, -0.1260,  0.3819, -0.2152,  0.2175, -0.4444,
         0.1354, -0.0143, -0.0632,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4490, -0.1241, -0.1253, -0.0571, -0.0743, -0.3546, -0.0055, -0.0166,
         0.0246, -0.0425,  0.0175, -0.1217, -0.0648, -0.2015, -0.0832,  0.0275,
        -0.0842, -0.2057,  0.0376, -0.0616, -0.0635, -0.2230, -0.0681,  0.0166,
        -0.0116,  0.0475, -0.0606, -0.2389, -0.1099, -0.5808, -0.1915, -0.2068,
        -0.1290, -0.2944,  0.0371,  0.0068,  0.0738,  0.0345,  0.0459, -0.0587,
        -0.0006,  0.0334,  0.0442, -0.0137, -0.0391, -0.1107, -0.1958],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5452, -0.6967, -0.3029, -0.0434,  0.0048, -0.0533, -0.0761,  0.0472,
        -0.0530,  0.0757,  0.0192, -0.0216,  0.0471,  0.0068,  0.1867, -0.0352,
        -0.5460, -0.0718, -0.1013, -0.0443,  0.0559, -0.0569, -0.2775, -0.6986,
        -0.1496,  0.1416, -0.2159, -0.0282, -0.0338, -0.0853,  0.0399, -0.0892,
         0.1960,  0.2278,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0413, -1.3870, -0.0151,  0.0813,  0.1731, -0.1624,  0.0145, -0.0093,
        -0.0118, -0.0551,  0.1524,  0.0540, -0.2882, -0.4208,  0.0177, -0.0758,
        -0.0762, -0.0474, -0.0690, -0.1004,  0.0281,  0.0404, -0.0145, -0.0651,
        -0.0522,  0.0192, -0.0103, -0.0615,  0.0245, -0.1392,  0.0225,  0.0532,
         0.0067,  0.0640,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.2332, -1.3999, -0.1969, -0.2974, -0.2762, -0.2671, -0.0172, -0.1559,
        -0.2494, -0.3599, -0.1652, -0.0492, -0.2744, -0.1276, -0.0455, -0.1034,
        -0.0398,  0.1760,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0357, -0.0352, -0.2373, -0.2031, -0.0444,  0.0844, -0.0771,  0.0061,
        -0.5936, -0.9141,  0.1174, -0.1609, -0.0730, -0.0961, -0.1043, -0.0657,
        -0.0630,  0.0019,  0.0710, -0.5740, -0.0035, -0.0709, -0.0341, -0.0378,
        -0.0108,  0.0222, -0.1736,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7819,  0.0600,  0.2363,  0.0588, -0.0326,  0.0556,  0.1274,  0.0100,
        -0.3397,  0.0449,  0.0153, -0.1448, -0.9260, -0.1888, -0.0097,  0.0134,
        -0.0279,  0.0541,  0.0551, -0.8927, -0.0273, -0.0226,  0.0816,  0.0367,
        -0.0287, -0.0476,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1990, -1.4055,  0.0710, -0.1881, -0.6429, -0.0964, -0.1532, -0.1231,
        -0.0851, -0.1672, -0.3079, -0.0631,  0.0502,  0.0970, -0.1589, -0.2960,
        -0.1217,  0.0351, -0.0346, -0.0680, -0.0396,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1855, -1.2942, -0.5650, -0.7662, -0.1135, -0.0409, -0.3631, -0.0378,
         0.0252,  0.2332,  0.0108, -0.1007, -0.0549,  0.0740, -0.1186,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3863, -0.0414,  0.0860, -0.0125,  0.2614, -0.0234, -0.1315, -0.1358,
        -0.0836, -0.1104, -0.2161, -0.0139, -0.4743, -0.0241,  0.0117, -0.0108,
        -0.1718,  0.1789,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8298e-01, -7.6819e-01, -3.7528e-01, -5.8183e-01, -1.0446e-01,
        -7.3474e-02, -4.3082e-02, -8.4818e-02, -7.2832e-02, -3.0733e-01,
        -2.9442e-01, -1.5025e-01,  1.7405e-01, -2.6092e-01,  7.6645e-04,
        -3.0712e-03, -3.8166e-02,  1.5249e-02,  2.1338e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2656, -0.6307, -0.2135, -0.1746, -0.2471, -0.0308,  0.1137, -0.0266,
        -0.0507, -0.0177,  0.0022,  0.0071, -0.0307,  0.0193, -0.0534, -0.0863,
        -0.1014, -0.0089,  0.0787, -0.0969, -0.3149,  0.0420, -0.2093, -0.0884,
        -0.2194, -0.0762, -0.0929,  0.0580,  0.0543, -0.1063, -0.0070, -0.0725,
        -0.1412, -0.0439, -0.0865, -0.0253, -0.1743, -0.0893, -0.0418, -0.0376,
         0.0812,  0.0232, -0.0162,  0.0284], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1010, -0.7462, -0.1211, -0.1194, -0.2572, -0.2219, -0.2461,  0.0047,
        -0.3818, -0.0874, -0.0840,  0.0166, -0.0652, -0.0277,  0.0671,  0.0387,
         0.1379, -0.2913, -0.0060, -0.0227, -0.1154,  0.0357,  0.0525, -0.0014,
         0.0148, -0.0279,  0.0369, -0.1891,  0.0582, -0.0070, -0.0713, -0.0458,
         0.0349,  0.0163,  0.0285,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0110, -1.7000, -0.1274, -0.2916,  0.0632,  0.0278, -0.3714, -0.2557,
         0.2092, -0.0587,  0.0480, -0.0581, -0.2409,  0.0500, -0.0071, -0.2165,
         0.0308, -0.0192,  0.0502,  0.0886,  0.0433, -0.0361, -0.1702,  0.3915,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2602, -1.0248, -0.0721, -0.1496, -0.0802, -0.1509,  0.0627, -0.0833,
        -0.2980, -0.0686, -0.0440,  0.0014,  0.0494, -0.1290, -0.2705,  0.0613,
         0.0536, -0.0288, -0.3026, -0.0727, -0.0278, -0.1881, -0.0385, -0.0776,
        -0.0656, -0.1700, -0.0736, -0.0744, -0.0262, -0.0111, -0.1459,  0.0018,
        -0.1278,  0.0468,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6479e-01,  1.0964e-01,  1.8846e-02, -5.7619e-02,  1.2945e-01,
         1.2051e-01, -5.1398e-01, -1.0595e-01,  3.0258e-02, -4.5281e-02,
        -6.2110e-02, -4.1503e-02, -4.2333e-01, -4.7355e-02, -1.8811e-01,
        -7.9348e-01, -8.9528e-05,  2.5549e-02, -3.1051e-02,  5.7700e-02,
         7.7019e-02,  1.2924e-02,  8.3560e-02, -1.9687e-01, -6.4575e-02,
        -1.4886e-02, -5.8434e-02, -1.6224e-01, -4.3886e-02, -6.1926e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.2734,  0.2959,  0.3006,  0.9403,  0.1118,  0.0408,  0.2718,  0.1606,
         0.1193,  0.7357, -0.1616, -0.6349,  0.5982,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0542,  0.0944,  0.1925,  0.2669, -0.3159, -0.1397, -0.0076, -0.3647,
        -0.5399, -0.0562, -0.0905, -0.3051, -0.2787, -0.0487, -0.2884, -0.1925,
         0.1059,  0.2207,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3557,  0.8758,  0.8754, -0.0393,  0.6770,  0.1714,  0.4361,  0.3530,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1521, -1.5141,  0.0402,  0.1713, -0.1238,  0.0032,  0.0270,  0.1145,
         0.0582,  0.0531,  0.0862,  0.0647, -0.0744, -0.0261,  0.0294, -0.0026,
         0.0162,  0.0393,  0.0087,  0.0103,  0.1094, -0.0244, -0.1378, -0.0548,
        -0.0208, -0.1083, -0.0338, -0.0623, -0.3258, -0.1696, -0.0159, -0.2439,
        -0.1467, -0.3506, -0.0295, -0.2689, -0.1296,  0.0243,  0.0755,  0.0392,
        -0.0242, -0.0692, -0.0548,  0.0048,  0.1477, -0.1813,  0.0100],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1145,  0.9046,  0.5427,  0.7854,  0.2198,  0.0873, -0.1823,  0.0060,
        -0.1160,  0.1301,  0.0298,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5374,  1.2828,  0.1076,  0.2627,  0.0886,  0.0878,  0.1997,  0.5960,
         0.0695,  0.2724,  0.1447,  0.0186,  0.2767,  0.1164,  0.0192, -0.0036,
         0.1315, -0.0539,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0013,  0.1971,  0.4175, -0.1762, -0.0819,  0.0008,  0.4141,  0.1070,
         0.3173,  0.0832, -0.3181,  0.0109,  0.2991, -0.0209, -0.0678,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0770, -1.3004, -0.1374, -0.0760,  0.0035, -0.4355, -0.5189,  0.0962,
         0.0334, -0.2459, -0.3203, -0.0559,  0.0780, -0.0319, -0.0348,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7583, -0.0334,  0.0101,  0.1086, -0.0316, -0.0804, -0.0045,  0.1278,
        -0.2164, -0.0462,  0.0507, -0.1600, -0.4031,  0.0264, -0.0403, -0.0543,
        -0.2899, -0.0727,  0.0430, -0.0513, -0.3745, -0.3406, -0.0799, -0.0488,
         0.0061, -0.1742,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2003, -0.3128, -0.1920,  0.0594, -0.1901,  0.0681,  0.0330, -0.4139,
        -1.0906, -0.0675, -0.0310, -0.0273, -0.3136,  0.3319, -0.1178,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0018, -1.4744, -0.2208, -0.1343, -0.3222, -0.0572, -0.2528,  0.1348,
        -0.0566, -0.5032,  0.1033,  0.0401, -0.0126, -0.0305,  0.0474, -0.0405,
         0.0027,  0.1121, -0.2784, -0.0385,  0.0080,  0.0538,  0.0166, -0.0039,
        -0.0276, -0.1683,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3323, -0.1037,  0.1385,  0.0202, -0.0907, -0.1083,  0.0421, -0.0746,
        -0.1138, -0.1773, -0.9909, -0.0437, -0.0867, -0.1664, -0.3638, -0.1347,
        -0.0499, -0.1334,  0.0731, -0.1311,  0.0232, -0.0715, -0.0949, -0.0573,
         0.2858, -0.0446,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-7.7317e-02, -9.9221e-01,  1.3014e-01, -2.6399e-02, -1.2786e-02,
         2.9765e-02, -5.7426e-02, -1.9659e-01, -5.2924e-02, -6.4973e-02,
        -6.6050e-02, -1.0507e-01, -4.6889e-02,  8.3295e-04, -1.3205e-01,
         5.2641e-02, -2.4417e-02, -9.3932e-02,  1.8579e-02,  1.0985e-02,
         1.2112e-03, -1.0905e-01, -7.4713e-03, -1.8539e-01, -1.8431e-01,
        -6.8193e-02, -9.6051e-02, -4.4686e-02,  2.6301e-02, -3.7242e-04,
         1.5110e-02, -2.4867e-02, -6.2475e-02,  2.5500e-02,  5.4611e-02,
        -3.8397e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6183, -0.6758, -0.2750, -0.0338, -0.0977, -0.0791, -0.3585, -0.4223,
        -0.0439,  0.0457, -0.1420,  0.0186, -0.0177, -0.0644, -0.0301, -0.0059,
         0.0596, -0.0303,  0.0738, -0.0934,  0.0892, -0.1224,  0.0353,  0.0010,
        -0.1711, -0.2217,  0.1035, -0.1306, -0.2232, -0.0420, -0.0953, -0.1139,
        -0.1986,  0.0397,  0.2955, -0.6992,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3365, -0.9260,  0.0450, -0.1767,  0.1888, -0.1887, -0.1051,  0.0658,
        -0.0579,  0.2345,  0.0635, -0.1434, -0.5399,  0.0709,  0.0868, -0.0846,
        -0.0950, -0.1770,  0.0733, -0.0132,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4305, -0.2160, -0.0492, -0.0921,  0.0128, -0.0656, -0.1825, -0.2081,
        -0.0220, -0.2138,  0.0014,  0.1155,  0.0309,  0.0232, -0.3955, -0.1302,
        -0.1690, -0.2919, -0.0430, -0.0657, -0.1861, -0.0754, -0.0154,  0.0425,
        -0.1462, -0.0155, -0.0785, -0.0335, -0.1904, -0.0105, -0.1610, -0.0073,
         0.0121, -0.1957,  0.1734, -0.1483,  0.1598, -0.0497,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2686, -0.5728, -0.1787, -0.3890, -0.0356,  0.0478, -0.0114,  0.0709,
        -0.1624, -0.1857, -0.4187, -0.3785,  0.0380,  0.0751,  0.0899, -0.1862,
         0.1473, -0.1865, -0.0362,  0.0757,  0.1005,  0.0415,  0.0331,  0.0339,
        -0.0207, -0.0797, -0.0329, -0.0422, -0.2912,  0.0031, -0.0512, -0.0764,
         0.0550, -0.0231, -0.1828,  0.0192,  0.0201, -0.0195,  0.0662, -0.0361,
        -0.0411], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2725, -1.4816,  0.1387,  0.0142, -0.0229, -0.1367, -0.1609, -0.1738,
        -0.0983, -0.0143, -0.0440, -0.1033, -0.1711, -0.1069, -0.0300, -0.2192,
         0.0764, -0.2295, -0.0228,  0.3487, -0.3352,  0.0830,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1119,  0.0149, -0.0078,  0.0209,  0.0309, -0.0439, -0.0274,  0.0089,
        -0.0531,  0.0196, -0.0287, -0.1937, -0.0406, -0.1017,  0.0042, -0.2275,
        -0.0223, -0.0648,  0.0207, -0.2569, -0.3383,  0.0934,  0.0773, -0.1458,
        -0.0577, -0.0183,  0.0111, -0.1154, -0.2282, -0.0323, -0.0326, -0.1530,
        -0.1760,  0.0313, -0.1782,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1540,  0.3713,  0.8515,  0.4414,  0.3937, -0.0256, -0.0541, -0.0504,
        -0.0391,  0.0843,  0.2107,  0.0304,  0.3798, -0.1310, -0.1552,  0.2597,
        -0.1660, -0.0722,  0.1443,  0.0026,  0.1557, -0.0888,  0.5043,  0.0763,
         0.0205,  0.0251,  0.0055,  0.0842,  0.2357,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2698e-01, -1.3412e+00, -3.0237e-01, -4.3857e-01, -1.4603e-01,
        -3.6915e-01, -1.7305e-01, -1.4103e-01, -3.0108e-02, -5.0941e-02,
        -5.9174e-02, -1.0127e-01, -6.5564e-02,  7.5313e-02,  6.3191e-04,
        -5.3554e-02, -9.0880e-02,  5.7084e-02, -2.9496e-03,  9.8392e-03,
        -2.7813e-02, -6.6992e-02,  5.0733e-02,  2.5429e-01,  3.2501e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0923,  0.1629,  0.0015, -0.2757, -0.9402,  0.0421, -0.1149, -0.4576,
        -0.3876,  0.0662,  0.0377,  0.2002, -0.1237,  0.1182, -0.2980, -0.6880,
         0.0865, -0.0589,  0.0476, -0.0389, -0.0559,  0.0525, -0.0363,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2341, -0.0250,  0.0026,  0.0295,  0.0064,  0.0716, -0.0300, -0.0248,
         0.0038, -0.0453, -0.0012, -0.0414, -0.1906, -0.3865,  0.1172, -0.0152,
        -0.0207, -0.0649, -0.0534,  0.0067, -0.0348, -0.1063,  0.0132, -0.0054,
        -0.0863, -0.1589, -0.3030,  0.1136, -0.1814, -0.1802, -0.0528, -0.0671,
        -0.1261, -0.1058, -0.0037,  0.0543,  0.0106,  0.3826, -0.1328,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4988,  0.2349,  0.3045, -0.4119, -0.8907, -0.1502, -0.0351, -0.2227,
        -0.5299,  0.0585,  0.2212, -0.1316,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.6212, -0.3147, -1.2459,  0.0070, -0.0838,  0.1117, -0.1188,  0.1122,
        -0.0392, -0.0222, -0.0139,  0.0042, -0.0252, -0.0056, -0.1056,  0.0263,
        -0.0546, -0.2191,  0.0988, -0.0839, -0.0484, -0.0709,  0.1132, -0.0355,
         0.0120, -0.0273,  0.0779, -0.0209,  0.0848,  0.1913, -0.2630, -0.1165,
        -0.0893,  0.0419,  0.0129, -0.0497, -0.0434,  0.0273, -0.0171,  0.0404,
         0.0690,  0.0962,  0.1479,  0.0500, -0.0632,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0424, -1.7299,  0.0580, -0.1564,  0.2156,  0.3737,  0.1232, -0.3701,
        -0.9370,  0.1274,  0.2578, -0.3849,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0768,  2.7406,  0.5807,  0.4604, -0.3340, -0.1454,  0.0723,  0.0631,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8817e-04, -5.2718e-01, -5.4768e-02, -2.2721e-03,  7.6458e-03,
        -8.1898e-03, -9.0302e-03, -2.4989e-03, -1.7257e-02, -2.4267e-01,
        -1.0941e-01,  2.2438e-02, -1.4362e-02,  2.1297e-02,  5.3311e-04,
         7.1341e-03,  2.2020e-02, -6.3408e-02,  1.4765e-02, -1.6510e-02,
        -5.2097e-03,  2.1776e-02, -9.0417e-03, -6.3592e-03,  1.6110e-02,
        -4.2713e-02, -4.6619e-04, -4.8358e-02, -5.9387e-02,  5.4418e-03,
        -4.3527e-02, -1.8595e-02, -1.2280e-02,  3.8687e-03, -2.3802e-02,
        -2.1353e-02,  3.8259e-03,  4.5478e-02,  1.4409e-02,  5.3497e-03,
        -3.8867e-02,  8.2460e-03,  7.4257e-03, -2.3949e-02,  4.2241e-02,
         2.5035e-01, -3.4457e-01, -7.7843e-02, -8.6158e-02, -1.4024e-01,
        -5.4491e-02, -5.3945e-02, -1.0135e-01,  3.1989e-02, -5.2408e-02,
        -1.1426e-02, -5.0938e-02, -3.4195e-02, -5.3160e-02, -2.7951e-03,
         1.8228e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3300, -0.1897,  0.1265, -0.0909, -0.3605,  0.0712, -0.1467,  0.0102,
        -0.0636, -0.0443,  0.0846, -0.2394,  0.0347, -0.0444, -0.4731,  0.0759,
        -0.0112,  0.1212, -0.1551, -0.1930, -0.2553, -0.2685, -0.1978,  0.0615,
        -0.1729, -0.0386, -0.0471, -0.0137,  0.0170, -0.1900, -0.0125,  0.0250,
        -0.2907,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2413, -0.7882, -0.8975, -0.4556, -0.1349, -0.0853, -0.2457,  0.1267,
         0.0132, -0.0763,  0.0552,  0.0206, -0.0687,  0.0024, -0.0973,  0.3007,
         0.1913, -0.0849, -1.1404, -0.0807,  0.1586,  0.0642,  0.4046,  0.3459,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7029e-01, -4.2059e-01, -5.2532e-01, -9.8949e-02, -1.4370e-01,
        -4.7666e-01, -1.4668e-01, -2.1290e-01, -1.4806e-01, -3.9290e-01,
        -3.2670e-01, -2.3040e-01, -7.8156e-01, -3.0340e-02,  6.6195e-04,
        -1.8781e-02,  3.7091e-02, -1.2364e-02,  2.3542e-01, -2.1092e-02,
        -8.4081e-02,  4.6948e-02,  2.0294e-03, -2.3208e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2842, -0.2310, -0.3140, -0.4910, -0.1504, -0.0211, -0.2449, -0.2506,
        -0.1033, -0.0727,  0.0544, -0.0147, -0.0259,  0.0483, -0.2538, -0.2720,
        -0.1870,  0.0944,  0.0616, -0.0770, -0.1855, -0.0565, -0.1366, -0.2348,
         0.0155,  0.0282, -0.2489, -0.0357, -0.0114,  0.0110, -0.0306, -0.0050,
        -0.1641,  0.1767,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5326, -0.1279,  0.0354,  0.0580, -0.2539,  0.1944,  0.0194,  0.1062,
        -0.1405,  0.0739,  0.0373,  0.3101,  0.2137, -0.7551, -0.1968, -0.2118,
        -0.2315,  0.1419, -0.0496, -0.1640, -0.4171, -0.5044, -0.2667, -0.0056,
        -0.0627,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2515,  0.0610, -0.2379, -0.0845, -0.1845,  0.0813, -0.4706, -0.1441,
        -0.3365, -1.4136,  0.1900, -0.0490, -0.1126,  0.1234,  0.0178, -0.2645,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2346, -0.9025, -0.0381, -0.2687, -0.1838, -0.0720, -0.9612, -0.0445,
         0.1417, -0.0383,  0.0151, -0.1090, -0.2681, -0.0654, -0.1877, -0.2180,
        -0.1322, -0.1531, -0.1600, -0.0413,  0.0443,  0.2922, -0.3739,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4205,  0.1211,  0.1013,  0.4432,  0.7871,  0.0587, -0.0243,  0.0188,
         0.0333, -0.0706, -0.0595,  0.1612,  0.0056,  0.2717,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-1.1117,  0.1785,  0.1799,  0.3813, -0.0428, -0.3769,  0.2942,  0.0533,
         0.1214,  0.0782,  0.2449,  0.1463,  0.1302,  0.1133, -0.0372,  0.0449,
        -0.2063,  0.4156,  0.0127,  0.7086,  0.1353,  0.0809,  0.1267, -0.2189,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1272, -0.1774, -0.5136, -0.1664, -1.0321, -0.1303, -0.0950, -0.0184,
        -0.1260, -0.7501,  0.0585,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1220, -0.0151, -0.0616, -0.0746,  0.0673, -0.3387, -0.8677, -0.1011,
        -0.5199, -0.1177, -0.1223,  0.0014, -0.1142, -0.3101, -0.1102, -0.0651,
         0.0305, -0.4023, -0.0617,  0.3548,  0.0781,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2233, -0.1015, -0.0642,  0.0421,  0.0177, -0.0906,  0.0033, -0.7391,
        -0.1518, -0.9872,  0.0431,  0.1404,  0.0222, -0.2619,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1556, -0.0520, -0.0097,  0.1036, -0.0013, -0.0423,  0.0192, -0.0642,
        -0.0232, -0.0705, -0.0993,  0.1353,  0.6444,  0.0732,  0.0129,  0.0399,
         0.0401,  0.1398, -0.0015,  0.0353,  0.0032,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1906e-01, -1.4877e+00,  4.5757e-02, -1.0013e-01,  2.0993e-01,
        -2.7241e-01, -3.1484e-02, -6.6893e-01,  4.5757e-04, -2.3737e-01,
        -2.3929e-01, -6.2637e-03,  2.0711e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4518, -0.2182, -0.0103, -0.0452, -0.0290, -0.0449, -0.1175, -0.0872,
        -0.2132, -0.0775, -0.0210,  0.0845,  0.0313,  0.0084, -0.0158,  0.0289,
         0.0050,  0.0605, -0.0364,  0.1545,  0.0462,  0.0109, -0.0395,  0.0034,
        -0.4210, -0.0181, -0.5126, -0.2807, -0.3101, -0.1893, -0.2968, -0.1098,
        -0.0374,  0.0443, -0.0748, -0.0699, -0.0603, -0.0085, -0.0018, -0.0178,
         0.0164,  0.0276,  0.0863, -0.0416,  0.0107], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0257, -0.2624, -0.1816, -0.1069, -0.0318, -0.0771, -0.1071, -0.0966,
        -0.0109, -0.0151, -0.1324,  0.0130, -0.0360,  0.0697, -0.2350, -0.1439,
        -0.5753, -0.0480,  0.0148,  0.1186, -0.1758, -0.2446,  0.0216, -0.4780,
         0.0984,  0.0458, -0.0880, -0.1934, -0.3462, -0.0620, -0.0869, -0.0240,
         0.1624,  0.1045,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5262,  0.0913, -0.4014, -0.3512, -0.1789,  0.0018, -0.2172, -0.0944,
         0.1505, -0.1496, -0.0714, -0.0915,  0.1469, -0.0325,  0.0941,  0.1363,
         0.0630,  0.0305, -0.2053, -0.6333, -1.1944,  0.0159, -0.1304,  0.0449,
        -0.2406, -0.1358,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1755, -1.6849,  0.4039, -0.6587, -0.1200, -0.1195, -0.0461, -0.2514,
        -0.0150, -0.2601,  0.0193, -0.0762,  0.0258,  0.0869, -0.2876, -0.2417,
        -0.5208,  0.1216, -0.0899, -0.0221, -0.1488, -0.1692,  0.2195,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0543, -0.6317, -0.2255, -0.0201, -0.0073, -0.0039, -0.1092, -0.0543,
        -0.2511, -0.0206,  0.1251,  0.0372, -0.0246, -0.4344, -0.2441,  0.0731,
         0.0676, -0.1284, -0.3946, -0.0684,  0.0522, -0.1031,  0.0644, -0.1346,
         0.0728,  0.0556, -0.1400, -0.1520, -0.0586, -0.0841,  0.0253, -0.2008,
         0.0402, -0.0165,  0.0818, -0.0028, -0.1564, -0.0064, -0.7679, -0.1183,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1091, -0.1891, -0.0700, -0.1525, -0.4200, -0.0382,  0.0207, -0.2946,
        -0.0190, -0.0776, -0.0392, -0.0443, -0.1168, -0.0928, -0.2001, -0.2394,
        -0.1721, -0.0432, -0.0547, -0.1355,  0.0088, -0.2297, -0.1499,  0.1070,
         0.0134,  0.0312, -0.0100, -0.2147,  0.0238,  0.0376, -0.1902,  0.0506,
        -0.0278, -0.1050,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.0572, -0.3095,  0.0732, -0.0423, -0.1218,  0.0788, -0.0157,  0.0273,
         0.0217,  0.1608, -0.0013, -0.0238,  0.0716, -0.0222, -0.7531, -0.0488,
         0.1291,  0.0203, -0.1307,  0.0358,  0.0355, -0.0138, -0.1063,  0.0106,
        -0.1990, -0.2051, -0.1471, -0.0914, -0.0302,  0.0149,  0.0911,  0.0067,
         0.0180,  0.1153, -0.1669, -0.0382, -0.1201, -0.1205, -0.0438, -0.1401,
        -0.1155, -0.0752, -0.0949, -0.0312, -0.0122, -0.0299,  0.0271,  0.0134,
        -0.0434, -0.0762,  0.0623,  0.0370], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3574, -0.1944, -0.1950, -0.2172, -0.5604, -0.0466, -0.1411,  0.0276,
        -0.0183, -0.0233,  0.0700, -0.0464, -0.0746, -0.3319, -0.1797, -0.2248,
        -0.2271, -0.2023, -0.3041, -0.0812, -0.1481, -0.0875, -0.0989, -0.2632,
         0.0045, -0.0287, -0.0443,  0.0114,  0.0649, -0.1398,  0.3103,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3500e-02,  5.0350e-01, -4.2518e-02, -5.5036e-03, -8.1947e-02,
        -1.3142e-01,  1.2160e-01, -8.3252e-02,  3.5917e-02,  1.6562e-02,
         7.7457e-02,  5.0599e-02,  4.8678e-02,  8.2539e-02,  2.4211e-01,
         9.5460e-02, -3.0860e-04,  3.2191e-01,  5.9404e-01,  7.3208e-01,
         3.2979e-01,  5.2967e-02,  3.5951e-01,  3.3453e-01,  5.8183e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1245, -0.0713,  0.1302, -0.0059, -0.1513, -0.5055, -0.7217,  0.1017,
        -0.3337,  0.0354, -0.0330, -0.3826,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7756e-02, -1.0009e+00, -8.0075e-02, -3.2464e-01, -3.1125e-01,
        -6.0308e-02,  1.4940e-01,  2.2115e-02, -4.2190e-02, -2.8897e-02,
         8.0827e-02,  7.2228e-02, -1.1409e-01, -5.8946e-01, -2.1289e-01,
        -1.8063e-01,  3.8476e-02,  2.1797e-01, -3.8654e-02,  1.7422e-02,
        -7.5718e-03, -4.1598e-02, -1.5670e-05, -3.6463e-01, -1.7705e-01,
        -2.2911e-01,  1.8752e-02,  2.3472e-01, -1.5250e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7856, -1.4329, -0.0659, -0.1033,  0.0486, -0.3205, -0.4322,  0.1867,
        -0.0623,  0.1489,  0.5265,  0.0366,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4689, -1.3369, -0.4509, -0.4424, -0.2057, -0.6319, -0.4225,  0.2122,
        -0.0850, -0.0446, -0.3042,  0.1488,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0091,  0.2999, -0.3509, -0.0802, -0.3872,  0.0258, -0.0515, -0.0450,
        -0.2832, -0.1848, -0.1807, -0.7677, -0.1554, -0.1376, -0.0217, -0.2853,
        -0.0817,  0.0404, -0.2600,  0.0087,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1092, -1.2020, -0.0586, -0.1457, -0.5383, -0.1262, -0.1357,  0.0312,
        -0.0590, -0.0216, -0.0512, -0.0663, -0.0521, -0.0034,  0.0539, -0.0504,
        -0.0388,  0.0256,  0.0347, -0.0143, -0.0384, -0.0494,  0.0293,  0.0750,
        -0.0661, -0.0417, -0.0427, -0.1413, -0.1747, -0.0092,  0.0058, -0.0020,
        -0.1362, -0.2976, -0.0291, -0.0610,  0.1069,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0962,  1.5694,  0.2094,  0.0608,  0.0933, -0.0508,  0.0690,  0.0486,
        -0.3931,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7412,  0.1520, -0.1512, -0.7352, -0.0188,  0.0324, -0.1444, -0.4277,
        -0.0799, -0.0145, -0.2094, -0.2163, -0.3388, -0.3266, -0.0630,  0.1728,
         0.0119,  0.0348, -0.1328, -0.1224,  0.0347,  0.0833, -0.0240, -0.0213,
        -0.0550,  0.2064,  0.0356,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1155,  1.3097,  0.1178,  0.1084,  0.0604,  0.1324,  0.0109, -0.0545,
         0.0346, -0.0235, -0.0246,  0.0553,  0.0069,  0.1050, -0.0247, -0.0999,
         0.1508,  0.0250, -0.1511, -0.0106,  0.1460,  0.3162,  0.2176, -0.0089,
         0.0509,  0.0363, -0.0237,  0.0997,  0.5486,  0.0179, -0.0169,  0.0717,
         0.0073, -0.0228,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0263,  0.0234,  0.0015,  0.0561, -0.1032, -0.0784,  0.0289, -0.0216,
        -0.0597,  0.1507, -0.0702, -0.4335, -0.0124, -0.1113, -0.2216, -0.2774,
        -0.4574, -0.1599, -0.1161, -0.0670,  0.0933, -0.0484, -0.2208, -0.0815,
         0.0190, -0.0823, -0.0493, -0.0381,  0.0009, -0.0334, -0.0440, -0.2716,
        -0.3095, -0.1126, -0.0821, -0.1302, -0.3958, -0.0600, -0.1646, -0.0347,
        -0.0345, -0.1536,  0.0043,  0.0641,  0.1537,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4316,  0.0419,  0.0361, -0.0170,  0.3149,  0.5102,  0.9772,  0.1628,
         0.0886,  0.2858, -0.0621, -0.0998,  0.1867, -0.0683, -0.1004, -0.1641,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5723, -0.1134,  0.2936,  0.1161,  0.0142,  0.0022,  0.1304,  0.0372,
         0.5039,  0.4773,  0.2076,  0.2342, -0.0783,  0.0457,  0.1091, -0.0720,
         0.0660, -0.0311,  0.0452,  0.0453,  0.0234, -0.0595,  0.0316, -0.0167,
         0.0666, -0.0671,  0.0998,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5431, -0.0881, -0.1322,  0.0043, -0.0959,  0.0157, -0.0124,  0.0027,
        -0.1535, -0.0645, -0.0085, -0.0392, -0.0828, -0.0028, -0.0497, -0.0123,
        -0.0600, -0.0283, -0.1536, -0.1445, -0.0632, -0.1071, -0.0193, -0.4090,
         0.1035, -0.0304, -0.1258, -0.1918, -0.0216, -0.0727,  0.0235, -0.1789,
        -0.1062, -0.1773, -0.1552, -0.2583, -0.1130, -0.1789,  0.0054,  0.0558,
        -0.0834, -0.1952, -0.0253, -0.0952,  0.1566,  0.0628], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2165e-01,  1.2265e-01, -1.9368e-02, -4.3927e-02, -1.2535e-04,
        -5.9020e-02, -3.6843e-02, -4.8900e-01,  1.2129e-01, -4.3566e-02,
        -4.0637e-02,  5.9523e-02, -8.6775e-02, -3.1440e-02,  4.1574e-02,
        -1.3336e-03, -6.1578e-02, -3.8818e-02, -2.9625e-01, -7.6146e-01,
        -1.1137e-01, -1.0305e-01, -3.7888e-02,  1.9966e-02, -7.6392e-02,
        -4.4543e-02, -1.7164e-01, -2.2642e-01, -2.3814e-03, -2.2908e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4463, -0.0262,  0.0793,  0.0097,  0.3735,  0.0328, -0.0137,  0.2870,
        -0.3405, -0.0684,  0.4817,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1956,  1.1683, -0.0020,  0.0688,  0.2862,  0.0740,  0.2693,  0.2878,
         0.0405,  0.0658,  0.0106,  0.0726, -0.2103,  0.0578,  0.0837,  0.2299,
         0.0770,  0.1066,  0.0395,  0.0317,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3474, -1.5224, -0.6146, -0.1842,  0.0812, -0.3780, -0.3949,  0.0854,
        -0.0820, -0.3417,  0.0734,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3985,  0.9996,  0.3730,  0.5055,  0.0743, -0.0143,  0.0576,  0.2028,
         0.2897,  0.0145,  0.0875,  0.0957,  0.0815,  0.0113,  0.0677,  0.1721,
        -0.0635, -0.0523,  0.0410, -0.0139, -0.3177,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1610, -1.2240, -0.2065, -0.0094,  0.6383, -1.5496,  0.1456, -0.0400,
        -0.0957,  0.0251,  0.0565, -0.0647,  0.0886,  0.0075, -0.1787, -0.0923,
         0.2875, -0.2445,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2479, -0.4048, -0.6390, -0.0381, -0.0814, -0.0090, -0.0729,  0.0358,
         0.0091,  0.0063,  0.0385,  0.0296,  0.0820, -0.1317, -0.1377,  0.0559,
        -0.0741, -0.1501, -0.0702, -0.0346,  0.0055, -0.1683, -0.2701, -0.0389,
        -0.0919, -0.1016, -0.1762, -0.0582, -0.0609, -0.1546, -0.0466, -0.0393,
         0.0198,  0.0388, -0.0482,  0.0067, -0.0100,  0.0146,  0.1016,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2525, -1.2312,  0.0244, -0.2730, -0.0422, -0.0683,  0.0307, -0.1068,
         0.0378,  0.0020, -0.0891, -0.1805, -0.0611, -0.0078,  0.0570, -0.0032,
        -0.1955, -0.0512,  0.0473, -0.0368, -0.0163,  0.0444,  0.0578,  0.0289,
         0.0081, -0.0569, -0.0944, -0.0213, -0.2719, -0.3520, -0.0248, -0.0276,
        -0.0479, -0.0255,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.2925, -0.4029, -0.8121, -0.8657,  0.0511, -0.3489,  0.0269, -0.2482,
        -0.3690,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2867, -0.8315, -0.1975, -0.1182,  0.0102, -0.0931,  0.0477, -0.0577,
         0.0762,  0.0273, -0.1572, -0.0018,  0.0677, -0.1156,  0.0406, -0.2385,
        -0.0442, -0.7073, -0.0419, -0.0185,  0.1131,  0.0584, -0.0443,  0.0590,
        -0.0633, -0.0381,  0.0056,  0.0755,  0.0205,  0.0501,  0.1233, -0.0803,
         0.0631,  0.0197,  0.0321, -0.0039,  0.0429, -0.0916, -0.1536,  0.1526,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0301, -0.2176, -0.4679, -0.0172, -0.1290, -0.0096,  0.0150, -0.0995,
        -0.0850, -0.3006,  0.0161,  0.0223,  0.0100, -0.0108,  0.0384,  0.0772,
         0.0010,  0.0618, -0.1381,  0.0548, -0.0017, -0.0063, -0.0051,  0.0614,
         0.0165, -0.1075, -0.1981,  0.0584, -0.2541, -0.3422, -0.0978,  0.0370,
         0.0025, -0.0236,  0.0550,  0.0197, -0.1377, -0.3061, -0.0480, -0.0445,
        -0.0059,  0.3225, -0.0568,  0.0156,  0.0134,  0.1379, -0.0602, -0.0346,
        -0.0313, -0.0190,  0.0841,  0.0391,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0819, -0.8635, -0.1108, -0.1445, -0.0365, -0.1059, -0.3025,  0.0586,
        -0.1241,  0.0073,  0.0358, -0.2374, -0.3529, -0.1582, -0.0589, -0.3176,
        -0.1893,  0.1150, -0.1500,  0.0345,  0.0292, -0.1429,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0803,  1.8214,  0.3146,  0.2586, -0.1867,  0.0960,  0.4221,  0.1287,
        -0.1584,  0.2509,  0.2648,  0.1497,  0.0109,  0.0207, -0.0449,  0.3951,
         0.1427, -0.0755, -0.0073,  0.4608, -0.5739,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1019e+00, -7.1928e-01, -1.8531e-02, -4.4759e-02, -6.2689e-04,
         1.6985e-02,  1.4060e-01,  6.4958e-02, -6.5938e-02,  1.7227e-01,
         6.8371e-03,  8.8450e-03,  1.3420e-02, -5.9549e-02,  6.6661e-02,
        -1.9766e-02, -3.5322e-02,  5.8759e-02,  4.2667e-03,  2.7630e-02,
         1.7743e-02, -1.6548e-02,  1.1819e-01, -2.6521e-03, -3.0354e-02,
         4.8543e-02,  2.2225e-01, -1.3692e-01,  1.2134e-01, -4.8555e-02,
        -4.4067e-01, -2.2415e-01,  6.2116e-03, -7.6958e-02, -1.3107e-01,
         9.8477e-03,  3.6618e-02,  8.8791e-02,  1.8122e-02, -2.3877e-02,
        -3.3875e-01,  2.0245e-02, -3.4886e-02,  5.1405e-05, -6.9943e-02,
         1.0021e-02, -1.1155e-01, -5.9129e-02, -8.0357e-02,  5.7868e-02,
        -2.6394e-02, -9.2444e-02,  9.4388e-02,  1.2372e-01, -5.0205e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2852, -0.1226,  0.1731, -0.0277, -0.0442, -0.1100, -0.2664, -0.0072,
        -0.0162, -0.0143, -0.0005,  0.0048,  0.0216, -0.0930, -0.0928, -0.1182,
        -0.0132, -0.1703, -0.1651,  0.0048,  0.0417, -0.0866, -0.2124, -0.0252,
         0.0466,  0.1185, -0.0928, -0.4528, -0.0518, -0.0147, -0.0061,  0.0227,
        -0.0364, -0.2830,  0.0536, -0.0401, -0.0152, -0.0926, -0.1688, -0.0742,
        -0.0327, -0.0706,  0.0810,  0.0969,  0.1774,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4214,  0.9126,  0.4434,  0.4062, -0.0656,  0.0325,  0.2129,  0.0424,
         0.2588,  0.0247,  0.0972,  0.1444,  0.0551,  0.1549,  0.2017,  0.1720,
         0.1082,  0.3442,  0.0521,  0.0146,  0.5710, -0.0105,  0.0189,  0.0106,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5330,  0.0867,  0.0478, -0.0609, -0.0507, -0.0291, -0.0813, -0.1682,
         0.0393, -0.0834,  0.0311, -0.0013,  0.0101, -0.1879, -0.0147,  0.0144,
        -0.2291, -0.2006, -0.2476, -0.1543, -0.2787, -0.0448, -0.1452, -0.1553,
         0.1540, -0.1397, -0.0135, -0.1480, -0.2574, -0.3154, -0.0406,  0.1584,
        -0.0664,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4623, -1.2156, -0.0527, -0.3328,  0.0572, -0.1691,  0.0636, -0.2538,
        -0.3242,  0.0790,  0.0910, -0.0337,  0.2428, -0.4126,  0.1424,  0.0584,
         0.0152, -0.0410, -0.7034,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3499,  0.0596, -0.0022, -0.2666, -0.4822,  0.0343, -0.2002, -0.1691,
        -0.4024, -0.0156,  0.1026,  0.1936,  0.0319, -0.0338, -0.0234, -0.0763,
        -0.0375, -0.2348,  0.0117, -0.0390, -0.0370, -0.3821, -0.0020,  0.1956,
        -0.0964,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0844, -1.2724, -0.0563, -0.2526, -0.3015, -0.4087, -1.3713,  0.1560,
        -0.1396, -0.1552, -0.1946, -0.1527, -0.2268, -0.1157, -0.0973,  0.2421,
         0.6917,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.1998,  0.0163, -0.1780,  0.0120, -0.0576, -0.0025, -0.0580, -0.2034,
        -0.1314, -0.2045, -0.0439, -0.1531, -0.0578,  0.0483,  0.0847, -0.0867,
        -0.2623, -0.4804,  0.0251, -0.0955, -0.0508, -0.4336, -0.4396, -0.0142,
        -0.2118, -0.0970, -0.1796, -0.1284,  0.1821, -0.0375, -0.0070,  0.1791,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0475e-01,  3.9693e-01,  7.9861e-01,  1.0460e-01,  1.2051e-01,
         1.2493e-01,  2.1164e-01,  4.3394e-01,  5.0593e-02, -1.9258e-02,
         6.4735e-03,  7.0383e-02,  9.1462e-03,  4.7941e-02,  1.8094e-02,
        -1.0664e-01,  8.9718e-02, -7.7190e-04,  1.7643e-01,  2.0768e-01,
         5.4686e-02, -2.1051e-02,  1.5929e-01,  2.5275e-01,  8.7565e-02,
         9.7572e-02,  1.4601e-01, -1.5812e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2628,  0.6462, -0.0206,  0.0750, -0.0635,  0.1232,  0.0686, -0.0424,
         0.0135,  0.1090,  0.0177,  0.3702,  0.0364,  0.1405,  0.0424,  0.3088,
         0.1291,  0.3767,  0.0978, -0.0407,  0.0776,  0.0848,  0.0211,  0.1228,
         0.3247,  0.0556,  0.0662, -0.1289,  0.0415,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0312, -0.2177, -0.0947, -0.2666, -0.5457, -0.1206,  0.0628, -0.0268,
        -0.0504,  0.0836,  0.2445, -0.0891, -0.1664, -0.3822,  0.1410, -0.1514,
        -0.0803, -0.2556, -0.0456, -0.0426,  0.0957,  0.0359,  0.0031, -0.0265,
        -0.0605, -0.0551, -0.0458, -0.0480, -0.0189,  0.0033, -0.0258,  0.1317,
         0.0751,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3494, -1.6847, -0.0824, -0.0468, -0.1144, -0.1841,  0.2227, -0.0424,
        -0.0263, -0.2133, -0.1174, -0.0324, -0.1736, -0.1239,  0.0047,  0.0879,
        -0.1121, -0.0088, -0.2248, -0.1033, -0.3210, -0.0247,  0.3776, -0.0547,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4503, -0.4069, -0.4922, -0.0532, -0.0952, -0.1371, -0.1406, -0.3859,
        -0.0334, -0.1997, -0.0720, -0.1277, -0.1619, -0.1857, -0.2011, -0.0141,
        -0.0185, -0.1982, -0.0124, -0.5850, -0.0328,  0.1522,  0.0767,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0522, -0.3822, -0.0551, -0.0181, -0.0544, -0.1493, -0.1001,  0.0011,
         0.0286, -0.0195,  0.0967, -0.1401, -0.0500,  0.0199, -0.0355, -0.0876,
        -0.1732, -0.1449, -0.0199,  0.2206, -0.0138, -0.0816,  0.0475, -0.0513,
        -0.1834, -0.0111, -0.0015, -0.0206, -0.0683, -0.3576, -0.4856, -0.0146,
        -0.1924,  0.0759, -0.1246, -0.2125, -0.0676, -0.1351, -0.0444,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1711e-01, -4.5044e-01, -4.5090e-02, -3.1661e-02, -5.1131e-01,
        -5.0691e-02, -2.7151e-03,  1.1003e-02, -1.9916e-02,  3.3073e-02,
         1.0765e-02,  1.6889e-01,  2.4463e-02, -5.9828e-02,  8.3524e-02,
        -1.0392e-01, -4.2374e-02, -4.7993e-03, -2.4156e-02, -8.0625e-02,
        -1.5325e-01, -7.3080e-02, -5.4845e-02,  5.4095e-02, -1.6879e-01,
        -2.4441e-01, -1.0446e-01, -8.0858e-03,  5.2804e-02, -9.7451e-02,
        -1.3470e-02,  1.4628e-05,  5.3841e-02, -3.9599e-02,  1.1239e-03,
         1.7010e-02, -4.0354e-02, -2.9054e-01, -6.0516e-02, -3.7300e-02,
        -2.1339e-03, -9.6401e-02,  1.9239e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1850e-02,  1.3490e+00, -8.6956e-02, -2.5454e-02,  5.4837e-03,
        -3.0015e-02,  1.0782e-01, -7.0229e-02,  2.1473e-01, -8.2568e-03,
        -1.1371e-01,  2.9763e-02,  1.4987e-01,  5.5772e-03,  5.0462e-02,
         1.6139e-01,  1.0463e-01,  2.7074e-01,  4.3228e-03,  1.2689e-01,
        -1.2956e-03, -9.1485e-03,  5.4977e-02,  3.3905e-02, -1.0276e-01,
        -1.1369e-02, -1.0366e-01, -1.2664e-02, -8.4387e-02, -1.0312e-02,
        -1.2179e-01,  3.5865e-01,  6.3451e-01,  1.6646e-01, -9.6690e-03,
        -3.2301e-02, -1.8420e-01,  7.8477e-02,  2.8786e-01,  7.7903e-02,
         1.1030e-01,  2.0703e-01, -1.4908e-02,  5.9815e-02, -1.6997e-02,
        -5.2886e-02, -2.5335e-02, -4.2128e-03,  1.4186e-02,  3.9029e-02,
        -1.4428e-01,  2.8091e-02,  2.6122e-02, -4.0627e-02,  1.1064e-01,
         9.3078e-02,  2.4029e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0831,  2.0415,  0.3286,  0.0242,  0.1519,  0.0103,  0.2845,  0.2912,
         0.2482,  0.2158,  0.2046, -0.4478,  0.0231, -0.7549,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4306, -0.8671, -0.2762,  0.0377, -0.0718, -0.0704, -0.0101, -0.0428,
        -0.0392, -0.0338, -0.0888, -0.5475, -0.1234, -0.1042, -0.1065, -0.0826,
        -0.0472, -0.2385,  0.0815, -0.2694, -0.2047, -0.1575, -0.1811, -0.0758,
         0.1368, -0.0568, -0.0273,  0.0342,  0.0643,  0.0898,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5565,  0.4168, -0.0937,  1.2225,  0.1888,  0.3504,  0.5838,  0.4621,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0094, -0.7468, -0.0826, -0.0581, -0.0833, -0.0659, -0.0012, -0.0072,
         0.0438,  0.0161,  0.0238, -0.0706, -0.2230, -0.2973,  0.0219, -0.0221,
         0.0134, -0.0132, -0.0127,  0.0267, -0.0456, -0.0294, -0.0161, -0.0498,
        -0.2529, -0.0672, -0.1015,  0.0112, -0.1227, -0.4008,  0.0251, -0.0071,
        -0.0621,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6320, -0.9236, -0.3418, -0.2791, -0.0918,  0.0286,  0.0453, -0.0175,
         0.1567, -0.0815,  0.0052,  0.0019, -0.0279, -0.0744, -0.0560, -0.0607,
         0.0087,  0.0699, -0.0919,  0.0657, -0.0376, -0.0319, -0.0198, -0.0308,
         0.0124, -0.0804, -0.1257, -0.1112, -0.0583, -0.0291, -0.0989, -0.0932,
        -0.1235, -0.3308, -0.0924, -0.1464, -0.0734,  0.0157, -0.0259,  0.0241,
         0.0558, -0.0508,  0.0114, -0.3035, -0.1932, -0.0202, -0.0749, -0.0774,
        -0.0481, -0.0295,  0.0517, -0.0161, -0.0174, -0.0642,  0.0265,  0.0392,
         0.0153, -0.0357,  0.0207, -0.1181, -0.0243, -0.0549, -0.1856, -0.0174,
        -0.0306, -0.0265, -0.0029,  0.0153,  0.0342, -0.1805, -0.1500],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9878e-01, -5.8178e-02, -3.8476e-02,  1.2267e-03, -1.2586e-01,
         1.2594e-01, -8.6518e-01, -7.5108e-02, -1.3468e-02,  3.4740e-02,
        -1.0611e-01, -2.2715e-02, -1.0192e-03, -2.8642e-03, -1.1477e-02,
        -4.9805e-02, -8.2112e-03,  9.4811e-02, -1.6150e-02, -2.7287e-01,
        -1.7221e-01,  7.6079e-02,  1.3195e-01, -3.0977e-01,  1.4512e-01,
        -1.3527e-01, -6.1630e-02, -4.9408e-02,  3.7206e-02, -7.5481e-02,
         1.4665e-01,  5.2865e-02, -2.0969e-01, -3.4215e-01, -2.9769e-02,
        -8.2169e-02, -2.9857e-01, -6.3000e-02,  3.4473e-02,  9.7319e-05,
         2.1006e-01,  9.0847e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0796, -1.5860, -0.3335, -0.1580,  0.0421,  0.0757,  0.0240,  0.0458,
         0.0115, -0.3101, -0.1482, -0.1495, -0.3479, -0.0253,  0.0973,  0.0719,
         0.0433, -0.2943, -0.5020, -0.1190, -0.0288, -0.0730, -0.0317,  0.0405,
         0.0803, -0.2105, -0.2357, -0.1683, -0.4095,  0.0250, -0.0082,  0.0542,
        -0.0121, -0.0451,  0.0875,  0.1633, -0.0631,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0515,  1.9322,  0.3572, -0.0063, -0.1475, -0.1168, -0.0821,  0.1179,
        -0.0331,  0.2522,  0.0686,  0.0693,  0.2002,  0.2200, -0.0415,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1118, -0.1440,  0.0844,  0.0018, -0.2594, -0.3470, -0.3662, -0.2356,
        -0.6478, -0.2440,  0.0032, -0.2493, -0.2663,  0.2062,  0.1840, -0.0336,
         0.0201,  0.2357,  0.1884,  0.3437,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2157, -0.5180, -0.4107,  0.5669, -0.4742, -0.0847,  0.0024,  0.0104,
        -0.0590, -0.0488, -0.1803, -0.2039, -0.0734, -0.0156, -0.0753, -0.1312,
         0.0290, -0.1547,  0.0198, -0.1598, -0.0117, -0.0290, -0.0454,  0.0077,
        -0.0759, -0.1674, -0.3091,  0.0191, -0.0314, -0.2418, -0.2721, -0.1053,
        -0.0033, -0.0575, -0.1414, -0.1471, -0.0307, -0.0577, -0.0153,  0.0294,
         0.1454, -0.0600, -0.1676,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0549, -0.0161, -0.0644,  0.0617, -0.1967, -0.0606, -0.0741, -0.4340,
        -0.0495, -0.0519,  0.0914,  0.0924, -0.0935, -0.2654, -0.4460, -0.1629,
         0.0797,  0.0559,  0.0116, -0.1545, -0.1185, -0.0095, -0.1015,  0.0698,
        -0.0785, -0.0429, -0.0419, -0.0726, -0.2953, -0.0271, -0.0189, -0.1970,
        -0.0726, -0.0950, -0.1295, -0.0518, -0.1057,  0.2207,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5623, -0.9258, -0.1952, -0.3743, -0.0150, -0.7110,  0.0669,  0.0285,
        -0.1010,  0.0435, -0.0772, -0.1292, -0.1306,  0.0311, -0.2117, -0.4348,
         0.0769, -0.7519, -0.0807,  0.0413,  0.5721, -1.3497,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4223, -0.3128, -0.0302, -0.0433, -0.0407,  0.0564, -0.0828, -0.0082,
        -0.2673, -0.4719, -0.0751, -0.0801, -0.0523, -0.0248, -0.0688, -0.1625,
        -0.5862, -0.1530, -0.0716, -0.1452,  0.1272,  0.0373, -0.1501, -0.1489,
         0.0216,  0.0188, -0.2159, -0.0065,  0.0263, -0.1786, -0.2551,  0.2349,
         0.0615, -0.0303, -0.0296, -0.4156, -0.0107,  0.0086,  0.4700,  0.0399,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0351,  0.0515, -0.1303,  0.0411, -0.0052, -0.1399,  0.0070,  0.0039,
        -0.0877, -0.1466,  0.0211, -0.0890, -0.0452, -0.3990, -0.2816, -0.0364,
        -0.0306,  0.0483, -0.2955, -0.0044, -0.0131, -0.0126, -0.0461,  0.0153,
        -0.0121, -0.3246, -0.0239, -0.2201, -0.3247, -0.1581, -0.1312, -0.2507,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6765, -1.0554,  0.0267,  0.0662, -0.0669,  0.1021,  0.0444, -0.1876,
        -0.6749, -0.0406,  0.0256, -0.2404, -0.0789,  0.0396,  0.0304,  0.0999,
         0.1124,  0.0639,  0.0280,  0.0502, -0.0287,  0.1041,  0.1100, -0.3962,
        -0.0449,  0.0323, -0.1165, -0.0157, -0.0243, -0.0059, -0.0316, -0.0328,
         0.0024,  0.0739,  0.7633,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.3429,  0.1686,  0.5686,  0.1026,  0.3149, -0.0322,  0.6178,  0.2591,
         0.5003,  0.6224,  0.0280,  0.2852,  0.2448,  0.1815,  0.1379,  0.0223,
        -0.0244,  0.0791, -0.0830,  0.1619, -0.3804,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4438, -1.4057, -0.2515, -0.2233, -0.2005, -0.2299, -0.0627, -0.2985,
        -0.5329,  0.1277,  0.0181, -0.0018,  0.1039,  0.0642, -0.1288,  0.0632,
        -0.1040,  0.4273,  0.0498,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1460, -0.4753, -0.4496,  0.0427, -0.1952, -0.0513,  0.1951, -0.1048,
        -0.0871, -0.3795, -0.6074, -0.0722, -0.1288,  0.1791, -0.0180, -0.0814,
         0.0350, -0.1059,  0.2907,  0.1995,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0266, -0.8516, -0.0084, -0.3313, -0.0019,  0.0277,  0.0033, -0.1504,
        -0.1547, -0.0924, -0.0496, -0.0248, -0.0625, -0.1777,  0.0343, -0.1464,
        -0.1455, -0.0444, -0.0965, -0.1190, -0.0336,  0.0261,  0.0709,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3384,  0.6092,  0.1751,  0.1074,  0.0508,  0.2297,  0.0517,  0.1016,
         0.3120,  0.5935,  0.0712,  0.3204,  0.1006,  0.0506, -0.0634, -0.0266,
         0.0205, -0.0049,  0.0874,  0.0919,  0.2107,  0.0159, -0.0045,  0.3175,
        -0.2050,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0812e-01,  1.4332e+00,  1.3129e-01,  1.5269e-01, -1.7866e-02,
         7.8865e-02,  3.3944e-02,  1.2906e-01,  9.5155e-03,  3.1913e-01,
         3.4243e-01, -1.0034e-02,  6.0477e-02, -7.3888e-02, -9.4470e-02,
        -1.1444e-01,  3.5288e-02, -4.0857e-02,  3.8184e-02,  1.7982e-01,
         4.3958e-01,  3.4015e-01,  2.8566e-01,  1.4043e-01, -1.2465e-01,
         1.2939e-01,  1.2060e-01, -6.6548e-02,  2.0426e-02, -8.3355e-04,
        -5.9677e-02,  2.2898e-02,  7.1030e-02,  2.9581e-02,  1.2085e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1686,  0.0049,  0.0151, -0.4028, -0.0859, -0.0595, -0.1953, -0.0501,
        -0.3729, -0.1218, -0.2024, -0.0989,  0.0172, -0.2114, -0.3707, -0.4654,
        -0.0877, -0.3531,  0.1202, -0.0368,  0.1051, -0.0141,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0994,  0.1759,  0.2346, -0.0052,  0.1798,  0.2013,  0.1903, -0.0071,
        -0.1937,  0.6165,  0.1258,  0.2350,  0.4010, -0.3830, -0.0169,  0.0784,
         0.0860,  0.0554,  0.2102,  0.1647, -0.3210,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3595e-02,  1.3325e+00,  5.2989e-03, -5.4847e-02,  1.2296e-01,
         7.4494e-02,  2.8030e-01,  6.1070e-01, -1.4959e-01, -7.4490e-02,
         9.6551e-02,  2.0375e-01,  8.2716e-02,  2.7447e-01, -1.9634e-02,
         4.0651e-04, -3.8561e-01, -2.6437e-01, -3.2669e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3750,  0.0549,  0.0038,  0.0344,  0.0849, -0.1894, -0.0038, -0.0367,
         0.0305, -0.0159, -0.0052,  0.0015, -0.0794, -0.0969, -0.0293,  0.0243,
        -0.0106, -0.2176, -0.0400, -0.1224, -0.0710,  0.0650, -0.2483,  0.0294,
        -0.1923, -0.1103,  0.0950, -0.1338, -0.0371, -0.5401, -0.0639, -0.1813,
        -0.1545, -0.1531,  0.0414,  0.0278,  0.1673,  0.0073,  0.0589,  0.0504,
         0.0206,  0.0052,  0.0639,  0.0086,  0.0177,  0.0019, -0.0318],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1935e-01,  9.3086e-01,  3.0503e-01,  5.2295e-02, -7.8515e-02,
         1.2074e-02, -2.7029e-02, -6.0794e-02,  4.5762e-02,  3.3735e-02,
        -9.4963e-02, -2.6151e-02, -6.4262e-02,  5.5027e-02, -4.9689e-02,
        -8.4721e-02,  1.8042e-01, -5.4543e-02, -1.3230e-01, -2.9158e-02,
         1.2071e-01,  2.9624e-01,  3.3789e-01,  4.4306e-01,  2.8044e-02,
        -2.0638e-01,  6.5083e-02,  2.5364e-02, -8.5522e-04,  4.9859e-02,
         1.0695e-01,  1.0339e-01,  1.7275e-01,  1.2419e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8356e-02, -1.9055e+00,  1.5716e-01,  8.3921e-02, -1.9693e-01,
        -1.5112e-01,  7.9289e-02,  5.6640e-02, -1.8311e-02, -4.1235e-02,
         1.1411e-01, -5.0619e-02, -2.7423e-01, -5.8117e-01,  1.5304e-03,
         8.5676e-02,  4.4240e-04, -1.1812e-01, -2.9015e-01, -5.7538e-01,
         8.5676e-02, -4.0979e-02, -1.3220e-01,  4.6864e-03, -3.8359e-02,
        -1.8282e-02, -4.3773e-02, -1.3306e-01, -1.0391e-01, -1.2524e-01,
         8.0006e-02,  1.9106e-01, -8.5958e-02, -3.7780e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.2670e-01, -9.3322e-01, -4.6920e-05, -2.8168e-01, -3.4872e-01,
        -5.8603e-01,  1.8534e-03, -1.1379e-01, -1.3033e-01, -3.1439e-01,
        -3.0512e-01, -1.3974e-01, -9.8304e-02, -6.4153e-01, -3.1727e-02,
        -1.0374e-02,  1.6789e-01,  7.0948e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2508,  0.0092, -0.0014, -0.3155,  0.0365, -0.0010,  0.1445,  0.1080,
         0.1441,  0.9260, -0.0447,  0.0464,  0.0128,  0.1802,  0.0417,  0.1428,
         0.0716,  0.0472,  0.0139,  0.6144, -0.0059,  0.0177,  0.0491, -0.1269,
         0.0313, -0.3106,  0.3103,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6765,  0.2903, -0.0840, -0.1414, -0.0040, -0.0012,  0.0478,  0.0467,
         0.4761,  0.1159, -0.0288,  0.0962,  0.7215,  0.1727, -0.0138, -0.0885,
         0.1473,  0.1078,  0.0204,  0.5984,  0.0238, -0.0144,  0.1148, -0.1360,
         0.1932, -0.0165,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1290, -1.1423,  0.1383, -0.2389, -0.4516,  0.0915, -0.0379,  0.0757,
        -0.1557, -0.1758, -0.4045, -0.2004,  0.0438, -0.0735,  0.0293, -0.2390,
        -0.1071,  0.0281, -0.0186,  0.0772,  1.1777,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5832, -1.5711, -0.9883, -0.1570, -0.2361, -0.2979, -0.4616, -0.1316,
        -0.1255,  0.2370, -0.1181, -0.1957, -0.0239, -0.0788, -0.4109,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7070,  0.1176, -0.1735, -0.1621, -0.2283,  0.1440, -0.0806,  0.3262,
        -0.0150,  0.4890, -0.0593,  0.1500,  0.7544,  0.2481, -0.0481,  0.0679,
         0.2558, -0.0521,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1351, -1.2806, -0.4040, -0.4725,  0.0394,  0.0970, -0.1325, -0.0263,
        -0.1338, -0.3656, -0.3260, -0.2405,  0.2439, -0.3477,  0.1712,  0.1060,
        -0.0443, -0.0182,  0.2493,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1674, -0.7490, -0.3457, -0.0080, -0.2854, -0.0808,  0.0908, -0.0125,
         0.0053,  0.0577, -0.0143, -0.0072, -0.0653, -0.0104,  0.0154, -0.0513,
        -0.1603, -0.0494,  0.0685,  0.1039, -0.3079, -0.0325, -0.2264, -0.0692,
        -0.3788, -0.0951, -0.1753, -0.0253,  0.0755, -0.1912, -0.0444,  0.0149,
        -0.2650,  0.0149, -0.1184, -0.0455, -0.1886,  0.1387, -0.0313,  0.0527,
        -0.0701, -0.0019,  0.0562, -0.1761], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1661e-01, -1.2818e+00, -7.3342e-02, -1.7607e-01, -1.8309e-01,
        -2.1126e-02, -5.2327e-02,  4.2935e-02, -2.9186e-01, -5.6697e-03,
        -5.8510e-02,  5.9356e-02,  4.9587e-02,  1.7650e-02,  1.5541e-01,
         3.1922e-02,  2.5274e-01, -4.6175e-01,  7.1032e-02, -2.4322e-02,
        -1.3992e-01,  7.4990e-02,  3.2640e-02,  6.8616e-03, -1.2519e-01,
         3.6289e-03,  7.6352e-04, -4.1657e-01,  5.7582e-02, -7.0539e-02,
        -5.8254e-02, -8.0318e-02, -2.0764e-02, -3.4421e-01,  2.8709e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0947, -1.9033, -0.3291, -0.1374, -0.1486, -0.1526, -0.5014, -0.2897,
         0.0550,  0.0105, -0.0285, -0.2141, -0.2298,  0.0403,  0.0475, -0.2077,
        -0.0515, -0.0594,  0.0077,  0.0070,  0.0133, -0.0053,  0.1490, -0.0846,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0722, -0.8663,  0.0238, -0.1091, -0.1645, -0.0521, -0.0918, -0.2117,
        -0.5189, -0.0867,  0.0498,  0.0020,  0.0582, -0.1298, -0.4567, -0.0350,
         0.0769, -0.0623, -0.2448,  0.0024,  0.0012, -0.0938,  0.0169, -0.1547,
        -0.0744, -0.1425, -0.0532, -0.1295, -0.0041, -0.0843, -0.3165, -0.0382,
         0.3859, -0.2984,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8903,  0.1578, -0.0239,  0.0219,  0.0890, -0.1703, -0.3021, -0.0296,
        -0.0170, -0.0060, -0.0729, -0.0935, -0.2285, -0.0078, -0.1836, -0.7505,
        -0.0036, -0.0116, -0.1002,  0.4257,  0.0407, -0.0453, -0.0940, -0.2069,
        -0.0941, -0.0521, -0.1004, -0.1316,  0.1022, -0.2093,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 2.0392e-01,  2.8072e-01, -3.2854e-01, -4.9449e-01, -1.9484e-01,
        -4.1059e-02, -5.0614e-02, -1.9733e-01, -1.8670e-01, -9.7569e-01,
         5.6194e-04,  2.5703e-01, -1.5994e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9656e-01,  1.2238e-01,  1.4009e-01,  1.0673e-01,  1.3477e-01,
        -1.3760e-01, -2.1868e-01, -3.4632e-01, -7.2846e-01,  7.5620e-02,
        -4.8643e-04, -1.0931e-02, -5.7519e-02, -9.6383e-02, -4.1222e-01,
         4.0862e-02,  4.8224e-01, -1.0040e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1648, -0.8201, -0.7472, -0.0995, -0.6294, -0.3755, -0.3646, -0.0103,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1898, -1.1339, -0.0549, -0.0608, -0.1186, -0.1084,  0.0031,  0.0015,
        -0.0221,  0.0283, -0.0114,  0.1370, -0.0029, -0.0121,  0.0082, -0.0383,
        -0.0012,  0.0225,  0.0064, -0.0310,  0.0445, -0.0690, -0.1401, -0.1121,
        -0.0689, -0.1240, -0.0847, -0.1429, -0.3485, -0.2787, -0.0306, -0.2168,
        -0.0859, -0.3363,  0.1717, -0.2196, -0.1569, -0.0084,  0.1008,  0.0382,
        -0.0134, -0.0612, -0.0350,  0.0346,  0.0558,  0.0381, -0.1113],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1169, -0.7426, -0.0179, -1.6793, -0.4499,  0.0700, -0.0998,  0.2153,
        -0.1375, -0.0052, -0.1852,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3574,  0.7080,  0.1637, -0.0987, -0.1391,  0.2102,  0.0155,  0.5954,
         0.0613,  0.2900, -0.0237, -0.0882,  0.2470,  0.0922,  0.0255,  0.1835,
        -0.1458, -0.2099,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2761, -0.6171, -0.8995, -0.1795,  0.2881, -0.0179, -0.4408, -0.2282,
        -0.2117, -0.1223, -0.0469, -0.0109, -0.2351, -0.3998, -0.2202,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8355,  1.4748,  0.2114,  0.1024,  0.1010,  0.2958,  0.6048, -0.1698,
        -0.1558,  0.3468,  0.7792,  0.0285, -0.0270, -0.3500,  0.4908,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2326,  0.0655, -0.2064,  0.0291,  0.1114, -0.0484, -0.0244,  0.0355,
        -0.3492, -0.2586, -0.1948, -0.2336, -0.6474, -0.0389, -0.0607, -0.0988,
        -0.2470,  0.2352, -0.0477,  0.0885, -0.2917, -0.1974,  0.0546,  0.1618,
        -0.0509, -0.0300,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1676, -0.1900, -0.2523, -0.1622, -0.2562, -0.0835, -0.1229, -0.6402,
        -1.0558, -0.1480, -0.0366, -0.2320, -0.1192,  0.0174,  0.1172,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5638, -0.7388, -0.0441, -0.1170, -0.3734, -0.1684, -0.4575,  0.0074,
        -0.0668, -1.0163, -0.0093,  0.1114, -0.0415, -0.0936,  0.2403,  0.0158,
        -0.0393, -0.0450, -0.3041,  0.0780, -0.0201,  0.0057,  0.0660,  0.0149,
         0.2394,  0.2726,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1389,  0.0174,  0.1182,  0.2018, -0.0723, -0.0793,  0.1026, -0.2119,
        -0.1583, -0.3829, -0.5531, -0.1224, -0.1410, -0.3019, -0.3382, -0.0978,
         0.0207, -0.0737,  0.0909, -0.1490,  0.0315, -0.1142, -0.0821, -0.1250,
         0.1641,  0.2342,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.2291, -0.6397, -0.0642, -0.0202, -0.0410, -0.0237, -0.1202, -0.2073,
        -0.0652, -0.0867, -0.0769, -0.0321, -0.2780,  0.0709, -0.0831, -0.0732,
        -0.0359, -0.0061, -0.0647, -0.0267, -0.1619, -0.1255, -0.0368, -0.3744,
        -0.0786, -0.1310, -0.1585, -0.0179,  0.0170,  0.0492,  0.0120, -0.0989,
        -0.0243,  0.0057,  0.1446, -0.0578,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1484, -0.2349, -0.1497, -0.1526, -0.2446, -0.0189, -0.2526, -0.0511,
        -0.0057,  0.0107, -0.0550, -0.0156, -0.0076,  0.0118,  0.0129, -0.0172,
        -0.0142, -0.0110,  0.0701,  0.0310,  0.1685, -0.0736, -0.0505,  0.0821,
        -0.1374, -0.3877,  0.0133, -0.1270, -0.4407,  0.0029, -0.0225, -0.2633,
        -0.4071,  0.0561,  0.0588,  0.1693,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5853, -0.5811, -0.3185, -0.2405, -0.2891, -0.6929, -0.1272, -0.3145,
         0.0501, -0.0277, -0.4230, -0.1317, -0.1329, -0.0351,  0.1146,  0.0812,
        -0.1587, -0.0953, -0.0759,  0.1843,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6365, -0.1125, -0.0337, -0.1652,  0.0464,  0.0213, -0.3448, -0.1188,
         0.0079, -0.1453, -0.0307,  0.0121, -0.0466,  0.0160, -0.5554, -0.1411,
        -0.0664, -0.2575, -0.0907, -0.1408, -0.2194, -0.0341, -0.0069, -0.0512,
        -0.3871,  0.0327, -0.0742, -0.0820, -0.0655, -0.1665, -0.1133,  0.0323,
        -0.0648, -0.4167,  0.0546,  0.1387,  0.0013,  0.0201,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4138e-04, -5.5590e-01, -1.4156e-01, -2.7819e-01, -3.7757e-02,
        -6.1219e-03, -6.2625e-04,  3.8424e-02, -4.1674e-02, -7.3026e-02,
        -2.0252e-01, -1.8206e-01, -2.5476e-02,  1.1779e-01, -1.9495e-02,
        -1.2413e-01,  8.4511e-02, -3.2519e-01, -4.5863e-02, -2.7396e-03,
         4.7871e-02,  1.0405e-01,  1.7836e-01, -3.1214e-02, -3.0695e-02,
         7.7362e-04, -2.7651e-02, -1.0420e-01, -3.1715e-01,  1.3740e-01,
        -9.3089e-02,  2.2953e-02,  8.4005e-02, -2.2767e-02, -7.8701e-02,
         1.7109e-03,  2.3720e-02,  8.5745e-02,  4.6873e-03, -8.3077e-02,
         3.1539e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0397,  1.6699, -0.2367, -0.0061,  0.3080,  0.4671,  0.0455,  0.1752,
         0.1412,  0.0901,  0.1039,  0.3811,  0.1575,  0.0559, -0.1111,  0.2572,
         0.0365,  0.3243,  0.0524,  0.3048,  0.2261, -0.1214,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0587,  0.0076, -0.0285, -0.0215,  0.0133, -0.0427,  0.0167, -0.0164,
        -0.0113,  0.0184, -0.0251, -0.1480, -0.0086, -0.1426, -0.0429, -0.2368,
         0.0087, -0.1711, -0.0942, -0.2210, -0.3709, -0.0445,  0.1290, -0.1601,
        -0.0099, -0.0068, -0.0550, -0.0879, -0.1096,  0.1190, -0.0154, -0.1390,
        -0.0516,  0.0033, -0.0229,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0699, -0.2079, -0.5767, -0.0209, -0.3483, -0.0184,  0.0548,  0.1493,
         0.0562, -0.1064, -0.1269, -0.0647,  0.0563, -0.0408, -0.0479,  0.1077,
         0.1263,  0.0611, -0.3234, -0.2038, -0.2731, -0.0665,  0.0531, -0.0773,
        -0.0202, -0.0907, -0.1119,  0.2804,  0.1375,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0287e-01, -1.6352e+00, -5.2427e-01, -2.3547e-01, -1.8770e-01,
        -4.5010e-01,  5.4155e-03,  1.1072e-01, -1.9025e-01, -1.2076e-01,
        -2.4462e-01, -1.9340e-01, -1.9198e-01, -1.4858e-01, -1.7325e-01,
        -3.5838e-01, -1.6883e-01,  1.0007e-01, -7.6869e-02,  9.8026e-02,
        -1.0157e-03, -2.5146e-02,  8.1483e-02,  2.8465e-01,  4.4085e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4178,  0.0879, -0.0666,  0.0025, -0.6407, -0.0521, -0.1128, -0.4573,
        -0.4861, -0.1203,  0.0164,  0.0763, -0.0105,  0.0585, -0.2465, -0.5235,
         0.1024, -0.0442, -0.0671, -0.0421, -0.0907,  0.0846,  0.0929,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3083,  0.0654,  0.0277,  0.0160,  0.0547,  0.1353,  0.0668, -0.0313,
        -0.0209, -0.0317, -0.0780, -0.1144, -0.1402, -0.2004,  0.0220, -0.0055,
        -0.0415, -0.0525, -0.0424,  0.0108, -0.0194, -0.0174, -0.0065,  0.0531,
        -0.1644, -0.1440, -0.4089, -0.0107, -0.4304, -0.1682, -0.2573, -0.1018,
        -0.1622, -0.2032, -0.1146,  0.0591, -0.1001,  0.0892, -0.0041,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3673,  0.0515, -0.2543, -0.6744, -0.8625, -0.0980,  0.0829, -0.2884,
        -0.3773, -0.0138, -0.4556,  0.0874,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.5601, -0.3110, -0.7994,  0.0589, -0.0492, -0.0957, -0.0991,  0.0698,
         0.0240, -0.0797, -0.0201,  0.0111, -0.0768,  0.0744,  0.0021,  0.0292,
        -0.0388, -0.2369,  0.0063, -0.1288, -0.0976, -0.1080,  0.0752, -0.0078,
         0.0490,  0.0125,  0.1018,  0.0276,  0.0607,  0.0441,  0.0832,  0.1193,
        -0.1066, -0.0149,  0.0079, -0.0587, -0.1656, -0.0182,  0.0034,  0.0018,
         0.0720,  0.1069,  0.0822,  0.0343,  0.1887,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5153, -2.0019, -0.2494, -0.6250, -0.1580,  0.5609,  0.0270, -0.2331,
        -0.5471,  0.0128, -0.0643,  0.3442,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6035,  1.6567,  0.0158,  0.5292,  0.2393,  0.3496, -0.1125,  0.1808,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2688, -0.4517, -0.0765,  0.0633, -0.1771, -0.0747,  0.0298, -0.0097,
        -0.1293, -0.3342, -0.0089,  0.0271, -0.0205, -0.0491,  0.0019,  0.0744,
         0.0136, -0.0333,  0.0221, -0.0051, -0.0235,  0.0205, -0.0051, -0.0618,
        -0.0362, -0.0924, -0.0240, -0.0126, -0.1815, -0.0515, -0.0436, -0.0106,
        -0.0425, -0.0304,  0.0026, -0.0223, -0.0753, -0.0116, -0.0641,  0.0030,
         0.0220, -0.0360, -0.0346, -0.0235,  0.1287,  0.1371, -0.2154, -0.0620,
        -0.2347, -0.2542, -0.1191, -0.0293, -0.0838, -0.0058, -0.1214,  0.1193,
         0.0071,  0.1438, -0.1823,  0.4899, -0.0305], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0679, -0.0410,  0.0203, -0.0672, -0.3038, -0.0122, -0.0352, -0.0663,
        -0.0366,  0.1027,  0.0844, -0.2115, -0.0455, -0.1190, -0.3102,  0.0429,
         0.0542, -0.0247, -0.1075, -0.1432, -0.2193, -0.2032, -0.0101, -0.0210,
        -0.2637, -0.0660, -0.0745, -0.0873, -0.1701, -0.1002, -0.0249,  0.1249,
        -0.3417,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3667, -0.6316, -1.0776, -0.2819, -0.0822, -0.0798, -0.3356, -0.0262,
         0.0270, -0.1211,  0.0392,  0.0171, -0.0764, -0.0261, -0.0096,  0.2083,
         0.0291, -0.0714, -1.0849, -0.0067,  0.0674, -0.1040,  0.1640,  0.1334,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0598, -0.5206, -0.1502, -0.0159, -0.1338, -0.3334, -0.0922, -0.1900,
         0.0321, -0.4570, -0.3792, -0.2630, -0.3527, -0.0463,  0.0525, -0.0926,
        -0.0150, -0.0173,  0.1124,  0.0429, -0.0498,  0.0084,  0.1064, -0.1838,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0644, -0.0292, -0.2670, -0.3635,  0.0023, -0.0685, -0.0972, -0.1666,
        -0.0612,  0.0077, -0.0021,  0.0167, -0.0857,  0.0228, -0.1744, -0.2823,
        -0.2165,  0.0582, -0.0186, -0.0961, -0.2076,  0.0694, -0.0690, -0.2343,
        -0.0640, -0.0626, -0.0931, -0.0594, -0.0346,  0.0406, -0.0962,  0.0477,
         0.1262,  0.0670,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4342,  0.1403, -0.0738,  0.1990, -0.3484,  0.2614, -0.0306,  0.0009,
         0.0568,  0.0156, -0.1338, -0.0337, -0.0025, -0.2182, -0.0084, -0.2537,
        -0.5035, -0.0865, -0.2829, -0.0270, -0.1742, -0.2860, -0.0290,  0.0016,
        -0.1586,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2925, -0.0796,  0.1358, -0.0304,  0.1932,  0.1305,  0.5581,  0.2750,
         0.4156,  1.0432, -0.0594,  0.1258,  0.1660, -0.1407, -0.2351, -0.3829,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0153, -0.9013, -0.0850, -0.4528, -0.2678, -0.0904, -0.6222, -0.1120,
         0.0288,  0.0192, -0.1416,  0.0063, -0.2280, -0.0648,  0.0148, -0.0103,
        -0.1190, -0.1840, -0.1454,  0.0531,  0.0359, -0.2876,  0.1832,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0609,  0.2086,  0.2044,  0.9742,  1.5625, -0.1727, -0.4197,  0.0475,
         0.0889, -0.4136, -0.1293,  0.4317,  0.0758, -0.0834,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.8162,  0.5750,  0.6541,  0.9919,  0.1850, -0.5187,  0.2280, -0.1495,
         0.2439,  0.1561,  0.3318,  0.2622,  0.1780,  0.0983,  0.0111, -0.0019,
        -0.1535,  0.5302,  0.0165,  0.3207,  0.0789, -0.0350, -0.2141, -0.3950,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3571, -0.3765, -0.3805, -0.0494, -0.7729, -0.1895, -0.6424,  0.0517,
        -0.1526, -0.3564, -0.0558,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1635,  0.0370,  0.1155, -0.0807, -0.1136, -0.3623, -0.4503,  0.0166,
        -0.4342, -0.1869, -0.0994,  0.0069,  0.0090,  0.0417, -0.1607, -0.0783,
        -0.0079, -0.4123,  0.0886, -0.0169,  0.0807,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3873,  0.0383,  0.0056,  0.1125, -0.3445, -0.1257, -0.2494, -0.7834,
        -0.5262, -1.1691, -0.1418,  0.0218, -0.1192, -0.4890,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0614,  0.1754,  0.0475, -0.0257,  0.0430, -0.0699, -0.0151, -0.0865,
        -0.0662, -0.0179,  0.1811, -0.0742,  0.8671,  0.1411,  0.0598,  0.0674,
         0.1506,  0.2761,  0.1360, -0.0630,  0.3432,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1709, -1.7011,  0.0228, -0.3068,  0.6216, -0.2054, -0.0081, -0.6873,
        -0.0899, -0.2374, -0.2353,  0.4148,  0.3117,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1881, -0.2519, -0.0784,  0.0047, -0.0829, -0.0145, -0.0586, -0.0749,
        -0.0626, -0.0385,  0.0541, -0.0571,  0.0696, -0.0086, -0.0165, -0.0236,
         0.0446,  0.0383,  0.0402, -0.0593,  0.1664,  0.1620,  0.0522, -0.0182,
        -0.3296,  0.0146, -0.3327, -0.4350, -0.2649, -0.1684, -0.2222, -0.0788,
         0.0599,  0.0015,  0.0720, -0.0482, -0.1809,  0.0017,  0.0331, -0.0191,
        -0.0723,  0.0112, -0.0306,  0.0135,  0.0366], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1946, -0.1024, -0.2356, -0.0544,  0.0376, -0.0898, -0.0874, -0.0620,
         0.0165, -0.0952, -0.2929, -0.0316,  0.0398, -0.0239, -0.1158,  0.0818,
        -0.5671, -0.1027, -0.0733, -0.0155, -0.1002, -0.4120, -0.0497, -0.3838,
        -0.0409,  0.1433, -0.0036, -0.1270, -0.4213, -0.1013, -0.1910, -0.0090,
         0.1034, -0.0963,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0181, -0.0186,  0.0857,  0.1974,  0.0164, -0.0271,  0.0163,  0.0440,
        -0.4090,  0.1012,  0.1198, -0.0071,  0.2108, -0.0180, -0.0840, -0.1566,
         0.0316, -0.1937,  0.3136,  0.9803,  1.2492,  0.0875,  0.2327, -0.3116,
         0.3473,  0.2372,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0328, -1.2650, -0.1198, -0.3529, -0.0831, -0.1040, -0.0728, -0.3186,
        -0.1127, -0.0344, -0.0136,  0.0081, -0.0063, -0.0974, -0.1081, -0.0767,
        -0.2077,  0.1055, -0.0766, -0.0528, -0.0552,  0.0397,  0.0684,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5803e-02, -3.5354e-01,  4.7794e-02, -2.7655e-02,  1.2286e-02,
        -1.1404e-03, -2.5268e-03, -7.6542e-02, -1.2484e-01, -6.3985e-03,
         3.5498e-02,  7.1287e-03,  6.2674e-02,  5.3096e-02, -6.7816e-01,
         1.5956e-01, -1.4374e-01, -1.8423e-01, -2.2660e-01, -1.5066e-01,
        -1.2308e-02, -1.9329e-02,  1.4264e-01,  7.0669e-02,  3.8153e-02,
        -1.7958e-02, -1.7992e-01, -3.3059e-01,  3.9326e-02, -3.7456e-05,
         7.5012e-02,  3.8222e-02,  4.3048e-02,  8.0442e-02, -6.5008e-03,
         5.5669e-02, -5.5902e-02,  1.3903e-01, -7.2018e-02, -1.9287e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0207, -0.1692, -0.0941, -0.1719, -0.3641, -0.0425,  0.1919, -0.2001,
         0.0383,  0.0818, -0.0102, -0.0656, -0.0614, -0.0775, -0.1319, -0.2267,
        -0.1192, -0.0208, -0.0054, -0.0948,  0.0598, -0.1957, -0.0424,  0.0749,
         0.0032,  0.0071,  0.0215, -0.1325, -0.0561,  0.0906, -0.1926, -0.0199,
        -0.0872, -0.2847,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0578, -0.1198, -0.0842, -0.0336, -0.3438,  0.0227,  0.0583, -0.0018,
         0.0859,  0.0190, -0.0356, -0.0787,  0.1404, -0.0176, -0.3883, -0.0243,
        -0.0812, -0.0077, -0.1779,  0.0067,  0.0090, -0.0037, -0.1007,  0.0667,
        -0.0793, -0.1956,  0.0124, -0.0755, -0.0170,  0.0187,  0.0209,  0.0396,
         0.0718,  0.1050, -0.1392,  0.0295, -0.0801, -0.1665, -0.1046, -0.1222,
        -0.0849, -0.0566, -0.0910, -0.0596, -0.0068, -0.0045, -0.0008,  0.0018,
        -0.0833, -0.0325, -0.0344, -0.0600], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2628,  0.0332, -0.0377, -0.2093, -0.5829, -0.1439, -0.0022,  0.1419,
        -0.0051, -0.0711, -0.0101, -0.0153, -0.0373, -0.4383,  0.0311, -0.2017,
        -0.3529, -0.1858, -0.0856,  0.0192, -0.1511, -0.0793, -0.1691, -0.2310,
         0.0270,  0.0388, -0.0624,  0.0223,  0.0607, -0.0633,  0.0594,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2269, -0.9498, -0.0449, -0.1010,  0.0562,  0.0912,  0.0542,  0.0620,
        -0.0764,  0.0371, -0.0666, -0.2285,  0.0302, -0.0860, -0.1981, -0.3048,
        -0.0986, -0.2081, -0.2874, -0.3398, -0.1647, -0.0782, -0.1557, -0.1311,
        -0.1844,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0382, -0.4408,  0.0634,  0.1425,  0.3103,  0.8229,  1.4292,  0.1039,
         0.0901, -0.0395, -0.0238, -0.0200,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2044, -1.3420, -0.0373, -0.3059, -0.2194, -0.0252, -0.1615,  0.0213,
        -0.0082,  0.0741,  0.0042, -0.1464, -0.1375, -0.3013,  0.0718, -0.2129,
        -0.0586, -0.1001,  0.0160,  0.1185, -0.1206, -0.0934,  0.0394,  0.0029,
         0.1905, -0.3487, -0.0572, -0.0545,  0.0900,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4179, -0.9171,  0.4911,  0.2132, -0.1425, -0.2838, -0.7941,  0.2530,
        -0.2030, -0.0846, -0.0298, -0.0830,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1632, -1.2087, -0.0232, -0.3304,  0.1156, -0.3854, -0.5787,  0.0989,
        -0.1739,  0.0434, -0.1093, -0.0544,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2047,  0.2530, -0.4327,  0.0659, -0.9152,  0.2446,  0.0380, -0.1594,
        -0.6040, -0.1743, -0.0062, -0.4243, -0.1349,  0.0758,  0.0465, -0.3168,
         0.0749,  0.2292,  0.0573, -0.0809,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0077, -0.9413,  0.0302, -0.0958, -0.3923, -0.1396, -0.0477,  0.0153,
        -0.1568,  0.0164,  0.0258, -0.0244, -0.0152, -0.0754,  0.2999,  0.0691,
        -0.1457,  0.0439, -0.0167, -0.0664, -0.0978,  0.1230, -0.0586, -0.0322,
        -0.0786,  0.0705, -0.0346, -0.1768, -0.1530,  0.1827,  0.1016,  0.0183,
        -0.1026, -0.1708, -0.0551, -0.0300,  0.1692,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1005e+00,  2.7439e+00,  1.8788e-01, -4.6358e-01,  1.4727e-01,
         9.8712e-04,  8.8569e-02,  5.1725e-01,  1.9403e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1811,  0.1542,  0.0484, -0.8039, -0.1511,  0.0127, -0.3129, -0.4287,
        -0.0175, -0.0393, -0.1271, -0.0532, -0.0823, -0.3582, -0.0234,  0.0032,
         0.0345,  0.0743, -0.1064, -0.1572, -0.0500,  0.0682,  0.0350,  0.0086,
         0.0451,  0.3415, -0.1063,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2407,  1.0241, -0.0622,  0.2765,  0.0142,  0.1116, -0.0254,  0.0275,
         0.0342, -0.1081, -0.0953,  0.2240,  0.0220,  0.0055, -0.0855, -0.0419,
         0.1436, -0.0413, -0.0856, -0.0112, -0.0280,  0.1366,  0.1017, -0.0816,
        -0.0261, -0.0740,  0.0726,  0.1346,  0.4775,  0.0503, -0.0569, -0.0080,
         0.0502, -0.0627,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.1577,  0.0188,  0.0112, -0.0172, -0.0766, -0.1706,  0.0046, -0.0101,
         0.1497,  0.1477, -0.1149, -0.2611,  0.0131, -0.1133,  0.0718, -0.1270,
        -0.4211, -0.0039, -0.0732, -0.0264, -0.0662, -0.0493, -0.0589, -0.0176,
         0.0116, -0.0182,  0.0419,  0.0454,  0.0626, -0.1098,  0.0713, -0.2996,
        -0.3255, -0.0941, -0.1233, -0.0969, -0.1699, -0.0145,  0.0271,  0.0017,
        -0.0491, -0.2324,  0.0489,  0.0990,  0.0645,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1046, -0.2519, -0.1467, -0.0395, -0.1925, -0.4287, -0.6571, -0.0127,
         0.0199, -0.1473,  0.2574, -0.0625, -0.3301, -0.1161, -0.0266, -0.2945,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1932, -0.0182,  0.0588,  0.0504,  0.0993, -0.0143, -0.2619,  0.1390,
         0.6199,  0.5937,  0.1238,  0.3746,  0.0900,  0.1281,  0.0736, -0.0100,
         0.0318, -0.0469, -0.0468, -0.0277, -0.0152, -0.0148, -0.0467, -0.0558,
         0.0509,  0.0648,  0.1952,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0765, -0.0755, -0.1882,  0.0084, -0.0658,  0.0227, -0.0690,  0.0015,
        -0.0978, -0.0338,  0.0328, -0.0076, -0.0426, -0.0035, -0.0160,  0.0065,
        -0.0537,  0.0169, -0.0719, -0.0667,  0.0730, -0.1813,  0.0348, -0.2416,
         0.0101, -0.0295, -0.1462, -0.2599,  0.0698, -0.1919, -0.0281, -0.1958,
        -0.1055, -0.0393, -0.1783, -0.2035, -0.0622, -0.0755,  0.0105, -0.0476,
        -0.0308, -0.0810, -0.0042, -0.0046,  0.0022, -0.0233], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0160,  0.1186,  0.0365, -0.0634, -0.0048, -0.0095, -0.0099, -0.2324,
         0.0699, -0.1206, -0.0108, -0.0843,  0.0541, -0.0047, -0.0354, -0.0019,
        -0.0143, -0.0761, -0.2285, -0.5084, -0.0866, -0.1253, -0.0305, -0.0941,
         0.0472, -0.0574, -0.1715, -0.0293,  0.1172,  0.0095,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5711,  0.2362,  0.3055, -0.1850,  0.0038,  0.3229, -0.1068,  0.6702,
         0.3965,  0.3565,  0.2458,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3576, -1.1869, -0.0640,  0.0262, -0.3061, -0.1516, -0.2229, -0.2114,
         0.0127, -0.0531,  0.0595,  0.0571, -0.2987,  0.0120, -0.1243, -0.1421,
        -0.1072, -0.1248,  0.0427,  0.0462,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0325, -1.0055, -0.7679, -0.2378,  0.1882, -0.4362, -0.3355,  0.0360,
        -0.1981, -0.2693,  0.0600,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2195, -0.8469, -0.2701, -0.8445,  0.0395,  0.0080, -0.0814, -0.0958,
        -0.4399, -0.0500, -0.0287, -0.1008, -0.0835, -0.0151, -0.0362, -0.3157,
        -0.1310,  0.0137, -0.0255,  0.0900,  0.3743,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0994, -1.3621, -0.6762, -0.0720,  0.1613, -0.7981,  0.4575, -0.1061,
        -0.1304,  0.0326,  0.0427,  0.0019,  0.1224,  0.0050, -0.0470, -0.1940,
        -0.0127,  0.0693,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3660, -0.5993, -0.6910, -0.0036, -0.0749,  0.0077, -0.2481,  0.0337,
        -0.0231,  0.0010,  0.0583,  0.0519, -0.1642, -0.1554, -0.2025,  0.0274,
        -0.0446, -0.1343, -0.0757,  0.0406, -0.0165, -0.0904, -0.1715,  0.0322,
        -0.1477,  0.0548, -0.1764, -0.0673, -0.0837, -0.3050,  0.0340, -0.0148,
        -0.0118,  0.0649, -0.0603,  0.0032,  0.0436,  0.0923, -0.0222,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4228e-02, -1.6043e+00,  3.4143e-02, -2.5393e-01, -6.3432e-02,
        -3.4633e-02,  8.1889e-02,  1.3669e-01, -1.4865e-01, -1.2574e-01,
        -7.5686e-02, -2.3026e-01, -8.8067e-02,  2.1119e-02, -2.5654e-01,
        -1.0903e-01, -2.3803e-01, -1.1129e-01,  7.6299e-02, -1.2162e-01,
        -1.5013e-02,  9.5121e-03,  2.1312e-04,  6.7214e-03, -4.0575e-02,
        -4.6793e-02, -6.1307e-02, -3.0947e-02, -2.1101e-01, -2.4296e-01,
        -3.8197e-02, -8.8730e-02, -1.0540e-01, -1.0660e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.7160, -0.7046, -0.6587, -0.8270, -0.2213, -0.3957, -0.0376,  0.1086,
        -0.2207,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5259e-01, -6.9571e-01, -7.5028e-02, -1.5057e-01, -3.4710e-02,
        -2.9999e-01, -5.8489e-03,  7.6231e-03, -3.1229e-02, -1.8504e-01,
        -8.3135e-02, -1.5527e-02, -3.6913e-02, -3.0319e-03,  1.2546e-02,
        -2.0277e-01,  5.3032e-02, -6.5367e-01,  2.7331e-02, -1.1201e-02,
        -1.1954e-04,  7.2985e-02,  1.1303e-01,  6.5288e-02, -2.0901e-01,
        -1.8451e-02, -1.3315e-02, -1.4295e-02, -3.6299e-03, -1.3879e-02,
         6.5004e-04, -2.1962e-01,  3.0098e-02, -1.5192e-01,  1.6979e-01,
        -5.9685e-03,  6.0991e-02, -2.4013e-02,  3.7969e-02, -7.3530e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2558, -0.3243, -0.4953, -0.0297, -0.1604, -0.0097,  0.0107, -0.0227,
        -0.1253, -0.0007, -0.0113, -0.0802, -0.0127, -0.0602,  0.0509, -0.0464,
         0.0792,  0.3543,  0.0012, -0.0371,  0.0072,  0.0231, -0.0911,  0.1567,
        -0.0485, -0.0706, -0.1486, -0.0252, -0.1781, -0.2211, -0.0280, -0.0167,
         0.0080, -0.0046,  0.0434,  0.0439, -0.0638, -0.0442,  0.0304,  0.0512,
         0.0420,  0.1398,  0.0107,  0.0107, -0.0222,  0.0987, -0.0235,  0.0487,
         0.0195,  0.0666,  0.1564,  0.1275,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1833,  1.1673,  0.1547,  0.0935,  0.1383,  0.2212,  0.4751,  0.1867,
         0.0615,  0.0373, -0.2067,  0.1285,  0.2540,  0.2171,  0.0834,  0.2440,
         0.0723, -0.1184,  0.2625,  0.3317,  0.0360,  0.3339,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2399e-04,  2.2392e+00,  1.2025e-01, -5.8451e-02,  2.0198e-01,
        -5.2170e-02, -2.3518e-02,  2.0197e-01,  1.6099e-01, -8.6862e-02,
        -3.7811e-02, -3.6788e-02, -2.6343e-02, -7.6077e-02, -6.4739e-02,
         3.4655e-02, -8.1562e-02, -8.4467e-02,  9.1182e-04, -3.1016e-01,
        -3.1423e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0337, -0.2593, -0.1413, -0.0832, -0.0477, -0.0310, -0.0470,  0.0425,
        -0.0617, -0.0191, -0.0106,  0.0143, -0.0320, -0.0128,  0.0128,  0.0103,
         0.0204,  0.0373, -0.0196,  0.0121,  0.0433, -0.0074,  0.0064,  0.0377,
        -0.0315, -0.0055, -0.0450, -0.1104, -0.0532, -0.1244, -0.2829, -0.1414,
        -0.0277, -0.0840, -0.1158, -0.0532,  0.0176, -0.0427, -0.0300, -0.0115,
        -0.5804,  0.0225, -0.0060, -0.0657, -0.1546,  0.0187, -0.0624, -0.0358,
        -0.0078,  0.0466, -0.0534, -0.0357,  0.0502, -0.0446, -0.1172],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9146, -0.0301,  0.0224, -0.0779, -0.0086, -0.0600, -0.2893,  0.0237,
         0.1689,  0.0114,  0.0687, -0.0257, -0.0059, -0.0887, -0.1844, -0.0218,
         0.0351,  0.0073, -0.0302, -0.0533,  0.0201, -0.0309, -0.0835,  0.0523,
        -0.0506,  0.0466, -0.0155, -0.1000,  0.0102,  0.0113, -0.1525, -0.0534,
        -0.0715, -0.3862,  0.0490, -0.1547, -0.0664, -0.1288, -0.1633, -0.1343,
        -0.0484, -0.2359,  0.0908,  0.0726,  0.1680,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1691, -1.3714, -0.4975, -0.3682,  0.0096, -0.0480, -0.1268, -0.0488,
        -0.1559, -0.0426,  0.0046, -0.1154,  0.1125, -0.2280, -0.3156,  0.1197,
         0.0742, -0.1764, -0.0666, -0.1111, -0.3042,  0.1663, -0.1991,  0.3334,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0262, -0.0354,  0.0582,  0.0401, -0.0791, -0.0663, -0.2157, -0.1566,
         0.0356,  0.1316, -0.0322, -0.0480,  0.0325, -0.1862, -0.0095, -0.0297,
        -0.2720, -0.1082, -0.1084, -0.2755, -0.0782, -0.1213, -0.2109, -0.2991,
         0.0079, -0.2338, -0.2861, -0.0821, -0.0686, -0.3062, -0.0104,  0.0583,
        -0.0105,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3424, -1.3872, -0.4296, -0.5104,  0.0378, -0.2233,  0.0741, -0.0964,
        -0.8630,  0.0391, -0.0754, -0.0789, -0.1199, -0.1864,  0.1155, -0.0703,
        -0.2482,  0.1336, -0.3028,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8774, -0.1269,  0.0836, -0.2803, -0.4254,  0.0120, -0.2692, -0.2224,
        -0.7128, -0.0504,  0.0361,  0.0083,  0.0258,  0.2259, -0.0581, -0.0426,
        -0.1161, -0.3532, -0.0313,  0.0163, -0.1620, -0.0632, -0.0166,  0.1621,
         0.0563,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0589e-01, -4.4314e-01, -2.0860e-01, -1.6377e-01, -2.9362e-01,
        -5.2680e-01, -1.3130e+00,  2.4342e-01, -7.1815e-02, -9.0595e-02,
        -4.7727e-02,  1.5639e-01, -1.0719e-01, -8.5158e-02,  4.3562e-03,
         2.6687e-01, -4.3272e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.2174, -0.0431, -0.1177,  0.0291, -0.0088, -0.0343,  0.0515, -0.2452,
        -0.0862, -0.1271, -0.0766, -0.0190,  0.0603,  0.0808,  0.0497, -0.0749,
        -0.1493, -0.5695,  0.0354, -0.3367,  0.0720, -0.3359, -0.5254, -0.0059,
        -0.0986, -0.0048,  0.0542,  0.0601,  0.0199,  0.0673, -0.0210,  0.2601,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4078, -0.5770, -0.5194, -0.1744, -0.0492, -0.0786, -0.2121, -0.5202,
        -0.0044,  0.0433, -0.0204,  0.0126, -0.2926, -0.0644, -0.1372,  0.0399,
        -0.0954, -0.0058, -0.3802, -0.0390, -0.0479,  0.0543, -0.1740, -0.4290,
         0.0345, -0.0899,  0.2112,  0.0967,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0334, -0.4870,  0.1149, -0.1047, -0.1188, -0.2327, -0.5347, -0.0722,
        -0.0604, -0.0410, -0.0491, -0.3239, -0.0846,  0.0030, -0.1037, -0.2245,
        -0.1583, -0.2382, -0.1945,  0.2045, -0.0765, -0.0596, -0.0575,  0.0008,
        -0.1597, -0.0653, -0.1172,  0.0355,  0.3591,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0304,  0.0195, -0.0443, -0.1850, -0.6134, -0.0139,  0.0151, -0.0041,
        -0.0730,  0.1188,  0.2406, -0.0651, -0.1475, -0.4756,  0.2437, -0.2529,
         0.0396, -0.5228, -0.0245, -0.1960, -0.1228,  0.0395, -0.1638,  0.0432,
        -0.0285, -0.0243, -0.0562, -0.0297, -0.0479,  0.0074,  0.0444,  0.2624,
        -0.0055,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1718e-01, -8.9632e-01, -5.5883e-02, -1.2003e-01, -9.2658e-02,
        -1.9105e-01,  8.8959e-02, -9.8503e-02, -7.0193e-02, -2.8177e-01,
        -1.9123e-01,  3.4677e-02,  2.8216e-03, -8.4846e-02, -9.0832e-02,
         2.7642e-02, -1.2659e-01, -7.0030e-02, -5.3815e-01, -1.7294e-04,
        -3.4521e-01, -5.7659e-02, -7.4408e-02,  2.9803e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3101e-01, -6.4715e-01, -4.5605e-01, -7.9408e-02, -1.2628e-01,
        -7.8810e-02, -1.4752e-01, -2.1628e-01,  1.1908e-01, -1.5709e-01,
        -6.6380e-02, -1.0966e-01, -1.3132e-01, -3.8955e-02, -7.6523e-02,
        -1.0104e-02,  7.8110e-05, -9.2496e-02,  1.8500e-02, -3.9583e-01,
        -7.0644e-02,  3.2985e-02, -1.4449e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3683, -0.8042, -0.0537,  0.0600, -0.0961, -0.2164, -0.0096, -0.0870,
        -0.0616, -0.1087,  0.0035, -0.1109, -0.1006,  0.0783, -0.0224, -0.4112,
        -0.4206, -0.1555, -0.1935, -0.0504, -0.0603, -0.0675, -0.0832, -0.0738,
        -0.1938, -0.0736, -0.0228,  0.0035, -0.0122, -0.3296, -0.1340, -0.1462,
        -0.0557,  0.0400, -0.1258, -0.0964, -0.0489,  0.1667, -0.2390,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1531, -0.1120, -0.0202, -0.1052, -0.3603, -0.0410,  0.0771,  0.0277,
        -0.0445,  0.1041,  0.0390, -0.0092,  0.0316, -0.0094,  0.0579, -0.1106,
        -0.0485,  0.0410,  0.0154,  0.0571, -0.1283, -0.0007, -0.0833, -0.0979,
        -0.3436, -0.4564, -0.1399, -0.0950, -0.0400, -0.1214, -0.0552, -0.0875,
        -0.1269, -0.0483, -0.0659,  0.0152, -0.0087, -0.1655,  0.0106, -0.0056,
        -0.0198, -0.0325,  0.1371,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0359, -1.3245, -0.1038,  0.0118,  0.1059,  0.0801,  0.0572,  0.0107,
        -0.2936,  0.0223,  0.0905, -0.0019, -0.0814, -0.0021,  0.0049,  0.0220,
        -0.0402, -0.2161,  0.0443, -0.0085,  0.0030, -0.0151, -0.0091,  0.0016,
         0.0115,  0.0143,  0.0234,  0.0295, -0.0646,  0.0276, -0.0610, -0.1930,
        -0.3420, -0.0016, -0.0502,  0.0653,  0.0415, -0.0955, -0.0868, -0.0078,
        -0.0590, -0.1481, -0.0075, -0.0044,  0.0052, -0.0146, -0.0231,  0.0034,
         0.0517, -0.0103,  0.0482, -0.0090, -0.0139,  0.0198, -0.0119, -0.0033,
        -0.0635], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0931e-01,  1.8033e+00,  2.6913e-01,  1.6983e-03,  2.5706e-01,
        -5.4852e-03,  3.5417e-01,  7.0900e-01, -4.6706e-02,  4.2372e-01,
         4.3588e-01,  8.3960e-02, -1.7111e-02, -1.1529e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1343, -0.6520,  0.1244, -0.0106, -0.0623, -0.0656, -0.0088, -0.0220,
        -0.0972, -0.0221, -0.1200, -0.8209, -0.1022, -0.0113, -0.1162, -0.1587,
        -0.0633, -0.1689,  0.1647, -0.3266, -0.5141, -0.1811, -0.1192, -0.0886,
         0.0689, -0.0391,  0.0359, -0.0082,  0.0294, -0.3968,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0543, -0.4597, -0.1007, -0.5089, -0.6337, -0.2055,  0.0248, -0.1165,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0094, -0.4497, -0.0488, -0.0250, -0.2314, -0.2544, -0.0769, -0.0414,
        -0.0243, -0.0818,  0.0693,  0.0690, -0.1730, -0.2050,  0.0101, -0.0407,
         0.0119,  0.0228,  0.0417,  0.0223, -0.0369,  0.0102,  0.0236, -0.2430,
        -0.1789, -0.1333, -0.1968,  0.0437, -0.2914, -0.3123, -0.1141, -0.2256,
        -0.4279,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0715, -1.0177, -0.5839, -0.0637,  0.0780,  0.0426, -0.0862, -0.1114,
         0.0492, -0.0629,  0.0298,  0.0551, -0.0014,  0.0305,  0.1253,  0.0290,
        -0.0184,  0.0997,  0.0182,  0.0151,  0.0159,  0.0316, -0.0286,  0.0451,
         0.0255, -0.0705, -0.0842, -0.3852, -0.0098, -0.0627, -0.0580, -0.1001,
        -0.1622, -0.2422, -0.0116,  0.0321, -0.1140,  0.0573,  0.0135,  0.0072,
         0.0516,  0.0148,  0.0148, -0.1396,  0.0211,  0.0212, -0.0214,  0.0050,
        -0.0275, -0.0058,  0.0842, -0.1066, -0.0233, -0.0617, -0.0733,  0.0427,
        -0.0054,  0.0999, -0.0441, -0.1424, -0.0266, -0.0481, -0.2715, -0.0294,
         0.0011,  0.0587,  0.0133,  0.0978,  0.0623, -0.0093,  0.0512],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0976, -0.0754, -0.0756, -0.0561, -0.1444, -0.0568, -0.2811,  0.0182,
        -0.0052,  0.0417, -0.0560,  0.0563, -0.0311,  0.0018, -0.0234,  0.0423,
        -0.0109,  0.1098, -0.1463, -0.5076, -0.1829, -0.0964,  0.1300, -0.1942,
         0.1153, -0.0512, -0.1054,  0.0072,  0.0143, -0.0381, -0.0296, -0.0231,
        -0.2017, -0.3253, -0.0870, -0.0423, -0.1539, -0.0376,  0.0050, -0.0343,
         0.0522, -0.1067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0894, -0.9706,  0.1045, -0.0935, -0.0314,  0.1601, -0.0475,  0.1426,
        -0.0430, -0.1718, -0.1863, -0.1273, -0.1854,  0.0583,  0.0072,  0.0268,
        -0.0962, -0.2388, -0.2376, -0.0578, -0.0450, -0.0378,  0.0203,  0.0520,
        -0.0059, -0.2355, -0.0896, -0.0467, -0.1902,  0.0188,  0.0280,  0.0014,
        -0.0045, -0.0181, -0.0280, -0.1374, -0.1118,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1871, -1.2340, -0.1453, -0.2903,  0.0513,  0.7330, -0.1564, -0.0848,
        -0.0687, -0.1043, -0.0198, -0.0229, -0.0854, -0.3100,  0.0590,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1545, -0.2173,  0.2013,  0.1100, -0.1681, -0.6428, -0.1161, -0.1217,
        -0.9353, -0.0195,  0.2962,  0.1848, -0.4344,  0.0595,  0.1331,  0.0598,
         0.0215,  0.2392,  0.4656, -0.0733,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5232e-02, -2.4835e-02, -1.6956e-02, -5.7804e-02, -4.8278e-01,
        -1.5742e-01, -5.0037e-02, -6.5004e-02,  4.1061e-03, -1.0257e-01,
        -1.6314e-01, -3.5217e-01, -1.9402e-01, -8.8481e-02, -1.2632e-01,
        -9.8614e-02, -2.1232e-02, -1.2887e-01,  9.1684e-02, -1.8010e-01,
        -3.4607e-02, -3.1752e-02, -7.0720e-02, -1.2239e-02, -7.1031e-05,
        -1.8306e-01, -9.7767e-02, -4.8135e-03, -5.0493e-02, -1.2989e-01,
        -3.0735e-01, -2.8762e-02,  4.6645e-02, -3.6738e-03, -1.1508e-01,
        -1.3519e-01, -9.3387e-02, -3.4707e-02,  1.5382e-02, -7.9017e-04,
        -1.9331e-02,  1.4601e-02, -6.7806e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1687, -0.0137,  0.0586, -0.2168, -0.1596, -0.1238, -0.1334, -0.1724,
        -0.1152, -0.0312, -0.0022, -0.0237, -0.0971, -0.2750, -0.4416, -0.0683,
        -0.0115, -0.0102,  0.0433, -0.0929, -0.1983, -0.0577,  0.0467,  0.0119,
        -0.1294, -0.0666, -0.0353, -0.0706, -0.2728,  0.0018, -0.0408, -0.1730,
        -0.0524, -0.1364, -0.1437,  0.0195,  0.0532,  0.1334,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6280, -1.4572, -0.3350, -0.5139,  0.1564, -0.5270,  0.1189, -0.0184,
        -0.0088, -0.4363, -0.0341, -0.1615, -0.3831, -0.0246,  0.1085,  0.1173,
        -0.0566, -0.1717,  0.0045, -0.0487,  0.0387,  0.0545,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3406, -0.4679,  0.0734,  0.0314,  0.0430, -0.0042, -0.0058, -0.0147,
         0.0103, -0.4146, -0.0192,  0.0419, -0.0824,  0.1018, -0.1256, -0.1038,
        -0.4460, -0.0435, -0.0333, -0.1419,  0.0432,  0.0396, -0.0924, -0.0818,
        -0.0653, -0.0081, -0.0420,  0.0078, -0.0317, -0.2390,  0.0527,  0.0248,
         0.0380,  0.0716, -0.0690, -0.3782,  0.0647,  0.0587, -0.0740,  0.1356,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2199, -0.0752, -0.0325, -0.0075, -0.0355, -0.0988,  0.0113,  0.0103,
        -0.1997, -0.0859, -0.0150, -0.0433, -0.0423, -0.3841, -0.3912, -0.0993,
         0.0992, -0.0177, -0.4003, -0.0959, -0.0859, -0.0585,  0.1046, -0.0445,
        -0.0331, -0.3980, -0.0163, -0.2966, -0.3224, -0.1462, -0.0936, -0.4121,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2043, -1.1590,  0.0096, -0.0212, -0.1072, -0.0307,  0.0269, -0.1762,
        -0.5162,  0.0088,  0.0045, -0.0700, -0.1643, -0.0073,  0.0470,  0.0128,
        -0.1230, -0.0590,  0.0100, -0.0063, -0.0461,  0.0260, -0.0130, -0.0622,
         0.0451,  0.0988, -0.0671, -0.0205,  0.1018, -0.0365, -0.0063,  0.0377,
         0.0167,  0.1372, -0.3055,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.3184, -0.0689, -0.3111, -0.5544,  0.0670, -0.1756, -0.6704, -0.4822,
         0.1200, -0.6262, -0.0319, -0.2336,  0.0105, -0.2465, -0.1430,  0.0582,
        -0.0559, -0.1759, -0.0199,  0.0578, -0.0869,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2521e-01, -1.0043e+00,  3.2799e-01, -2.6446e-01, -3.5931e-01,
        -1.1054e-01, -2.8607e-02, -2.2007e-01, -2.5842e-01,  2.3600e-01,
         6.1521e-02,  3.5633e-02, -7.8371e-04,  7.9095e-02, -1.1687e-01,
        -4.6018e-03,  2.8711e-02, -1.6373e-01,  4.5201e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2117, -0.4400, -0.3292,  0.0288, -0.1106,  0.0047, -0.0025, -0.0446,
        -0.0625, -0.3998, -0.4058, -0.2205, -0.1198, -0.6997,  0.0116, -0.1117,
        -0.0163, -0.1278,  0.1114, -0.0546,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1292, -1.2460,  0.1227, -0.2556,  0.0678, -0.0939, -0.2330, -0.1683,
        -0.0480, -0.0475, -0.0651, -0.0280,  0.0260, -0.1693, -0.1437, -0.1386,
        -0.2880, -0.0276, -0.1118, -0.1659,  0.0219, -0.0517,  0.0380,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3068,  0.8748,  0.5729,  0.0528, -0.0734,  0.1378, -0.0754,  0.2537,
         0.1884,  0.6599,  0.0488,  0.5562,  0.3086,  0.4875, -0.2470, -0.1005,
         0.0039,  0.1051,  0.2761,  0.0841,  0.1271,  0.0711, -0.2206,  0.3773,
        -0.1065,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1474,  1.4686,  0.0489,  0.0754,  0.0384,  0.0353,  0.2242,  0.1520,
        -0.1387, -0.3170,  0.2041, -0.0990,  0.0297, -0.0067, -0.0124,  0.0964,
         0.0796,  0.0211,  0.1358, -0.0603,  0.1960,  0.1310,  0.1590,  0.0852,
        -0.0088,  0.0058,  0.0328,  0.0364,  0.0491, -0.0143,  0.0717,  0.0419,
         0.1179,  0.2087, -0.4153,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1273, -0.0670,  0.0091, -0.4448, -0.0752, -0.1425, -0.3604, -0.1618,
        -0.3120, -0.0468, -0.0020,  0.0366,  0.0833, -0.1661, -0.1875, -0.6403,
        -0.0831, -0.2588,  0.1183,  0.0451,  0.2504,  0.1653,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5872,  0.5873, -0.0118,  0.0831,  0.4345,  0.3807,  0.1690,  0.1567,
         0.0324,  0.7011,  0.0717,  0.3753,  0.0144, -0.3617,  0.4247,  0.4231,
         0.1084,  0.0085,  0.0489,  0.1166, -0.2121,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1209, -1.4447, -0.0775,  0.0938,  0.0385,  0.0718, -0.2565, -0.4276,
         0.0216, -0.2385, -0.1701, -0.1547, -0.0244, -0.1518,  0.0569, -0.2241,
         0.1547,  0.1683, -0.1353,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1117,  0.0125, -0.0526,  0.0850,  0.1099, -0.1741,  0.0375,  0.0060,
        -0.0161, -0.0353, -0.0917, -0.0823, -0.0492, -0.1662,  0.0006, -0.0204,
         0.0016, -0.1336, -0.0511, -0.1913, -0.1414,  0.1715, -0.0860, -0.0694,
        -0.2133, -0.1439, -0.0614, -0.2146,  0.0098, -0.4219, -0.0211, -0.2310,
        -0.1821, -0.2475,  0.0644, -0.0359, -0.0088, -0.0015,  0.1113, -0.0482,
         0.0397,  0.0572,  0.0462,  0.0032, -0.0150,  0.1114, -0.0129],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2882,  1.3645,  0.5330,  0.1126,  0.0310,  0.1096, -0.0118,  0.0432,
         0.1548,  0.0676, -0.0842,  0.2048,  0.0147, -0.2148, -0.1945, -0.0209,
         0.3230, -0.0015, -0.0947,  0.1531, -0.0378,  0.1473,  0.2960,  0.6049,
        -0.1087, -0.1852,  0.4336,  0.0397, -0.0706, -0.1837,  0.0458, -0.0025,
        -0.1167,  0.2758,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5434e-01, -1.7359e+00, -9.4108e-02,  9.5428e-02, -1.1874e-01,
        -2.2737e-01,  6.5914e-02,  1.2592e-01, -4.0198e-02, -7.8232e-02,
         7.0117e-02, -4.4859e-03, -1.8342e-01, -3.3947e-01, -1.5091e-04,
        -9.9244e-02, -1.2531e-01,  2.0833e-02,  1.2502e-01, -1.4088e-01,
         4.5847e-04,  5.8913e-02, -6.0846e-02,  2.3720e-02, -3.6160e-02,
         1.6439e-02, -6.3208e-03, -4.3986e-02, -5.0174e-02, -1.3412e-01,
        -8.7404e-02, -6.1300e-02, -2.0380e-01,  2.4196e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.2113,  1.9765,  0.0650,  0.0483,  0.3908,  0.7015, -0.0370,  0.1334,
        -0.1622,  0.3690,  0.1284,  0.0328,  0.2548,  0.5964,  0.0049,  0.0330,
        -0.0326, -0.1120,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1651,  0.1601,  0.1285,  0.2006,  0.0470, -0.0351,  0.0544,  0.1338,
         0.2989,  0.7342, -0.0766,  0.1108,  0.0975,  0.1162,  0.0315,  0.1086,
         0.0786,  0.0079,  0.0078,  0.5414, -0.0350,  0.0169,  0.1547, -0.0109,
        -0.0163, -0.0375,  0.0640,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6023,  0.0676,  0.1027, -0.0934,  0.0537, -0.0232,  0.1072,  0.0248,
        -0.4603, -0.0828,  0.0462, -0.2612, -0.8042, -0.1714, -0.0388,  0.0126,
        -0.1062, -0.0538, -0.0380, -0.8313, -0.1315, -0.0356, -0.1222, -0.0616,
        -0.0588,  0.0791,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0702,  0.8958, -0.0011,  0.2677,  0.3328,  0.2094,  0.1334,  0.0080,
         0.3295,  0.2964,  0.3216,  0.1971,  0.1550,  0.0250,  0.0461,  0.3306,
         0.1186,  0.1015,  0.0341,  0.0377, -0.3561,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0984, -0.4384, -0.7682, -0.3484, -0.2153, -0.1500, -0.3360, -0.0529,
         0.0321, -0.0164, -0.2024, -0.1566, -0.0812,  0.0098, -0.0017,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5318,  0.0548,  0.1018,  0.1037, -0.1189,  0.1982,  0.1452,  0.0670,
         0.1679,  0.1599,  0.0178, -0.0137,  0.6488,  0.4257,  0.0395,  0.1285,
        -0.0921, -0.0471,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6834, -1.5144, -0.5131, -0.6780, -0.1332,  0.4771,  0.2055, -0.0747,
         0.0114, -0.3541, -0.2833, -0.1241,  0.1685, -0.8595, -0.0729,  0.0889,
        -0.0734,  0.1160,  1.0550,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0023, -0.4434, -0.2239, -0.1470, -0.1550, -0.0432,  0.0791,  0.0068,
         0.0228, -0.0148,  0.0149,  0.0822, -0.0699,  0.0353,  0.0187,  0.0269,
        -0.2256,  0.0258,  0.0217,  0.1258, -0.3333, -0.0055, -0.1842, -0.0085,
        -0.2343, -0.0411, -0.1410,  0.0637, -0.0831, -0.0349,  0.0306, -0.0529,
        -0.2831,  0.0039, -0.0361, -0.0092,  0.1442,  0.0782, -0.0301,  0.0938,
         0.0454,  0.0310, -0.0227,  0.0941], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1971e-01, -1.0581e+00, -1.0244e-01, -2.3878e-01, -2.1130e-01,
        -6.4256e-03, -2.3155e-01,  5.8908e-02, -2.7786e-01,  6.7040e-02,
        -9.8547e-02,  2.8192e-02,  6.2588e-02, -2.3731e-04,  3.7993e-02,
         3.1942e-02,  2.1393e-02, -3.1745e-01, -2.5208e-02, -7.7313e-02,
        -1.4040e-01,  4.7551e-02,  8.3870e-02, -1.7998e-02,  1.5054e-01,
        -5.7512e-03, -9.5380e-03, -3.9701e-01,  6.5958e-02, -9.8903e-02,
        -1.1434e-01, -1.2445e-01, -3.8623e-02, -4.8606e-01,  7.5549e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1218,  2.3001,  0.3353, -0.0932,  0.2390,  0.1578,  0.4664,  0.2944,
        -0.0343, -0.0246,  0.0451,  0.1870,  0.2107,  0.0359,  0.0239,  0.1269,
         0.0331,  0.0464, -0.0786, -0.0134, -0.0859,  0.0346, -0.7132,  0.4332,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4220,  1.2875,  0.0697,  0.1024,  0.1803,  0.0117,  0.1292,  0.3071,
         0.4455,  0.0618,  0.0558, -0.0495,  0.0172,  0.0688,  0.2436, -0.0923,
         0.0046,  0.1017,  0.3252, -0.0930,  0.0988,  0.1489,  0.0074,  0.1114,
        -0.0765,  0.1431,  0.0390,  0.0893,  0.0210,  0.0602,  0.0386, -0.0415,
         0.0088, -0.0529,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2172,  0.0852,  0.0953, -0.2289,  0.1023,  0.0006, -0.3583, -0.0041,
        -0.0090, -0.0196, -0.0312, -0.0267, -0.4031, -0.0236, -0.3276, -0.4420,
        -0.0669,  0.0725, -0.0363, -0.1756,  0.0384, -0.0969,  0.0073, -0.3054,
        -0.1729, -0.0966, -0.3240,  0.0578, -0.1570,  0.0705,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.0527, -0.2551, -0.1848, -0.8936, -0.1894, -0.0307, -0.1299, -0.1316,
        -0.1266, -0.4812,  0.1897, -0.2349, -0.2739,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0612,  0.0466,  0.3870,  0.1383, -0.0711, -0.1211, -0.0468, -0.3674,
        -0.4975, -0.0325,  0.0963, -0.4172, -0.0058, -0.0636, -0.3313,  0.0108,
         0.0724,  0.1953,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1372, -0.6414, -0.9086, -0.0570, -0.3720,  0.0862,  0.1574, -0.3755,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9041e-01, -3.5503e-01, -1.5878e-02,  2.3142e-03,  2.2503e-02,
        -3.6605e-02,  1.1888e-02, -9.4242e-03,  4.5882e-02, -2.8722e-03,
        -8.5479e-03,  1.1307e-02, -1.1074e-02,  1.3296e-02,  9.5769e-03,
        -1.6804e-02,  2.1874e-02,  1.0155e-02, -9.0552e-05, -1.0517e-02,
        -1.1527e-02,  1.2864e-02, -4.6012e-02, -4.8415e-02,  3.5360e-02,
        -4.8409e-02, -3.0855e-02, -5.5245e-02, -1.7739e-01, -8.9565e-02,
        -1.8165e-02, -3.1867e-01, -2.0392e-02, -5.3414e-01, -7.5689e-02,
        -2.3291e-01, -1.8133e-01, -4.4305e-02,  4.5531e-02,  2.0821e-02,
        -2.9040e-02,  6.7038e-02, -4.7533e-02,  6.3576e-02,  7.1724e-02,
         2.6392e-02, -3.7990e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6576,  1.6096,  1.2596,  0.7398,  0.0320,  0.2000, -0.3931,  0.0336,
         0.3208,  0.3171, -0.2001,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2687, -1.5103, -0.2150, -0.4641,  0.0122, -0.0344, -0.1572, -0.8422,
        -0.0431, -0.4393, -0.0441, -0.1779, -0.5497, -0.0215, -0.0204, -0.2286,
         0.0389, -0.5607,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5401e-02,  6.6047e-01,  1.0624e+00, -3.5436e-02, -1.1139e-01,
         1.0631e-01,  3.1343e-01, -7.3800e-02,  1.8590e-01,  8.7028e-02,
         6.2544e-02,  4.2407e-04,  7.5535e-03,  3.9418e-01,  1.4833e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1297, -1.1707, -0.2391, -0.0663, -0.0107, -0.3301, -0.4055, -0.0928,
        -0.1044, -0.3601, -0.3325,  0.2348, -0.0271,  0.2084, -0.0429,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6949,  0.0443, -0.0109,  0.0593, -0.1208,  0.0163, -0.0728, -0.0326,
        -0.2779, -0.0324, -0.0341, -0.1373, -0.3933,  0.0093, -0.0661,  0.0024,
        -0.1769,  0.2433,  0.0017,  0.1329, -0.4374, -0.5349,  0.0231,  0.0215,
        -0.1424, -0.1227,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6762, -0.6403, -0.2907, -0.4540, -0.2697,  0.1682,  0.0744, -0.6011,
        -1.0424, -0.2546,  0.1262,  0.0013,  0.1153, -0.1809,  0.1212,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2960, -1.7597, -0.1328, -0.1335, -0.4181, -0.3655, -0.4164,  0.0357,
        -0.0266, -0.8467,  0.1054, -0.0074, -0.1075, -0.1022,  0.0325, -0.0564,
        -0.0506, -0.1303, -0.1068,  0.0755, -0.0989,  0.0145, -0.0905,  0.0430,
         0.2842,  0.6368,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5696e-01, -9.7537e-02, -2.1955e-02,  3.1985e-02, -1.5907e-01,
        -1.8386e-01,  1.0308e-02,  2.5129e-02, -1.1910e-01, -2.5739e-01,
        -8.1677e-01,  2.9517e-02, -1.4298e-02,  5.7239e-02, -4.4333e-01,
        -1.7530e-01,  9.7132e-02, -1.2395e-01,  5.5099e-04, -1.6283e-01,
        -5.9968e-02, -1.1401e-03, -5.3334e-02, -8.2116e-02,  2.8569e-02,
        -9.1024e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.1796, -0.4042,  0.0695, -0.0649, -0.0411,  0.0874, -0.1668, -0.1674,
        -0.0437, -0.0949, -0.0798, -0.0762, -0.1892,  0.0584, -0.0837, -0.0734,
        -0.0391,  0.0059, -0.0453,  0.0498,  0.0501, -0.0930, -0.0965, -0.2239,
        -0.4067, -0.0477, -0.0678, -0.0458,  0.0352, -0.0221,  0.0202, -0.0835,
         0.0074,  0.0368,  0.0205, -0.0588,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1604, -0.3894, -0.1789, -0.1791, -0.1455, -0.0413, -0.2001, -0.3291,
         0.0297,  0.0927, -0.1116, -0.0399, -0.0390,  0.1121, -0.0399, -0.0508,
        -0.0208, -0.0264,  0.0419,  0.0965, -0.0584, -0.1946, -0.0870, -0.0899,
        -0.1627, -0.2961,  0.0423, -0.0925, -0.2953, -0.0375, -0.0723, -0.1872,
        -0.1924,  0.0407, -0.0467, -0.1411,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7172,  0.4254,  0.0544,  0.1706,  0.0051,  0.9948,  0.1291,  0.3388,
         0.0690, -0.0147,  0.2446,  0.1817,  0.3641, -0.1622, -0.1212, -0.0150,
         0.0494, -0.0741,  0.1741, -0.3395,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6540e+00, -3.9341e-01, -9.1731e-02,  1.2138e-01,  1.1907e-02,
        -2.5206e-02, -5.6986e-02, -1.3620e-01,  3.9302e-02, -7.5946e-02,
         1.3323e-02,  1.0369e-02, -2.8118e-02, -2.7825e-02, -5.6752e-01,
        -1.2657e-01, -1.0313e-01, -1.5145e-01, -2.7817e-02, -1.0405e-02,
         2.3633e-02, -1.6886e-02,  5.1923e-02,  2.9595e-02, -2.9112e-01,
        -2.9209e-02, -2.7898e-02, -9.4070e-02, -8.0031e-02, -3.2329e-01,
        -3.0411e-01,  6.9585e-04, -4.9817e-02, -4.8606e-01,  6.6651e-02,
         1.3103e-01, -1.7189e-01, -8.5947e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2017, -0.7725, -0.2575, -0.3342, -0.0553,  0.0008, -0.0997,  0.0022,
        -0.1716, -0.1558, -0.3106, -0.3204, -0.0314,  0.0330,  0.0773, -0.2749,
        -0.0137, -0.2046, -0.0377,  0.0330, -0.0229,  0.0134,  0.1569,  0.0406,
        -0.0563,  0.0348, -0.0600, -0.0495, -0.3202,  0.1014, -0.0710, -0.0671,
         0.0509, -0.0123, -0.0849,  0.0640,  0.0263, -0.0351, -0.0380,  0.0503,
        -0.0620], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5420e-01, -1.1132e+00, -1.3942e-01, -3.1117e-01,  6.6870e-02,
        -1.8100e-01, -2.4033e-02, -1.6187e-01, -7.4550e-02, -2.1771e-02,
         1.5357e-02, -2.1284e-01, -1.3300e-01, -8.6596e-02,  6.7127e-02,
        -5.0155e-01, -4.8846e-02, -2.2749e-01,  1.0924e-03, -8.1164e-02,
         1.4973e-01,  1.8899e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0424e-01,  1.8859e-02,  1.5130e-02,  8.1486e-03,  3.3425e-02,
        -1.3441e-01, -9.1323e-02, -2.4998e-02, -6.1426e-02,  7.3136e-03,
         4.2476e-05, -1.3985e-01, -3.6310e-02, -3.9886e-02, -7.5984e-02,
        -2.1844e-01, -3.8259e-02, -1.6148e-01,  2.5898e-02, -2.6822e-01,
        -3.1742e-01, -9.4469e-03,  2.6980e-02, -2.6172e-01, -3.7643e-02,
         9.1936e-02, -7.8602e-02, -3.0873e-02, -1.7121e-01, -6.3292e-02,
        -5.9916e-02, -9.4046e-02, -5.1489e-02,  1.8665e-02, -3.1531e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2826, -0.1834, -0.7540,  0.0276, -0.4228, -0.1414, -0.1464, -0.0069,
        -0.2111, -0.0293, -0.1770, -0.0326, -0.0713, -0.0459, -0.0209, -0.2488,
         0.2466,  0.0443, -0.2008, -0.0437, -0.2536, -0.1321,  0.0106, -0.2003,
        -0.0823, -0.1745, -0.1040,  0.0607,  0.2627,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6525, -1.8714, -0.4888, -0.2969, -0.0935, -0.3127, -0.1590, -0.0076,
        -0.1141, -0.0459, -0.0496, -0.0662, -0.1071,  0.0552,  0.0392, -0.3165,
        -0.2551, -0.0808, -0.1177,  0.0903, -0.2562, -0.2210, -0.0165, -0.0883,
         0.4084,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1077,  0.1122, -0.1005, -0.1105, -0.6146, -0.2336, -0.2237, -0.3269,
        -0.4360,  0.0210,  0.1125,  0.2113,  0.0366,  0.0984, -0.1820, -0.2486,
        -0.1377, -0.0529,  0.0318, -0.0373,  0.0288,  0.0589, -0.0390,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0512, -0.0706,  0.0120,  0.0195,  0.1562,  0.0464,  0.0041,  0.0090,
        -0.0219,  0.0317, -0.0068, -0.0299, -0.2249, -0.4387,  0.0579, -0.0960,
        -0.0745, -0.1745, -0.0748,  0.0058,  0.0623,  0.1261,  0.0005,  0.0307,
        -0.0241, -0.0925, -0.3547, -0.0961, -0.1431, -0.1361, -0.2795, -0.0948,
        -0.1626, -0.0783, -0.0588, -0.0846,  0.0124, -0.0554, -0.0396,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2775,  0.1747, -0.2070, -0.5979, -0.7791, -0.1027, -0.2114, -0.4178,
        -0.5887,  0.0526,  0.1198, -0.0041,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.0760, -0.3582, -0.4557, -0.0097, -0.0505,  0.0267, -0.0570,  0.0173,
        -0.0175, -0.0827,  0.0107,  0.0357,  0.1320,  0.0663, -0.1011,  0.0121,
        -0.1570, -0.3280,  0.0182, -0.0903, -0.0703, -0.1000,  0.0473, -0.0475,
        -0.0545,  0.0133, -0.0445, -0.1409, -0.1056, -0.0910,  0.1368,  0.1321,
        -0.2166,  0.0276,  0.0120, -0.1013, -0.2265,  0.0202,  0.0029, -0.0262,
         0.0385,  0.0331,  0.0559, -0.1630,  0.0483,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1780, -1.8153, -0.2738, -0.5363, -0.1177, -0.0822,  0.1223, -0.3132,
        -0.5543, -0.0523,  0.2907, -0.1185,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8769, -1.9732, -1.3179, -0.0717,  0.5646,  0.2810, -0.3507,  0.1481,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5771, -0.7973, -0.1251, -0.0700, -0.0818,  0.0422, -0.0647, -0.0666,
        -0.0579, -0.3700, -0.0719,  0.0415, -0.0292, -0.0067, -0.0235,  0.0094,
        -0.0311,  0.0523,  0.0395, -0.0208,  0.0595,  0.0271, -0.0283, -0.0397,
        -0.0161,  0.0037,  0.0257, -0.0847, -0.1619,  0.0830, -0.0171, -0.0439,
         0.0028, -0.0184,  0.0098, -0.0149,  0.0121,  0.0543,  0.0252,  0.0086,
         0.0095,  0.0101, -0.1711,  0.0171,  0.0544,  0.0579, -0.2383, -0.1578,
        -0.1559, -0.3091, -0.0317, -0.0540, -0.0617, -0.0154, -0.0413,  0.0280,
        -0.0414,  0.0241,  0.0085,  0.1638,  0.1104], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2031, -0.0661,  0.0271, -0.0933, -0.3976, -0.0430, -0.1627,  0.0207,
         0.0198,  0.0014,  0.0309, -0.0950, -0.0163, -0.1035, -0.2988, -0.0316,
        -0.0669, -0.0360, -0.1299, -0.0348, -0.3658, -0.3675, -0.0847,  0.0702,
        -0.1002, -0.0299,  0.0341, -0.0127, -0.0384, -0.1416, -0.1198,  0.1451,
        -0.3082,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0578, -0.4039, -0.8404, -0.2142, -0.0688, -0.0682, -0.2677, -0.0221,
         0.1352,  0.0135,  0.0435, -0.0132, -0.0639,  0.1039,  0.0413,  0.4435,
        -0.0416, -0.1336, -1.0457,  0.1355,  0.2088, -0.2709,  0.2540,  0.1267,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1154, -0.4306, -0.3272, -0.1558, -0.1956, -0.2231,  0.0541, -0.0026,
         0.0845, -0.5957, -0.3554, -0.3170, -0.3566, -0.0864,  0.0084, -0.0281,
         0.2004, -0.1111,  0.1199, -0.0386,  0.0013,  0.1000, -0.2960, -0.2752,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2086, -0.0257, -0.0866, -0.2116,  0.0084, -0.0294, -0.1046, -0.3195,
        -0.0469, -0.0639,  0.0842, -0.0731, -0.0345, -0.0281, -0.2346, -0.4665,
        -0.2798,  0.0446, -0.0703, -0.0665, -0.1955,  0.0231, -0.0584, -0.2106,
        -0.0997, -0.0483, -0.1790, -0.0366, -0.0091,  0.0026, -0.2174, -0.0506,
         0.1411, -0.1931,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5960, -0.0460, -0.1375, -0.0025, -0.4947, -0.1192, -0.0223, -0.0487,
        -0.0127,  0.0097,  0.0659,  0.1456,  0.0547, -0.9764, -0.0259, -0.0749,
        -0.4625,  0.0021, -0.3678, -0.0472, -0.1556, -0.7376, -0.0163, -0.0662,
         0.1728,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3653,  0.1637,  0.0315,  0.0502, -0.0641,  0.0439, -0.5023, -0.4725,
        -0.2076, -1.4540, -0.1186, -0.0975, -0.0827, -0.1159,  0.1058,  0.0738,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1668, -0.6134,  0.0532, -0.2484, -0.2071, -0.2235, -0.3642, -0.0984,
         0.1581, -0.0065, -0.0367, -0.0736, -0.1342, -0.0688, -0.0980, -0.0871,
        -0.1164, -0.2424, -0.1900, -0.0272, -0.0192, -0.0413,  0.1731,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6225,  0.2415,  0.3816,  0.3719,  0.8045,  0.0547,  0.1105,  0.2865,
         0.0854,  0.0953,  0.0160, -0.0394,  0.3173,  0.2990,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.4145, -0.3141, -0.2644, -0.8733, -0.1190,  0.0309, -0.1615,  0.1718,
        -0.1170, -0.1020, -0.3252, -0.2108, -0.1711, -0.0145, -0.1007,  0.1377,
         0.1724, -0.6195, -0.0730, -0.5959, -0.2190,  0.0802,  0.0912,  0.1203,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2001,  0.0376, -0.4194, -0.1154, -0.5914, -0.2409, -0.9870, -0.0171,
        -0.1353,  0.0282, -0.2899,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0776,  0.0756, -0.1144, -0.0459, -0.0761, -0.2942, -0.5249,  0.0353,
        -0.4913, -0.1698, -0.0295,  0.0065, -0.0999,  0.1389, -0.2747, -0.0482,
        -0.0771, -0.2168, -0.0873, -0.2486, -0.1585,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0329,  0.1434, -0.2183,  0.1479, -0.3170, -0.0181, -0.0660, -1.6556,
        -0.2047, -0.7245, -0.3351, -0.1824, -0.3181, -0.1510,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1483,  0.1503,  0.0598,  0.0512,  0.0658,  0.0297, -0.0194, -0.0556,
        -0.0145,  0.1123, -0.1197, -0.1654,  0.9793, -0.0295,  0.1313,  0.1140,
         0.0861,  0.1268, -0.0616, -0.1494,  0.2896,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1945, -0.9759, -0.1044, -0.4780,  0.4972, -0.6345, -0.1856, -0.8329,
        -0.3145, -0.2322, -0.1070, -0.1878, -0.0804,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1549, -0.1533, -0.0428, -0.0166, -0.0257, -0.0489, -0.0822, -0.2362,
        -0.1578,  0.0070, -0.0238,  0.0804,  0.0743, -0.0124, -0.0179, -0.0164,
         0.0143,  0.0425, -0.0316,  0.0443,  0.0707,  0.0515, -0.0338, -0.0265,
        -0.4852, -0.1110, -0.9367, -0.3260, -0.3583, -0.0692, -0.0972, -0.0211,
         0.0542, -0.0349, -0.0477,  0.0443, -0.0693,  0.0081,  0.0213, -0.0180,
        -0.0407,  0.0099,  0.0446,  0.0042, -0.0246], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4752, -0.3022, -0.1649, -0.0641,  0.0059, -0.0500, -0.1067, -0.0680,
         0.0547,  0.0761, -0.5128, -0.0196, -0.0695,  0.0130,  0.0567, -0.2030,
        -0.2860,  0.0331,  0.0239,  0.1469, -0.2760, -0.4203,  0.0146, -0.2760,
         0.1032,  0.1344,  0.0494,  0.0322, -0.3256, -0.0472, -0.1771, -0.0600,
        -0.2421, -0.1212,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4060,  0.1469, -0.2365, -0.1624, -0.0931,  0.0297, -0.0338, -0.0213,
         0.1536, -0.1165, -0.0295, -0.0318, -0.0237, -0.0880, -0.0515,  0.1228,
        -0.0358,  0.0287, -0.1759, -0.7327, -1.2636,  0.0617,  0.0407,  0.1836,
        -0.2075, -0.0447,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1585, -1.3048, -0.1082, -0.2524,  0.2589, -0.0844,  0.0715, -0.1479,
        -0.0929,  0.1188, -0.0283,  0.0284,  0.0297, -0.0453,  0.0027, -0.1259,
        -0.7325,  0.1372, -0.1372,  0.0225, -0.0092,  0.0289,  0.3296,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0558, -0.5701, -0.2256, -0.0974, -0.0021,  0.0466, -0.0849, -0.0951,
        -0.1887, -0.0269,  0.0581, -0.0505, -0.0211, -0.0633, -0.4972,  0.1040,
         0.0090, -0.1121, -0.1642,  0.0391, -0.0179,  0.0373,  0.0277,  0.0019,
         0.0609, -0.0079, -0.2768, -0.3338,  0.0148, -0.0743, -0.0296, -0.0552,
         0.0865,  0.0077,  0.0168,  0.0314, -0.0932,  0.0338, -0.1340, -0.0613,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4484, -0.2641, -0.1052, -0.0950, -0.3133, -0.0886, -0.2015, -0.2329,
        -0.0445, -0.0602, -0.0212, -0.0648, -0.1474, -0.0474, -0.1763, -0.1873,
        -0.1782, -0.0090, -0.0318, -0.1845, -0.0212, -0.2654, -0.0293,  0.0610,
         0.0553,  0.0413, -0.0249, -0.2399, -0.0218, -0.1825, -0.2904,  0.0086,
         0.0107,  0.0359,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0723, -0.1197, -0.0498, -0.0657, -0.2599, -0.0026,  0.0174, -0.0350,
         0.0187,  0.0233, -0.0532, -0.0482,  0.0914, -0.0741, -0.2914,  0.0325,
        -0.0371, -0.0676, -0.2295, -0.0253, -0.0373, -0.0581, -0.0482,  0.0586,
        -0.0412, -0.4467, -0.1383, -0.1140, -0.0125,  0.0726, -0.0627, -0.0176,
         0.0241, -0.0437, -0.1752, -0.0047, -0.0988, -0.2449, -0.0345, -0.1395,
        -0.0685, -0.0737, -0.0751, -0.0429, -0.0031, -0.0062,  0.0098,  0.0127,
        -0.0381, -0.0429,  0.0486, -0.1137], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2859e-02, -3.7431e-02, -2.0781e-03, -7.0132e-02, -3.0862e-01,
        -3.5404e-02,  4.9354e-04,  1.5277e-01,  8.5906e-03, -8.1609e-02,
         3.9742e-03,  1.3494e-02,  3.5427e-02, -7.0197e-01, -1.4631e-01,
        -1.3795e-01, -3.6684e-01, -1.7046e-01, -1.6425e-01, -5.8500e-04,
        -1.0990e-01, -4.3486e-02, -6.6620e-02, -1.5692e-01,  5.1061e-02,
        -2.2329e-02,  7.1842e-02,  7.7041e-02,  1.2234e-02,  7.5021e-02,
         3.4720e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0055, -0.4507,  0.1696, -0.0026, -0.0262, -0.0128,  0.0390, -0.0192,
        -0.1258,  0.0072, -0.0879, -0.3803,  0.1442,  0.0869, -0.1558, -0.1582,
         0.1403, -0.2721, -0.4032, -0.3632, -0.1060, -0.0903, -0.0765,  0.0561,
         0.0702,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2461,  0.3157,  0.0379,  0.0401, -0.1428, -0.6755, -0.9909, -0.2068,
         0.1317, -0.2386, -0.0978,  0.1911,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4349e-01, -1.5056e+00,  2.2461e-01, -1.9170e-01, -3.8598e-01,
        -1.2217e-01, -3.5108e-03,  3.3595e-02, -1.8639e-02, -6.4960e-02,
         2.9851e-02,  3.9715e-02, -2.3214e-01, -3.2959e-01,  6.8664e-03,
        -1.1538e-01, -4.0692e-02, -4.2739e-02,  4.2246e-02,  1.0089e-02,
        -1.2369e-01, -2.9645e-02, -2.4498e-03, -8.9077e-04,  8.8529e-02,
        -1.7570e-01, -3.1861e-02,  9.9544e-03, -7.8142e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0736, -1.3714, -0.0214, -0.1621,  0.0828, -0.5155, -0.8962,  0.1863,
         0.1321,  0.1899, -0.2086, -0.2246,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1988, -1.0625, -0.4322, -0.4987, -0.1453, -0.2394, -0.6110, -0.2949,
        -0.4114, -0.1184,  0.0385, -0.2985,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3620,  0.3039, -0.2782, -0.0106, -0.2991,  0.2052, -0.0312, -0.0677,
        -0.3757, -0.1558, -0.0577, -0.4056, -0.1167, -0.0120, -0.0460, -0.2200,
        -0.0907,  0.1244, -0.1197, -0.1384,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1488, -1.1154, -0.0370, -0.1455, -0.6529, -0.0858, -0.0354,  0.0339,
        -0.0653, -0.0054, -0.0445, -0.0693, -0.0341, -0.1733, -0.0294,  0.0125,
        -0.0189,  0.0486, -0.0025, -0.0514, -0.1265,  0.0604,  0.0742, -0.0105,
        -0.0498,  0.0757, -0.0349, -0.1749, -0.0853,  0.0263,  0.1106, -0.0273,
        -0.1707, -0.2252, -0.0671,  0.0328,  0.0171,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1673,  2.6221,  0.0190, -1.3130,  0.2019,  0.0927,  0.3597,  0.0289,
        -0.1009,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0315,  0.1333, -0.0649, -0.5216, -0.1671, -0.3104, -0.2881, -0.3500,
         0.0006,  0.0309, -0.0937, -0.1446, -0.0512, -0.4272, -0.1327, -0.0074,
         0.0549, -0.0422, -0.3564, -0.2465,  0.0034,  0.0421, -0.0060,  0.0374,
        -0.0872,  0.3846,  0.1067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7985, -1.3228,  0.0584, -0.1846,  0.0160,  0.0119,  0.0503, -0.0114,
        -0.0497,  0.0133,  0.1055, -0.3571, -0.1086,  0.0016, -0.0116,  0.0179,
        -0.2204, -0.0430,  0.0219, -0.1282,  0.0491, -0.2612, -0.1947, -0.0236,
        -0.0657,  0.0327, -0.0353, -0.0162, -0.7847, -0.0677,  0.0287, -0.2044,
         0.3244,  0.2077,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.1182,  0.0617, -0.0412, -0.0883, -0.0590, -0.0100,  0.0498,  0.0411,
         0.0933,  0.0899, -0.0742, -0.3983, -0.1247,  0.0018,  0.0568, -0.1857,
        -0.2572, -0.2165, -0.0932, -0.0069, -0.0140, -0.0123, -0.2163, -0.0511,
        -0.0099, -0.0491, -0.0044, -0.0039, -0.0673, -0.0643,  0.0088, -0.1745,
        -0.4170, -0.1285, -0.0974, -0.0792, -0.1859, -0.0923,  0.0265, -0.0118,
        -0.0200, -0.1278, -0.0213,  0.0733, -0.0058,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1689, -0.1093, -0.0459, -0.0212, -0.3277, -0.4905, -0.8119, -0.1817,
        -0.2897, -0.5918, -0.0945, -0.0170, -0.1643,  0.1730, -0.1301, -0.3951,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0929, -0.0406,  0.1438,  0.1623,  0.1944,  0.0762, -0.1043,  0.0158,
         0.6286,  0.4833,  0.1703,  0.2985, -0.0461,  0.1217,  0.0794, -0.0802,
         0.0380,  0.0171, -0.0068,  0.0783, -0.0175, -0.0296,  0.0054, -0.0999,
        -0.0803,  0.1319, -0.0656,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6979e-01, -1.4358e-01, -1.6406e-01,  1.3435e-02, -2.8502e-02,
         1.8156e-02, -4.6698e-02,  7.8450e-03, -5.7667e-02,  1.0504e-02,
         3.4347e-03,  1.7978e-02, -7.2839e-02, -1.4521e-02, -4.5438e-02,
        -1.7470e-02, -5.2882e-02,  9.6760e-03, -9.4375e-05, -5.8560e-02,
         8.9322e-02, -8.5079e-02,  2.6566e-03, -3.5364e-01, -5.4932e-04,
        -5.9813e-02, -1.7925e-01, -2.4482e-01, -2.2903e-02, -1.7135e-01,
         4.1478e-04, -2.0549e-01, -8.0190e-02, -6.2795e-02, -9.3835e-02,
        -1.9941e-01, -3.4954e-02, -4.5686e-02,  1.0442e-02,  1.8239e-02,
        -4.6821e-02, -8.4031e-02, -1.1604e-02,  4.9364e-02, -1.0114e-01,
         9.0921e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2920,  0.0636, -0.0140,  0.0573, -0.0252, -0.0503, -0.0248, -0.2961,
        -0.0430, -0.1801, -0.0387,  0.0505,  0.0737, -0.0586, -0.1447,  0.0105,
        -0.0440, -0.1663, -0.3782, -0.9172, -0.2033, -0.2629, -0.0311,  0.0043,
         0.0489,  0.0538, -0.1959,  0.0638, -0.0752, -0.0095,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0134, -0.1121, -0.1765, -0.1426, -0.0649, -0.0494,  0.3062,  0.5933,
         0.0627,  0.0385,  0.3096,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5040, -0.9941, -0.1971, -0.1498, -0.2313, -0.0711, -0.2299, -0.2462,
         0.0288,  0.0731,  0.0895, -0.0387, -0.1252, -0.0130, -0.0895, -0.2334,
        -0.0783, -0.1790, -0.0828, -0.2222,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0873, -1.0449, -0.7512, -0.3604,  0.0672, -0.5148, -0.3919,  0.0176,
        -0.2359, -0.5516, -0.1528,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0215, -0.9982, -0.4786, -0.5028,  0.0128, -0.3840, -0.2056, -0.1678,
        -0.2141, -0.0593, -0.0603, -0.0664, -0.1447, -0.0917,  0.0428, -0.1729,
        -0.0708, -0.0132,  0.0059, -0.0437,  0.6015,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1920,  1.7895,  0.8191, -0.0942,  0.1857,  1.1132, -0.1794,  0.0195,
        -0.0027,  0.1367, -0.0714,  0.0237,  0.0717,  0.0699,  0.0160, -0.0429,
        -0.3439, -0.1187,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0910, -0.2340, -0.4150, -0.0493, -0.0323,  0.0412,  0.0773,  0.0374,
         0.0030, -0.0210, -0.0057,  0.0181, -0.0292, -0.1569, -0.1512,  0.0298,
        -0.0995, -0.2888, -0.0566, -0.0575, -0.0735, -0.1640, -0.3418, -0.0224,
        -0.0454,  0.0021, -0.0782, -0.0459, -0.0498, -0.3211, -0.0224, -0.0559,
        -0.0396, -0.0052, -0.0820,  0.0013, -0.0030,  0.0962,  0.0076,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9693e-01,  2.2748e+00,  2.5394e-01,  2.5212e-01,  3.4800e-02,
        -2.3608e-02,  9.6049e-02,  7.9634e-04,  1.4740e-01,  6.5045e-02,
        -2.3054e-03,  3.2722e-01,  1.1109e-01,  2.5678e-02,  5.9508e-02,
         5.0583e-03,  2.0849e-01,  5.7648e-02,  4.8439e-02,  6.2528e-02,
         4.9661e-02,  3.5974e-02,  3.7412e-03, -4.7851e-02, -4.5732e-02,
         8.7163e-02,  1.1076e-01, -3.4166e-02,  2.5875e-01,  3.8177e-01,
         3.9826e-02,  1.1864e-01,  9.2443e-02,  3.2546e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.3018, -1.0606, -0.6417, -0.6512, -0.1953, -0.2073, -0.2312, -0.1619,
         0.1631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2192, -0.7059, -0.5010, -0.1122, -0.0324, -0.2844,  0.0112, -0.0323,
         0.0161, -0.0336, -0.1231,  0.0121, -0.0352, -0.0968,  0.0526, -0.2863,
        -0.1752, -1.0911, -0.0181, -0.0673, -0.0970,  0.0588,  0.2606, -0.0055,
        -0.2724,  0.0014, -0.0680, -0.0209,  0.0407, -0.0313, -0.1320, -0.1998,
         0.0490, -0.0933,  0.0401, -0.0028,  0.0336, -0.0125,  0.2119,  0.1831,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3741e-02, -1.6432e-01, -4.5019e-01,  1.0799e-01, -1.3532e-01,
        -3.2860e-02,  2.7319e-02,  2.5458e-02, -8.0514e-02, -2.1535e-01,
         3.6392e-04,  1.8500e-02,  2.1774e-02,  1.9112e-02, -5.8777e-02,
        -1.0526e-02, -5.7207e-02,  1.7443e-01, -1.0319e-01, -1.3746e-02,
        -2.3153e-02, -1.6430e-03, -8.5412e-02, -2.9103e-03, -1.9959e-02,
        -9.1510e-02, -1.5096e-01, -1.5016e-02, -2.5866e-01, -2.8270e-01,
        -1.5695e-02, -5.6377e-02,  3.5446e-02,  3.8181e-02,  1.2274e-02,
         2.8528e-02, -1.4474e-01, -2.2154e-01,  4.6285e-03,  2.9348e-02,
         6.6295e-02,  3.4010e-02, -2.6463e-03,  2.7273e-03, -1.4068e-02,
         2.7918e-02, -2.2987e-02,  8.3693e-03,  3.3444e-02, -4.8290e-03,
         1.6834e-02,  8.8967e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1532, -0.6859, -0.2442, -0.1548, -0.1849, -0.1729, -0.1960, -0.1865,
        -0.1849,  0.0358, -0.0295, -0.2867, -0.3045, -0.1119, -0.0556, -0.2774,
        -0.2011, -0.0849, -0.3218, -0.0300, -0.1689, -0.2378,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0784, -1.2423,  0.1759, -0.1884, -0.0293,  0.0181, -0.0240, -0.2911,
         0.0291,  0.0016, -0.1793, -0.2076,  0.0029, -0.2407,  0.0087, -0.4579,
        -0.3511,  0.1026,  0.0551,  0.1127,  0.2448,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4398e-01, -2.8348e-01, -8.0684e-02, -8.1810e-02, -5.3131e-02,
         2.7661e-04, -3.9627e-02,  1.2536e-01, -6.3808e-02, -9.4013e-02,
        -6.2143e-03,  7.5043e-05,  7.9647e-03,  1.7346e-02,  7.6365e-03,
        -1.5615e-03,  8.6663e-03,  7.2925e-03, -4.0445e-02,  4.5199e-02,
         3.2401e-02,  8.5259e-02, -2.4351e-03,  7.1474e-03,  2.0618e-02,
         3.0551e-02, -1.0251e-01, -1.4334e-01, -2.1200e-02, -3.1079e-02,
        -3.6913e-01, -2.3309e-01, -4.9757e-02, -7.3854e-02, -4.3664e-02,
         8.3339e-03,  2.1689e-02,  3.0557e-02, -3.0677e-02, -1.0161e-02,
        -2.1869e-01,  6.7087e-02,  4.4432e-02, -1.9133e-02, -1.2284e-01,
         1.7437e-02, -6.6666e-02, -9.1667e-03,  9.6532e-02,  3.5726e-02,
        -3.9105e-02,  1.4315e-03, -3.0348e-02, -9.0431e-03, -2.1042e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1984, -0.0297,  0.1601, -0.1643, -0.0505, -0.0618, -0.5025, -0.2463,
         0.0166, -0.0198,  0.1183, -0.0015, -0.0121, -0.0946, -0.2019, -0.0618,
         0.0094, -0.1047, -0.1272, -0.1863, -0.0669, -0.0337, -0.0893,  0.0594,
         0.1084,  0.0566,  0.0547, -0.2707, -0.0118, -0.0906, -0.1232, -0.0766,
        -0.0590, -0.4992, -0.0486, -0.1241, -0.0201, -0.1363, -0.1228, -0.0890,
        -0.0428, -0.0076,  0.0059,  0.0492, -0.2189,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0320, -1.0596, -0.2814, -0.3634,  0.2438, -0.0617, -0.1407, -0.0762,
        -0.2515, -0.1591,  0.1769, -0.1032,  0.0096, -0.1705, -0.3679,  0.2234,
        -0.0448, -0.2081,  0.0955, -0.0514, -0.2999,  0.1855, -0.1274,  0.0481,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1644,  0.0790,  0.0034, -0.0307, -0.0946, -0.0375, -0.1372, -0.1240,
         0.0115,  0.0446,  0.0241, -0.0386,  0.0109, -0.0517,  0.0118,  0.1114,
        -0.1169, -0.1511, -0.0335, -0.1735,  0.0162, -0.0158, -0.0945, -0.3347,
        -0.0421, -0.1865, -0.2082, -0.0972, -0.1686, -0.3328, -0.0949,  0.1025,
        -0.3283,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0234, -1.8346, -0.4310, -0.2862,  0.0667, -0.0821,  0.0910, -0.1050,
        -0.4493,  0.0510,  0.1002,  0.0307,  0.1086, -0.3422,  0.1773, -0.1014,
        -0.1177, -0.2024, -0.0897,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6711, -0.0848,  0.2083, -0.1123, -0.2914, -0.0103, -0.2074, -0.3053,
        -0.3407,  0.0053,  0.0593, -0.1223,  0.0357, -0.1263,  0.0515, -0.0769,
        -0.0752, -0.2026, -0.0613,  0.0201, -0.2476, -0.4378, -0.1563, -0.2193,
         0.0922,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2468, -0.6404, -0.3300, -0.1465, -0.1206, -0.2031, -0.9634,  0.0972,
         0.0557, -0.0771,  0.0018, -0.0497, -0.1884, -0.0463, -0.0327,  0.1768,
         0.0145,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.1063,  0.0107, -0.1210, -0.0243, -0.0492, -0.0495, -0.0167, -0.0432,
        -0.0176, -0.4164, -0.0501, -0.0730,  0.0528,  0.0069,  0.0211, -0.0790,
        -0.2190, -0.4668,  0.0081, -0.1977,  0.0175, -0.5726, -0.5291, -0.0079,
        -0.0960, -0.0318,  0.0397, -0.0305, -0.0766,  0.0928, -0.0234, -0.1549,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0935, -0.4822, -0.5159, -0.0203, -0.0742, -0.0312, -0.2178, -0.4440,
        -0.0394, -0.0146,  0.0343, -0.0163,  0.2647,  0.0019, -0.0036,  0.0783,
        -0.0491,  0.0286, -0.1498,  0.0624, -0.0564,  0.0057, -0.2791, -0.4183,
         0.0444, -0.0860, -0.2600, -0.0550,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2465, -0.2915,  0.0334, -0.0732,  0.1291, -0.2193, -0.5717, -0.1541,
        -0.0713, -0.1657,  0.0187, -0.2071,  0.0104,  0.0171, -0.0618, -0.1963,
        -0.1779, -0.1786,  0.0174,  0.0106, -0.0280, -0.0408, -0.0170, -0.0199,
        -0.1319,  0.1660, -0.0496,  0.0496,  0.0291,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1329, -0.4110, -0.1473, -0.0657, -0.3447, -0.1785, -0.1490, -0.0595,
        -0.0565, -0.0147,  0.1462, -0.0754, -0.1855, -0.6807,  0.0981, -0.2478,
         0.0642, -0.2952, -0.1407, -0.2270, -0.0355,  0.0077, -0.0641, -0.0339,
        -0.1824, -0.0743, -0.1193, -0.0521, -0.0225,  0.0191, -0.0422,  0.2056,
        -0.1181,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3588, -1.2285, -0.0995,  0.0165, -0.0498, -0.1685,  0.0088, -0.1100,
         0.0645, -0.3919, -0.0840, -0.1654,  0.0600, -0.1512, -0.0794,  0.0241,
        -0.1575, -0.1042, -0.4732, -0.0047, -0.1229,  0.1042,  0.1117, -0.2196,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2799, -0.4139, -0.5214, -0.0176, -0.0207,  0.0496, -0.0463, -0.2114,
        -0.0721, -0.2066, -0.1247, -0.2452, -0.1596, -0.0681,  0.0655, -0.0448,
         0.0153, -0.1318, -0.0521, -0.2936, -0.1328, -0.1013, -0.1022,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0842, -0.1848,  0.0047, -0.0547, -0.0147, -0.2826, -0.0948,  0.0318,
         0.0761, -0.0489,  0.0882, -0.0548, -0.0199,  0.0552, -0.0317,  0.0040,
        -0.1797, -0.1277, -0.0795, -0.0587, -0.0436, -0.0350, -0.0191, -0.0624,
        -0.2763, -0.0808, -0.0291, -0.0274,  0.0601, -0.5918, -0.6878,  0.0174,
        -0.1779,  0.0029, -0.0880, -0.1550, -0.0088, -0.1285,  0.0447,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2298e-01,  3.6635e-01,  4.3534e-02,  1.3275e-01,  6.5434e-01,
         4.2766e-02, -3.2958e-02, -5.3412e-02, -2.9957e-02, -1.0998e-01,
         2.9177e-02, -2.6449e-02, -1.6616e-02, -3.2508e-02, -3.8397e-02,
         1.4288e-01,  1.6223e-01, -6.8368e-02,  2.1865e-04, -1.0854e-02,
         2.4075e-01,  3.9393e-02, -2.2705e-03,  8.3210e-02,  5.4494e-01,
         6.0951e-01,  2.0339e-01,  1.1755e-01, -6.4643e-02,  2.4112e-01,
        -2.1139e-02,  1.0521e-01, -1.6739e-01,  5.6908e-02,  7.9391e-02,
        -2.7749e-02,  2.2548e-02,  3.7506e-01,  1.9696e-02,  2.8338e-02,
         3.5501e-02,  1.0789e-01, -7.4443e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0758e-01, -1.5144e+00, -6.7527e-02, -2.7387e-02,  3.4500e-02,
        -1.3380e-02, -1.3220e-01, -4.9802e-02, -3.4312e-01,  2.9205e-02,
         5.0365e-02, -2.2845e-03, -1.2069e-01, -2.3078e-02, -1.7860e-02,
        -7.3554e-02, -5.6960e-02, -1.7878e-01, -3.7454e-02, -1.7890e-02,
         1.2098e-02,  1.2738e-03, -1.9535e-02, -2.0026e-02, -2.7000e-02,
        -3.3652e-02,  1.0556e-01, -4.7704e-03, -3.1465e-02,  7.7316e-02,
         8.4055e-02, -8.8848e-02, -2.5401e-01, -4.5429e-03, -9.1278e-02,
         2.1151e-03, -2.5272e-03, -6.0240e-02, -1.7712e-01, -1.8048e-02,
        -6.4469e-02, -1.3400e-01, -2.0366e-02,  7.8484e-02, -2.7596e-02,
        -2.5133e-02, -2.8453e-02,  2.3904e-02,  1.1372e-02, -6.2486e-03,
         3.8606e-02, -1.6913e-02,  1.6445e-02, -7.3056e-03,  5.2376e-02,
         1.0593e-01, -2.5144e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1775, -2.1248,  0.1550, -0.0715, -0.3192,  0.1148, -0.1677, -0.4206,
         0.1445, -0.2964, -0.2523,  0.2465,  0.1446, -0.0940,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8996, -0.7707, -0.2245, -0.0135, -0.0380, -0.1644, -0.1193, -0.0285,
        -0.1445, -0.0694, -0.1974, -0.2510, -0.0236, -0.0390, -0.1826,  0.0493,
        -0.0223, -0.1352,  0.1950, -0.2083, -0.5666, -0.1099, -0.1193, -0.0881,
         0.1014,  0.0365,  0.0347, -0.0061, -0.0703, -0.0518,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5402,  1.1810, -0.4876,  1.8432,  0.4422,  0.1863, -0.2371,  0.4859,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0955, -0.5885, -0.0898, -0.1841, -0.0615, -0.3950,  0.0054,  0.0642,
        -0.0782, -0.0971, -0.0494,  0.0391, -0.1517, -0.4121, -0.0110,  0.0062,
         0.0125, -0.0473, -0.0423,  0.0373,  0.0300,  0.0096,  0.0119,  0.0750,
        -0.2591, -0.0613, -0.1519, -0.0355, -0.1908, -0.3860,  0.0940,  0.0770,
        -0.0501,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0273, -0.5450, -0.2722, -0.0941,  0.0257, -0.0865, -0.0932, -0.1091,
         0.0462, -0.1279, -0.0007, -0.0104, -0.0907,  0.0100,  0.0667,  0.0039,
         0.0124,  0.0540, -0.0485,  0.0231, -0.0146,  0.0486,  0.0104, -0.0144,
        -0.0190, -0.0631, -0.0429, -0.1852,  0.0144,  0.0067, -0.0494, -0.0185,
        -0.0748, -0.3494, -0.0093, -0.0582, -0.1386, -0.0812, -0.0076,  0.0216,
         0.0301, -0.0224, -0.0324, -0.1189, -0.0352, -0.0073,  0.0159, -0.0114,
         0.0332,  0.0358,  0.0038, -0.0892,  0.0043, -0.0205, -0.0273,  0.0324,
        -0.0260, -0.0067, -0.0384, -0.1561, -0.0733, -0.0298, -0.4372, -0.0351,
         0.0176, -0.0066, -0.0038,  0.0717,  0.0085, -0.0883, -0.0366],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1358e+00, -2.8302e-02, -2.2486e-02,  3.4433e-02, -1.1803e-01,
         2.7895e-02, -3.5613e-01, -1.0559e-01, -1.5352e-02, -4.4135e-02,
         2.6512e-02, -2.1670e-02, -6.5295e-03,  1.2695e-01,  3.4077e-04,
         2.5618e-02,  3.0635e-02,  1.1654e-02, -6.6168e-02, -5.5223e-01,
         1.5237e-03, -9.0302e-02,  1.2861e-01, -2.4639e-01,  3.0021e-03,
        -1.5101e-01, -1.7497e-01, -6.1417e-02,  1.3396e-02, -7.7858e-02,
        -1.0688e-01, -1.0219e-01, -2.4215e-01, -4.2142e-01,  3.1443e-02,
        -2.1337e-01, -1.6653e-01,  9.6726e-03,  2.7181e-03, -4.6598e-02,
         1.7387e-01, -2.5400e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0637, -1.2789,  0.1305, -0.0186,  0.0191,  0.0053,  0.1190,  0.0266,
        -0.1055, -0.1614, -0.0626, -0.1283, -0.3725, -0.0267,  0.1554, -0.0209,
         0.0600, -0.2177, -0.3990, -0.0466, -0.0580, -0.0930,  0.0330, -0.0275,
        -0.0632, -0.2721, -0.1371, -0.1070, -0.5009, -0.0121, -0.0290, -0.0085,
         0.0063, -0.0561, -0.0417,  0.0478,  0.0066,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3923, -1.3344, -0.2158, -0.3456, -0.2226,  0.3112, -0.0233, -0.1828,
        -0.0987, -0.2066, -0.1136,  0.0509, -0.3140, -0.1325,  0.2742,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1400, -0.1074,  0.0244, -0.1639, -0.4472, -0.6241, -0.1268, -0.1509,
        -0.7042, -0.0564,  0.0761, -0.0437, -0.5043,  0.1001,  0.1561,  0.0115,
        -0.0392,  0.2690,  0.1483, -0.1580,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0547,  0.1356,  0.0480, -0.0320, -0.7900, -0.0539,  0.0309,  0.0486,
         0.0627, -0.0406, -0.0919, -0.1773, -0.0946, -0.0496, -0.1587, -0.1724,
        -0.0242, -0.0334,  0.0298, -0.2476,  0.0025, -0.0291, -0.0025, -0.0676,
         0.0032, -0.1865, -0.3156, -0.0196, -0.0843, -0.1526, -0.3777, -0.1401,
         0.0112, -0.0775, -0.1342, -0.1096,  0.0061, -0.0427, -0.0523, -0.0187,
         0.1613, -0.0367,  0.1348,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1560,  0.0218,  0.1943,  0.1514,  0.1379,  0.1433,  0.1452,  0.6996,
         0.0604,  0.0248, -0.0534,  0.0964,  0.2106,  0.4258,  0.4468, -0.0408,
        -0.0760, -0.0341, -0.0295,  0.1520,  0.3204,  0.1208,  0.0676,  0.0248,
        -0.0454,  0.1140,  0.1562, -0.0586,  0.3331,  0.0247,  0.0258,  0.1853,
         0.0239,  0.1794,  0.2375,  0.0184,  0.0311, -0.1344,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1973, -1.6742, -0.6498, -0.3733,  0.1224, -0.5127,  0.0565,  0.1284,
         0.0604, -0.3088, -0.0780, -0.0516, -0.2566, -0.0472,  0.1113, -0.0518,
         0.0675, -0.2792,  0.1552,  0.0292,  0.0587, -0.1095,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4076, -0.9078, -0.3031,  0.0846,  0.1250, -0.2538, -0.0089,  0.0715,
        -0.0913, -0.5155, -0.0309,  0.1347, -0.0293,  0.0320, -0.0888, -0.0964,
        -0.4675, -0.0517, -0.0762, -0.4145, -0.1919, -0.1916, -0.1716, -0.0730,
        -0.1529,  0.0084, -0.0467, -0.0234, -0.0846, -0.2764, -0.1197, -0.0305,
        -0.0797,  0.0352, -0.0511, -0.4278,  0.0920, -0.0124, -0.0116, -0.0367,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0798,  0.0833, -0.0031,  0.0178, -0.0189,  0.0055, -0.1238, -0.0583,
        -0.3994,  0.0123, -0.0670, -0.0675, -0.0943, -0.2888, -0.2070,  0.0071,
         0.0177,  0.0186, -0.3098,  0.0579,  0.0111,  0.0528,  0.0587, -0.0031,
        -0.0352, -0.3238, -0.0201, -0.3157, -0.4213, -0.1369, -0.0506, -0.3069,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2418, -1.7512, -0.2720, -0.0536, -0.1322, -0.0196, -0.1989, -0.1890,
        -0.3714, -0.0650,  0.0522, -0.1131, -0.1504, -0.0224, -0.0431, -0.0682,
        -0.0939, -0.1537,  0.0277,  0.0152,  0.0165, -0.0355, -0.0078, -0.2662,
         0.0037,  0.0181, -0.1113, -0.0474,  0.0332, -0.1149, -0.0036, -0.0290,
        -0.0288,  0.1061, -0.2559,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.1880,  0.3566,  0.2651,  0.5291,  0.2398,  0.0437,  0.4990,  0.6352,
         0.0393,  0.6021,  0.1423,  0.1209,  0.0295,  0.2875,  0.1343,  0.0313,
         0.0186,  0.1201, -0.0265, -0.0429,  0.2729,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1900, -2.1023,  0.0737, -0.3401, -0.3297, -0.2624, -0.0534, -0.1660,
         0.0555,  0.4164,  0.0828,  0.0631,  0.0470,  0.0556, -0.2006,  0.1170,
         0.0715, -0.0179,  0.4415,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0855, -0.2331, -0.5805, -0.2142, -0.0875, -0.0410,  0.0675, -0.1052,
         0.0166, -0.1218, -0.3578,  0.0027, -0.1487, -0.1019,  0.0648, -0.0942,
        -0.0298, -0.0746,  0.3512, -0.0413,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2231, -0.7779, -0.0124, -0.4391, -0.0444,  0.0691, -0.0555, -0.2768,
        -0.0309,  0.0641, -0.1222, -0.0914, -0.0290, -0.3677, -0.0458, -0.1346,
        -0.4390,  0.0072, -0.0484, -0.1701,  0.0191, -0.0743,  0.2504,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7852, -0.3611, -0.1482,  0.0724, -0.2528, -0.1512, -0.0945, -0.0028,
        -0.2559, -0.3740, -0.0434, -0.2196,  0.0309, -0.4746,  0.0192,  0.0636,
         0.0927, -0.0567, -0.0572,  0.0785, -0.2353,  0.0404, -0.0521, -0.0885,
         0.0686,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4189,  1.5043,  0.0524,  0.1375,  0.0125,  0.0854,  0.0383,  0.1100,
        -0.1865, -0.0668,  0.5811,  0.1388,  0.0522, -0.0814, -0.0080,  0.1506,
        -0.1071,  0.0668, -0.0936, -0.0093,  0.1944,  0.2138,  0.1044,  0.2215,
         0.1387,  0.0802,  0.0241,  0.0244, -0.0471, -0.0398,  0.1046,  0.0464,
         0.1021, -0.2237, -0.0097,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0255,  0.0156,  0.0299, -0.3738, -0.0564, -0.1555, -0.3548, -0.1039,
        -0.2392, -0.0533, -0.0144, -0.0580,  0.0333, -0.1202, -0.2625, -0.4025,
        -0.0939, -0.5374,  0.0192,  0.0341, -0.0978, -0.0814,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0564,  0.0931, -0.1075, -0.0721,  0.1189,  0.1451,  0.0162,  0.0307,
         0.1762,  0.5876,  0.2938,  0.0756,  0.0536,  0.0793,  0.2227,  0.2766,
         0.1538,  0.0803,  0.0180,  0.2144,  0.1460,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0294, -1.5637, -0.3008,  0.0365, -0.0941, -0.0746, -0.3215, -0.4553,
         0.4606,  0.2109, -0.0628,  0.0017,  0.0835, -0.1132, -0.0878, -0.2098,
         0.1431, -0.2949, -0.0973,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0657,  0.0213,  0.0675, -0.0187,  0.0069,  0.2199, -0.0758,  0.0007,
        -0.0674, -0.0220, -0.0781,  0.0409,  0.0553,  0.0778, -0.0365, -0.0791,
         0.0582,  0.2560,  0.0466,  0.0995,  0.0892,  0.0901, -0.0130,  0.0280,
         0.2461,  0.0386,  0.0931,  0.3840,  0.1832,  0.6477,  0.0157,  0.0736,
         0.1953,  0.2685, -0.0057,  0.0224, -0.1335, -0.0437, -0.1248, -0.0014,
        -0.0699, -0.0734, -0.0723, -0.0306,  0.0862, -0.0741,  0.1363],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2428, -0.8473, -0.3795, -0.0919,  0.0680, -0.0031, -0.0869, -0.0259,
        -0.1430,  0.0457,  0.0459, -0.0362,  0.0460, -0.2538,  0.0520, -0.1160,
        -0.2147, -0.0783,  0.1247, -0.0694, -0.0415, -0.2568, -0.3830, -0.4135,
         0.0067,  0.0145, -0.1419,  0.0046,  0.0743, -0.0448, -0.0997,  0.0562,
        -0.2271, -0.1068,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0516, -1.1319,  0.0382,  0.0362, -0.0772, -0.1515, -0.0311,  0.0175,
        -0.0265, -0.1176, -0.1180, -0.0034, -0.2728, -0.3591, -0.0246, -0.0057,
         0.0357,  0.1611,  0.0978, -0.1620, -0.0450,  0.0090,  0.0163,  0.0267,
        -0.0215,  0.0300, -0.0259, -0.0370, -0.0532, -0.1063,  0.0719, -0.0487,
        -0.1412,  0.2414,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-4.0250e-02, -1.3451e+00,  1.2379e-01,  1.6104e-01, -2.3535e-01,
        -3.2066e-01,  4.1538e-02, -1.1072e-02, -4.2478e-04, -3.8975e-01,
        -1.4471e-01, -9.3730e-02, -1.9376e-01, -5.8895e-01, -1.4069e-01,
        -1.1337e-01,  9.0128e-02,  1.6651e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0630,  0.0983, -0.0374,  0.3813,  0.0208, -0.0834, -0.0260,  0.1070,
         0.3555,  1.0796, -0.0554, -0.0613,  0.0790,  0.1513,  0.0146, -0.0660,
         0.0951,  0.2226,  0.0699,  0.6178, -0.0150, -0.0667,  0.0046, -0.0642,
        -0.0271,  0.1445,  0.0751,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9823e-01,  3.5287e-01, -7.3830e-02,  6.2380e-04, -3.5940e-02,
         1.3711e-01,  2.9018e-01,  8.8887e-02,  5.8585e-01, -1.8383e-01,
         2.0187e-02,  1.2249e-01,  1.0614e+00,  1.8049e-01, -8.7577e-02,
         2.9080e-02,  6.4948e-02, -4.8064e-02, -5.2361e-02,  9.5646e-01,
        -2.7003e-02,  1.2199e-01,  2.8887e-01, -5.1918e-02, -1.5988e-02,
         1.1112e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7441, -1.4462, -0.1200, -0.4567, -0.4549, -0.1755, -0.1630,  0.0672,
        -0.0990, -0.1547, -0.2850, -0.0695, -0.1011, -0.1318,  0.0255, -0.2802,
        -0.0191, -0.1179, -0.0417,  0.4137, -0.2196,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0894, -0.9312, -0.8691, -0.6049, -0.1327, -0.1131, -0.8423,  0.0504,
        -0.1495, -0.1736, -0.0033, -0.1821, -0.0300,  0.0661, -0.0582,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3823,  0.1955, -0.1535,  0.0406, -0.1256, -0.1287,  0.0194,  0.1111,
         0.0543, -0.0529,  0.2585, -0.1759,  1.1635,  0.3995, -0.3157,  0.2656,
        -0.0734, -0.0084,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4082, -1.2970, -0.5143, -0.5574, -0.0678, -0.1111,  0.0141, -0.0823,
        -0.0696, -0.2254, -0.3258, -0.2124,  0.0982, -0.2517, -0.0208,  0.0590,
         0.0176, -0.0387,  0.1765,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9941e-02, -5.2404e-01, -3.9889e-01, -1.2437e-01, -8.8513e-02,
        -4.0522e-02,  1.7197e-01,  3.6842e-02, -2.0864e-02,  1.1988e-02,
         5.7841e-02,  1.7725e-02, -5.2755e-02,  7.7221e-03, -3.6923e-04,
        -6.9382e-02, -3.1942e-01, -1.9909e-02,  3.7652e-02,  6.6077e-02,
        -2.8526e-01, -1.6760e-02, -2.2827e-01, -1.3294e-01, -3.3307e-01,
        -2.0721e-02, -2.0259e-01,  5.5315e-02, -8.6554e-02, -1.0053e-01,
        -6.1154e-03, -4.4809e-02, -1.1599e-01, -4.6391e-02, -2.1648e-01,
         3.6350e-02, -1.7757e-01,  1.1793e-01, -3.3630e-02,  4.1153e-02,
         1.1034e-01,  7.7514e-02,  6.2480e-02, -7.7064e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4721, -0.8299, -0.0065, -0.3095, -0.3097, -0.2339, -0.1029,  0.0432,
        -0.5462, -0.0277, -0.1263,  0.1065,  0.0966,  0.0193,  0.0982, -0.0425,
        -0.0790, -0.3877, -0.0115, -0.1518, -0.0936,  0.0064,  0.0374, -0.0187,
         0.0460,  0.0098,  0.0323, -0.3256,  0.0032, -0.0065, -0.0304, -0.0040,
        -0.0400,  0.1270,  0.0563,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2200,  2.9002,  0.3265,  0.2565,  0.0439,  0.1895,  0.5933,  0.5046,
         0.0073,  0.0504, -0.0520,  0.0369,  0.2570, -0.0639, -0.0494,  0.2540,
         0.0242,  0.0580, -0.0310,  0.0122, -0.0265,  0.0169,  0.2129, -0.7667,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1806, -0.8677, -0.0784, -0.0670, -0.0938, -0.0343, -0.0459, -0.1942,
        -0.2749, -0.0021, -0.0278,  0.0445, -0.0394, -0.1926, -0.2385,  0.0170,
         0.0017,  0.0225, -0.3123, -0.0280, -0.0145, -0.1665,  0.0210, -0.1042,
         0.0644, -0.1961, -0.0680, -0.1580, -0.0415, -0.0894, -0.1143,  0.0186,
        -0.0547, -0.0840,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3069,  0.2169, -0.0071, -0.0649,  0.1270, -0.0027, -0.3344,  0.0118,
        -0.0060, -0.0387,  0.0530, -0.0595, -0.3843, -0.0414, -0.1801, -0.6514,
        -0.0248,  0.0383, -0.0103,  0.0542,  0.0460, -0.0737,  0.1415, -0.2942,
        -0.1499, -0.0434, -0.1766, -0.0874,  0.2144,  0.0543,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.1061,  0.0325, -0.3475, -0.9767, -0.2381,  0.0348,  0.0502, -0.1883,
        -0.0372, -0.8165,  0.0748,  0.3368,  0.1015,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1149, -0.0543,  0.2140,  0.1921, -0.0517, -0.1761, -0.1657, -0.2713,
        -0.6002, -0.0276,  0.1116, -0.7216,  0.0375, -0.2613, -0.4038,  0.0168,
        -0.1089, -0.1055,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4345, -0.8776, -0.4686, -0.0767, -0.4710, -0.0491, -0.2470, -0.0980,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0830, -0.6053, -0.0412, -0.0237, -0.0054, -0.0484, -0.0535,  0.0023,
        -0.0095,  0.0525, -0.0105,  0.0243, -0.0007,  0.0701, -0.0101, -0.0228,
         0.0437,  0.0426,  0.0198,  0.0185,  0.0516, -0.0455, -0.1344, -0.0822,
        -0.0650, -0.0784, -0.0284, -0.0367, -0.1867, -0.1642, -0.0509, -0.2532,
        -0.0500, -0.4279,  0.0715, -0.3623, -0.2442, -0.0491,  0.0073,  0.0166,
        -0.0556, -0.0992, -0.0351,  0.0377,  0.1590, -0.1118,  0.0094],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5256, -0.0335, -1.0087,  1.4180, -0.0131, -0.2068, -0.5404,  0.0708,
        -0.6207,  0.7196,  0.3366,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5532,  1.4695,  0.3124,  0.6066,  0.0038, -0.0465,  0.0230,  0.8003,
         0.4024,  0.4196,  0.0359,  0.1153,  0.5100,  0.0287, -0.2291,  0.2877,
         0.0401,  0.0364,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1658, -0.2816, -1.1205,  0.1488,  0.1593,  0.1970, -0.3980,  0.2243,
        -0.5063, -0.3022,  0.0310,  0.1781,  0.0311,  0.0322, -0.3411,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1544, -1.5210, -0.3258, -0.0264, -0.1013, -0.4872, -0.6372, -0.0357,
         0.1955, -0.2336, -0.3381,  0.0572,  0.1506,  0.0484,  0.2304,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3434, -0.0101,  0.2389,  0.1105,  0.0534,  0.1045,  0.0366, -0.0702,
        -0.2418, -0.1487, -0.1178, -0.2409, -0.5942,  0.0333,  0.0576, -0.1891,
        -0.1061, -0.0262,  0.1507, -0.1491, -0.4029, -0.5344,  0.0841, -0.0949,
        -0.1418, -0.1625,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1670,  0.0708,  0.1248, -0.1636, -0.1678, -0.0406, -0.0152, -0.5608,
        -0.8066, -0.2280,  0.0424, -0.0747, -0.1790, -0.3371, -0.0845,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0195e-03,  1.8066e+00, -2.7849e-02,  2.9530e-01,  4.5913e-01,
         1.7687e-01,  6.0899e-01, -8.5332e-02,  1.9838e-02,  8.8929e-01,
        -1.9904e-01, -5.6428e-02,  3.3576e-01,  4.9905e-02,  5.3560e-02,
        -3.2420e-02, -2.1368e-02, -7.7451e-04,  1.8438e-01, -1.0426e-01,
         1.3990e-01, -4.2847e-02, -3.9248e-02, -6.0424e-02, -5.6353e-02,
         1.8445e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0034, -0.0173,  0.0557,  0.0507, -0.1203, -0.0594,  0.0291, -0.0468,
        -0.0281, -0.1970, -0.4364,  0.0084, -0.0618,  0.0591, -0.3765, -0.1502,
         0.1251, -0.2101,  0.0151, -0.1689,  0.0087, -0.0667, -0.0111, -0.2287,
        -0.1127, -0.0823,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-4.2079e-01, -9.6401e-01,  4.7599e-02,  1.7670e-03, -2.6227e-03,
         1.3962e-01, -9.7119e-02, -2.1838e-01, -5.1260e-02, -1.4460e-01,
        -7.1144e-02, -1.1371e-01, -3.5785e-01, -4.6155e-02, -1.3565e-01,
        -6.8490e-02, -3.1164e-02, -5.7052e-02, -1.0025e-01, -1.6452e-02,
         4.5431e-02, -1.4327e-01,  4.2900e-03, -2.0920e-01, -2.4443e-01,
        -1.4829e-01, -1.6617e-01, -1.9200e-02,  1.3852e-01, -2.3283e-03,
        -3.5607e-04, -8.4577e-02,  3.0701e-02, -3.4008e-02, -2.3487e-02,
        -1.8318e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0591, -0.3678, -0.1164, -0.0556, -0.1570, -0.1055, -0.3844, -0.2937,
        -0.0209,  0.0914, -0.0787, -0.0116, -0.0638, -0.0111, -0.0060,  0.0285,
         0.0393, -0.0204,  0.0158,  0.0445, -0.0867, -0.1626, -0.1040, -0.0248,
        -0.1768, -0.3101,  0.0537, -0.0953, -0.1913, -0.0747,  0.0353, -0.0927,
        -0.2046,  0.0434, -0.1892,  0.0036,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0288,  1.0084,  0.0917, -0.1291, -0.0390,  1.1525,  0.2247,  0.3609,
         0.1327, -0.0781,  0.3984,  0.0565,  0.4765, -0.0891, -0.2373, -0.0560,
         0.0525,  0.1932,  0.4574, -0.3616,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1185, -0.5268, -0.1528, -0.1113, -0.1003, -0.0443, -0.0237, -0.2316,
        -0.0014, -0.1309, -0.0578, -0.0147, -0.0507,  0.0034, -0.3730, -0.1302,
        -0.1658, -0.1853, -0.0204, -0.0364,  0.0900, -0.0223,  0.0490, -0.0044,
        -0.0061,  0.0284, -0.0346,  0.0526, -0.0090, -0.0360, -0.1259, -0.0701,
         0.0083, -0.2859,  0.1415,  0.1025, -0.0545,  0.1048,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4993e-02, -9.6480e-01, -1.7060e-01, -2.6952e-01, -9.4516e-02,
        -8.9001e-02, -9.2612e-02,  7.8678e-02,  3.8573e-02,  8.8978e-02,
        -1.6702e-01, -2.8468e-01, -3.7040e-02, -2.4403e-02,  3.5487e-04,
        -3.5876e-01,  2.4964e-02, -3.2500e-01, -4.8792e-02,  2.1038e-02,
        -7.4651e-02,  9.8123e-02,  1.1634e-01, -6.9310e-02,  8.6132e-02,
        -1.2625e-01, -1.1416e-01, -9.8170e-02, -3.6611e-01,  1.1591e-02,
        -2.5388e-02, -8.6402e-02,  3.0248e-02, -1.0513e-02, -9.0183e-02,
         1.5486e-02, -3.5959e-02,  2.0122e-02,  6.7705e-03, -2.0674e-01,
        -7.2573e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2134,  2.3377, -0.1329,  0.2932, -0.1097,  0.3206,  0.2428,  0.0828,
         0.0465, -0.0102,  0.1511,  0.2121,  0.2530,  0.0066,  0.0605,  0.3084,
         0.0961,  0.2462,  0.0661, -0.0624, -0.0724, -0.3746,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2316, -0.0306, -0.0466, -0.0714,  0.0241,  0.0418, -0.0109,  0.0626,
        -0.0419,  0.0057, -0.0574, -0.2228,  0.0788, -0.0774,  0.0795, -0.2921,
         0.0589, -0.2629, -0.0203, -0.0805, -0.4664, -0.0550,  0.0761, -0.2015,
        -0.0207,  0.0678,  0.0013, -0.1298, -0.5588, -0.0588, -0.0729, -0.2045,
        -0.2184,  0.2233,  0.0714,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1222e-01,  2.6224e-01,  1.0004e+00,  1.1868e-01,  3.1394e-01,
         4.1723e-02,  4.6684e-02, -1.4114e-01,  1.5293e-01,  8.4946e-02,
         1.0058e-01,  3.6064e-03,  5.1845e-02,  4.0899e-02, -1.4807e-02,
         8.9157e-02, -1.8341e-01,  1.5824e-01,  1.7169e-01,  7.2915e-02,
         2.3176e-01, -5.1872e-04,  5.9073e-02,  1.8488e-01,  6.4557e-03,
        -1.2363e-01,  1.3452e-01,  2.8160e-01, -1.5429e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4983,  1.4735,  0.5046,  0.2677,  0.1488,  0.4199, -0.0727,  0.1235,
         0.0812,  0.0970,  0.1721,  0.2528, -0.0071,  0.0501, -0.0492,  0.1954,
         0.1064,  0.1589,  0.0056, -0.0407,  0.1906,  0.0094,  0.0718,  0.2863,
        -0.2624,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2170, -0.0605, -0.0255,  0.0544, -0.6262, -0.0916, -0.0749, -0.5880,
        -0.4844,  0.1657,  0.1534,  0.0522, -0.1420, -0.0472, -0.1553, -0.4100,
         0.0024, -0.0315,  0.0355, -0.0604, -0.1140, -0.0069, -0.0751,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2413,  0.0093,  0.0284,  0.0660, -0.0113,  0.0417,  0.0216,  0.0138,
         0.0006,  0.0134, -0.0021, -0.0119, -0.1826, -0.5648, -0.0213, -0.1625,
        -0.0791, -0.2249, -0.0513, -0.0358, -0.0574, -0.0111, -0.1088, -0.0232,
        -0.2648, -0.1175, -0.4374, -0.0118, -0.3186, -0.0930, -0.2422, -0.0701,
        -0.1767, -0.0102, -0.0549, -0.1140,  0.0781, -0.1287,  0.2861,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0888,  0.0900, -0.0778, -0.3408, -0.8339,  0.0283, -0.1298, -0.4227,
        -0.5367,  0.1035, -0.2262,  0.2383,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.0818, -0.2571, -0.8149, -0.0437, -0.0507, -0.1329, -0.2773,  0.0126,
        -0.1111, -0.0974,  0.0104, -0.1098,  0.0013, -0.0257, -0.0952,  0.0075,
        -0.0691, -0.1710,  0.0053, -0.0613, -0.0654, -0.1004,  0.0455,  0.0017,
        -0.0140,  0.0485,  0.0135, -0.1088, -0.1805, -0.0469,  0.0991,  0.0142,
        -0.0946,  0.0043,  0.0027,  0.0035, -0.1716, -0.0113, -0.0261, -0.0243,
         0.0140, -0.0078,  0.0451, -0.0169, -0.0277,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3457,  1.7605,  0.3841,  0.4909,  0.2435, -0.0326, -0.4096,  0.3017,
         0.4852,  0.3071,  0.3145,  0.0597,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0596, -2.4207, -0.2070, -0.5678,  0.0515,  0.0442, -0.2260,  0.2547,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3281,  0.5614,  0.2466,  0.1857,  0.2866,  0.0157, -0.0342,  0.0833,
         0.1099,  0.3730,  0.0757,  0.0067,  0.0181, -0.0320, -0.0592, -0.0369,
         0.0801,  0.0627,  0.0283,  0.0409, -0.0099,  0.0498,  0.0462, -0.0106,
         0.0134, -0.0092, -0.0530,  0.0848,  0.1495,  0.0407,  0.0444, -0.0165,
        -0.0180,  0.0606,  0.0401, -0.0343, -0.0939, -0.0436, -0.0279,  0.0087,
         0.0452,  0.0339,  0.0606,  0.0431,  0.0135, -0.5081,  0.3132, -0.0837,
         0.2895,  0.3673,  0.1622,  0.0035,  0.0453, -0.0205,  0.0716, -0.0293,
         0.0562, -0.0190, -0.0411, -0.0932,  0.0559], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2385, -0.0857,  0.0660, -0.0450, -0.2708,  0.0246, -0.1205,  0.0047,
        -0.0130,  0.0675,  0.0697, -0.1043, -0.0445, -0.1975, -0.2846, -0.0494,
         0.0087, -0.0275, -0.2188, -0.1161, -0.5236, -0.2500, -0.2841, -0.0231,
        -0.2858, -0.0525, -0.1178, -0.0814, -0.0787, -0.1441, -0.0118, -0.0186,
        -0.1643,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1304, -0.6391, -0.9757, -0.2634, -0.3886, -0.0537, -0.3934,  0.0520,
         0.0504,  0.0283, -0.0466, -0.0546, -0.0948,  0.0434,  0.0138, -0.0511,
         0.0119, -0.4001, -0.9597,  0.0807, -0.0426, -0.1703,  0.0385,  0.0102,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5384, -0.3995, -0.4395, -0.1164, -0.1309, -0.3835, -0.0560, -0.1261,
        -0.0859, -0.3894, -0.3201, -0.3399, -0.4743, -0.0170, -0.1114,  0.0261,
         0.0383,  0.0893,  0.1444,  0.0278, -0.1582, -0.0376, -0.0379,  0.1322,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4673,  0.0364, -0.1363, -0.3293,  0.0174, -0.0057, -0.2075, -0.2499,
        -0.0369, -0.0713, -0.0245, -0.0124,  0.0095, -0.0054, -0.2486, -0.3277,
        -0.2816,  0.0023, -0.0519, -0.1959, -0.2984,  0.0137, -0.0977, -0.4205,
        -0.0583, -0.1004, -0.3251, -0.0681,  0.0040,  0.0341, -0.0370, -0.0873,
         0.0111, -0.0310,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5056e-01, -1.0648e-02, -2.8062e-02, -2.7223e-02,  5.5985e-01,
        -1.8560e-01, -5.9010e-03,  8.5851e-02, -8.0486e-02, -8.7684e-02,
        -7.1822e-04,  1.5838e-01,  2.3227e-01,  8.2627e-01,  8.2852e-02,
        -2.6938e-01,  6.9414e-01,  5.2733e-02,  2.7095e-01,  1.8497e-01,
         3.2683e-02,  4.2186e-01, -3.4033e-01, -1.2692e-01,  2.7112e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2856,  0.1427,  0.0484, -0.0069, -0.0614, -0.1078, -0.5671, -0.4044,
        -0.4023, -1.5755, -0.0298, -0.0294, -0.0458,  0.1080,  0.0927, -0.3449,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4856, -0.9901,  0.1008, -0.3664, -0.1233, -0.1575, -0.6928, -0.1269,
         0.0399, -0.0440,  0.0263, -0.1309, -0.0912, -0.0191, -0.0277, -0.0085,
        -0.0347, -0.2027, -0.1294,  0.0239,  0.0016, -0.1756,  0.1362,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6957, -0.1714,  0.2672,  0.9089,  2.0202, -0.2319, -0.4769,  0.1276,
         0.0387, -0.1491,  0.1653, -0.5338,  0.3884,  0.4169,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.2805,  0.4363,  0.4773,  1.0040, -0.0795, -0.1296,  0.4307,  0.0054,
         0.3524,  0.1306,  0.2173,  0.3767, -0.0145,  0.0457, -0.0259, -0.1287,
         0.0936,  0.2693,  0.0817,  0.4524, -0.0043,  0.1077, -0.0325, -0.1812,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2519, -0.0425, -0.5417, -0.2465, -0.7385, -0.1021, -1.1477, -0.0869,
        -0.1854, -0.1613, -0.1750,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0193,  0.1204, -0.0495, -0.0015, -0.0658, -0.1279, -0.7417, -0.0451,
        -0.7325, -0.1914, -0.0654, -0.0784, -0.1290, -0.0201, -0.2419, -0.0677,
        -0.0154, -0.4439, -0.1722,  0.1276, -0.0858,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2369,  0.2453, -0.2429, -0.0203, -0.1316,  0.0144, -0.1018, -0.7983,
        -0.3364, -0.7194, -0.1221,  0.0289, -0.1147,  0.4281,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0431,  0.2667,  0.1207,  0.0220, -0.0405, -0.0635,  0.0494,  0.0342,
         0.0242, -0.0304,  0.1125,  0.4347,  0.9342,  0.0477,  0.0801,  0.1849,
         0.0557,  0.0721,  0.0414, -0.0707,  0.0827,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1259, -2.0661, -0.1301, -0.4628,  0.4869, -0.7988, -0.3388, -0.3448,
        -0.1624, -0.2760, -0.2288, -0.1619,  0.0572,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2086e-02, -4.6491e-01, -6.3604e-02, -7.0058e-02, -6.2740e-02,
        -1.0658e-01, -2.3483e-01, -4.7799e-01, -1.6641e-01, -7.3245e-02,
        -4.3660e-02,  1.0804e-01,  6.1943e-02,  4.5914e-02, -1.2838e-02,
         2.7313e-02, -3.1590e-05,  9.8097e-03, -4.0025e-03,  1.2308e-01,
         1.7127e-02,  4.5069e-02,  1.8742e-02, -2.0011e-01, -8.6810e-01,
         6.1785e-02, -2.2280e-01, -3.1038e-01, -1.3679e-01, -1.9095e-04,
        -1.2407e-01,  4.6698e-02,  3.4089e-02, -8.9321e-02, -1.0066e-02,
        -2.8925e-02, -1.0578e-01, -4.0338e-03, -4.8546e-03, -3.1205e-02,
        -7.7044e-02, -6.0509e-03,  2.1885e-02,  8.7105e-02,  3.0314e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2017,  0.3514,  0.2156,  0.0798, -0.0520,  0.0643,  0.0714,  0.0934,
         0.0323,  0.0024,  0.1378,  0.0378,  0.0326,  0.0291,  0.1721,  0.1032,
         0.5619, -0.1084,  0.0176,  0.0534,  0.1652,  0.3494,  0.1378,  0.3906,
        -0.0428,  0.0223,  0.1271,  0.1396,  0.3514,  0.0600,  0.0744, -0.0698,
        -0.1018, -0.0171,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1544,  0.2203,  0.0020,  0.2291,  0.1204,  0.0152, -0.1431,  0.1062,
        -0.0387,  0.1973,  0.0510,  0.1078, -0.0226,  0.1455,  0.1088, -0.1486,
        -0.0930, -0.0783,  0.1355,  0.9307,  0.9645, -0.3323, -0.1245, -0.1347,
         0.5298,  0.1018,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0068, -1.3662,  0.1384, -0.2904,  0.1327, -0.2625, -0.0421, -0.3449,
        -0.1680, -0.0816,  0.0388,  0.0641, -0.0143,  0.0345, -0.0403, -0.1283,
        -0.4578,  0.0734, -0.0518,  0.0420,  0.0076,  0.0051,  0.2443,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0917, -0.5537, -0.0792, -0.0998,  0.0016, -0.0299, -0.1407, -0.1735,
        -0.3073, -0.0577,  0.1045, -0.0953, -0.0331, -0.0311, -0.7229,  0.0903,
         0.0302, -0.1353, -0.3071, -0.0783,  0.0026, -0.0624,  0.0245,  0.1231,
         0.0160, -0.0040, -0.2050, -0.3791,  0.0409, -0.0119,  0.0118, -0.0135,
         0.0193, -0.0048,  0.0229,  0.0311, -0.1243,  0.0165, -0.0476,  0.0963,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1469, -0.2488, -0.0522, -0.0439, -0.4871, -0.0581,  0.3368, -0.3831,
         0.0098, -0.0141, -0.0585, -0.0100, -0.0317, -0.0806, -0.2123, -0.2523,
        -0.0834, -0.0246, -0.0475, -0.0690, -0.0781, -0.0990, -0.1933,  0.0320,
        -0.0464, -0.0269, -0.0300, -0.3413, -0.0460,  0.0979, -0.0275, -0.1283,
        -0.2868,  0.0355,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.0523, -0.3609, -0.0268, -0.0653, -0.2246,  0.1010,  0.0345, -0.0822,
         0.0284,  0.0343, -0.0136, -0.0338,  0.0651, -0.0552, -0.3863, -0.0259,
        -0.0935, -0.0317, -0.2427, -0.0813,  0.0077,  0.0284, -0.1837,  0.0130,
        -0.1576, -0.3792, -0.0946, -0.1221, -0.0564,  0.0482,  0.0977,  0.0057,
         0.0398,  0.0092, -0.1428,  0.0085, -0.0485, -0.1354, -0.0208, -0.0475,
        -0.0808, -0.0294, -0.0564, -0.0190, -0.0055,  0.0128, -0.0040,  0.0034,
        -0.0223, -0.0741,  0.0226, -0.1535], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2823, -0.0929, -0.1590, -0.4669, -0.5040,  0.0326,  0.0738,  0.1158,
         0.0655, -0.1240,  0.0349, -0.0251, -0.1337, -0.8813, -0.1265, -0.3069,
        -0.6840, -0.3672, -0.2881, -0.1579, -0.0576, -0.1778, -0.0536, -0.2893,
        -0.0095,  0.0563, -0.0480, -0.0065,  0.1214,  0.0211, -0.0091,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3398, -0.8318,  0.1168,  0.0302,  0.0828,  0.0487,  0.0529,  0.0080,
         0.0139,  0.1266, -0.0138, -0.3494, -0.0507, -0.0780, -0.2136, -0.2390,
         0.0250, -0.2319, -0.4753, -0.4023, -0.1915, -0.0183, -0.1064, -0.1834,
         0.1091,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4852,  0.2919, -0.1434, -0.0992, -0.4100, -0.6770, -1.2479, -0.0467,
        -0.0462, -0.0780, -0.2057,  0.2297,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1878, -1.2025, -0.0697, -0.1649, -0.2618,  0.0124, -0.0128, -0.0345,
         0.0071, -0.0323,  0.0579, -0.0557, -0.1523, -0.3918, -0.0450, -0.1609,
        -0.0199, -0.0422,  0.0342,  0.0066, -0.1180, -0.0594,  0.0098, -0.2037,
        -0.1265, -0.3382, -0.0508,  0.0943, -0.0014,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0038, -1.6111, -0.1686,  0.1581, -0.0791, -0.5914, -0.7420,  0.6043,
         0.3346,  0.1203,  0.0160, -0.1466,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0785, -1.7965,  0.1663, -0.5516,  0.1764, -0.5562, -0.9738,  0.1262,
        -0.1667,  0.0594,  0.0556, -0.2253,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5883,  0.2473, -0.4537, -0.1392, -0.6591, -0.2806,  0.0430, -0.1208,
        -0.5152,  0.0175, -0.0159, -0.2613, -0.0438,  0.0514, -0.0569, -0.4318,
         0.2219,  0.0957, -0.0304, -0.1226,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0448, -1.0767,  0.1175, -0.4134, -0.6327, -0.2142, -0.1058,  0.0590,
        -0.1221, -0.0850, -0.0926, -0.1377, -0.0193,  0.0203,  0.1015,  0.0423,
        -0.0816,  0.0510,  0.0272, -0.0595, -0.1114,  0.1531,  0.0901, -0.0134,
        -0.0310, -0.0461, -0.0476, -0.1058, -0.0514, -0.0030,  0.0382, -0.0065,
        -0.1802, -0.3046, -0.0160,  0.0275,  0.0382,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7266,  2.2307, -0.2235,  0.0726,  0.1152, -0.0133, -0.1540, -0.1069,
         0.0752,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1784,  0.0071,  0.0748, -0.5560, -0.1373, -0.0521, -0.2596, -0.6274,
         0.1373,  0.0487, -0.1118, -0.1323, -0.1014, -0.3426,  0.0072,  0.0639,
         0.0024, -0.0248, -0.2426, -0.1913,  0.1526,  0.0384, -0.0627, -0.0017,
         0.0209,  0.1019,  0.5631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1800e-01,  1.7638e+00, -1.2892e-01,  3.5239e-01,  1.0690e-01,
         1.2442e-01,  4.5133e-04,  8.0247e-02,  1.0707e-01,  1.3194e-01,
        -8.6856e-02,  4.1480e-01,  6.0056e-02, -1.1499e-01, -9.5373e-03,
         2.6452e-02,  2.1304e-01, -8.0438e-02, -4.0491e-02,  1.1038e-02,
        -1.1990e-01,  2.5019e-01,  2.1123e-01, -7.3232e-02, -6.3341e-03,
        -1.0669e-02, -7.3830e-03,  1.0746e-01,  9.7528e-01,  5.3509e-02,
         2.9479e-02, -2.8744e-02, -2.8957e-01,  1.8495e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0296, -0.0472, -0.0566, -0.0681, -0.0886, -0.0591,  0.0161, -0.0203,
        -0.0187,  0.0172, -0.0359, -0.3085, -0.0957, -0.0597,  0.0304, -0.1892,
        -0.3838, -0.0819, -0.0660,  0.0597,  0.0125,  0.0043, -0.2000, -0.0661,
        -0.0296, -0.0118, -0.0584, -0.0614, -0.0569, -0.0239, -0.0407, -0.3532,
        -0.2067, -0.0086, -0.0433, -0.1182, -0.1449,  0.0507,  0.0721, -0.0063,
        -0.0585, -0.1255, -0.0492, -0.0169, -0.1170,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0178, -0.0637, -0.1351, -0.2095,  0.0252, -0.5066, -0.9381, -0.0915,
         0.2215, -0.6032, -0.3097, -0.0573, -0.3129,  0.1320, -0.1594, -0.3951,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0036, -0.0410,  0.1523,  0.1036,  0.1398,  0.0766, -0.0366,  0.0682,
         0.4217,  0.3883,  0.2014,  0.3509,  0.0528,  0.1490,  0.0882, -0.0791,
        -0.0511, -0.0143, -0.0053,  0.0226, -0.0057, -0.0246, -0.0042,  0.0266,
        -0.1137,  0.0899,  0.2676,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1146, -0.0321, -0.1867, -0.0127, -0.0857, -0.0569, -0.1055, -0.0036,
        -0.0791, -0.0386, -0.0157, -0.0467, -0.0838, -0.0081, -0.0326, -0.0280,
        -0.0833,  0.0170, -0.0141, -0.0659,  0.0781, -0.0551, -0.0665, -0.3839,
         0.0336, -0.0280, -0.1532, -0.1930,  0.0172, -0.3638,  0.0133, -0.2092,
        -0.1204, -0.0359, -0.0476, -0.1077, -0.0156, -0.1256,  0.1057,  0.0701,
        -0.0049, -0.1556,  0.0021, -0.0250,  0.0715,  0.0363], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0226, -0.1348, -0.0101,  0.0615,  0.0226,  0.0232, -0.0276,  0.1584,
         0.0247,  0.0108,  0.0320,  0.0443, -0.0678, -0.0516,  0.0236, -0.0208,
        -0.0944, -0.0337,  0.4068,  0.8783,  0.3155,  0.1367,  0.0572,  0.0544,
        -0.0094, -0.0329,  0.1958,  0.0962,  0.0287,  0.0349,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3940,  0.1084, -0.1249,  0.0683,  0.0908,  0.2054,  0.2659,  0.6604,
         0.4896,  0.0081,  0.4581,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0125, -1.2288, -0.0092, -0.2719, -0.1576, -0.0485, -0.2295, -0.7222,
        -0.0554, -0.4486,  0.0051, -0.0627,  0.0506,  0.0242, -0.2876, -0.2662,
        -0.0932, -0.1574,  0.0074, -0.2737,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4352, -0.8379, -0.9436, -0.5511,  0.0321, -0.2238, -0.3693, -0.0928,
        -0.3578, -0.1960, -0.3128,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1471e-01,  1.1434e+00,  6.1435e-01,  4.8817e-01, -4.9460e-02,
         9.3535e-02,  1.4036e-01,  1.9971e-01,  3.3176e-01, -3.9979e-02,
         1.7498e-01, -1.1015e-03,  8.2250e-02,  8.7229e-02,  1.5550e-02,
         1.0576e-01, -2.0304e-02,  5.9363e-02,  8.0564e-03,  1.7178e-01,
        -3.4599e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0773,  1.1455,  0.9255,  0.0961,  0.0662,  1.3378, -0.3424, -0.1526,
         0.1891,  0.0880, -0.1094,  0.0047, -0.0752,  0.0401,  0.0258, -0.0784,
        -0.3625,  0.3324,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3463, -0.2025, -0.7589, -0.0347,  0.0402,  0.1137,  0.0156,  0.0519,
         0.0254,  0.0089,  0.0725, -0.0010,  0.0706, -0.1802, -0.2559,  0.0997,
        -0.0183, -0.2617, -0.0830, -0.0681, -0.0277, -0.1556, -0.5488, -0.0507,
        -0.1536, -0.0218, -0.1453, -0.0334, -0.0609, -0.0639, -0.0046,  0.0034,
         0.0101, -0.0191, -0.0586, -0.0304, -0.0159,  0.1802, -0.1094,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2416,  1.6383,  0.0190,  0.2978,  0.0209, -0.0484, -0.1053, -0.0463,
         0.1175,  0.0204,  0.1181,  0.3050, -0.1267,  0.0104, -0.0305,  0.0958,
         0.2898,  0.0342, -0.0251,  0.0521,  0.0610,  0.0374, -0.0469, -0.0376,
        -0.0449,  0.0202,  0.0990,  0.0367,  0.2040,  0.2995,  0.0223,  0.0193,
         0.0322, -0.2152,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.6385, -0.3373, -1.0279, -0.8213, -0.2349, -0.6362, -0.0377, -0.0189,
        -0.5838,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3053e-01, -9.4956e-01, -1.2881e-01, -3.9861e-01, -1.6322e-01,
        -5.7842e-01,  1.0090e-01,  7.6107e-02,  1.4089e-02,  7.3833e-02,
        -4.1279e-02, -3.6841e-02,  1.2231e-02, -2.6120e-02,  4.7061e-02,
        -1.4100e-01, -2.4505e-02, -7.9572e-01, -8.3976e-02,  4.9266e-02,
        -1.1757e-02,  6.0588e-02, -6.1132e-02, -4.2551e-02, -1.7119e-01,
        -5.2190e-02, -1.0622e-01,  6.6777e-02, -1.9360e-02,  4.2215e-02,
        -4.6071e-02, -1.8073e-01,  6.7379e-04, -1.0070e-01,  6.1577e-02,
        -5.0755e-03,  7.9602e-02, -6.1847e-03, -2.2166e-02, -3.7481e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2544, -0.7265, -0.5597, -0.0299, -0.2194, -0.0092,  0.0425, -0.0121,
        -0.0895, -0.4382, -0.0087,  0.0093, -0.0008,  0.0284,  0.0289,  0.0813,
        -0.0061, -0.0441, -0.0977,  0.0378, -0.0187, -0.0200,  0.0784, -0.0054,
        -0.0288, -0.0902, -0.1776,  0.0370, -0.2392, -0.2759, -0.0961, -0.0392,
         0.0017,  0.0120,  0.0373, -0.0193, -0.1600, -0.2084,  0.0150, -0.0112,
         0.1186,  0.0121, -0.0042,  0.0453, -0.0121,  0.0146,  0.0074,  0.0300,
         0.0214,  0.0181, -0.0151, -0.1223,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6343,  1.0856,  0.1935,  0.1705,  0.3946,  0.0478,  0.2214,  0.0472,
         0.0685, -0.0286, -0.1415,  0.1603,  0.2224, -0.0103,  0.1865,  0.3239,
         0.1293, -0.0634,  0.4166, -0.0691, -0.0964,  0.0634,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6017e-01,  2.0643e+00,  2.1870e-01,  1.5637e-02, -5.1794e-02,
         1.2059e-01,  8.1583e-02,  4.9147e-02,  6.0314e-02,  4.6302e-02,
         7.7285e-02,  1.8691e-01,  1.0957e-01,  1.6747e-01,  1.5857e-01,
         2.7545e-01, -2.7191e-04, -3.8662e-03, -1.4360e-01, -1.7701e-01,
        -3.4177e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1198, -0.5737, -0.1224, -0.0633,  0.0440, -0.0539,  0.0131,  0.0603,
        -0.0975, -0.0213, -0.0190, -0.0170, -0.0134, -0.0542,  0.0029, -0.0118,
         0.0385,  0.0014, -0.0506, -0.0209,  0.0336,  0.0909, -0.0278, -0.0321,
        -0.0162,  0.0085, -0.0304, -0.1058,  0.0222, -0.0432, -0.1284, -0.2824,
        -0.0138, -0.1226, -0.1142,  0.0491, -0.0288,  0.0123, -0.0068,  0.0106,
        -0.6869, -0.0489, -0.0689, -0.0062, -0.0980,  0.0067, -0.1458, -0.0768,
         0.0417,  0.0605, -0.0213, -0.0044,  0.0775, -0.0327, -0.2298],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3479, -0.0861,  0.0951, -0.0452, -0.0672,  0.0066, -0.4760, -0.0413,
        -0.1285,  0.0833, -0.0865, -0.0085, -0.0206, -0.0376, -0.1903, -0.0677,
        -0.0783, -0.0988, -0.1369, -0.2455, -0.1002, -0.0804, -0.1549,  0.0446,
         0.0359,  0.0439,  0.0529, -0.2103, -0.0642, -0.0014, -0.0030, -0.0354,
        -0.1339, -0.3530, -0.0086, -0.1349, -0.0698, -0.1481, -0.1833, -0.0948,
        -0.0455, -0.0033, -0.0280, -0.2614, -0.0871,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5526e-01,  1.3185e+00,  5.8238e-01,  5.5753e-01, -1.5767e-01,
         1.6296e-01,  9.8695e-02, -3.0724e-02,  4.7881e-01, -1.3458e-01,
         2.6572e-01,  2.9229e-01, -7.5445e-05,  3.2966e-01,  4.2894e-01,
         1.3754e-01,  1.1620e-01,  5.2412e-01,  4.7624e-02,  1.4309e-01,
         5.5437e-01, -8.3198e-02,  1.3085e-01, -1.0465e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1637,  0.1547,  0.0519, -0.0137, -0.0246, -0.0485, -0.1572, -0.2465,
        -0.0043,  0.1109, -0.0198, -0.0528, -0.0033, -0.2444, -0.0119,  0.0771,
        -0.1264, -0.1699, -0.1311, -0.1729, -0.0333,  0.0739, -0.1098, -0.1556,
         0.0801, -0.1228, -0.2561, -0.0314, -0.0798, -0.1290, -0.0570,  0.0351,
        -0.0926,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8395,  2.5438,  0.1133,  0.4538, -0.0879,  0.0415,  0.2009, -0.1733,
         0.9720, -0.0289,  0.0306, -0.0756,  0.0486,  0.1159,  0.0303, -0.1433,
         0.0696, -0.0154,  0.5428,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0559, -0.1829,  0.2971, -0.1172, -0.3733, -0.1114, -0.2131, -0.2440,
        -0.2909, -0.0965, -0.0869,  0.1309,  0.0392,  0.0516, -0.0615, -0.0391,
        -0.1096, -0.1264, -0.0084, -0.0307, -0.2986, -0.3027, -0.0473, -0.0404,
        -0.0111,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1775, -1.0973, -0.2916, -0.0320, -0.1246, -0.3470, -0.4141,  0.2770,
        -0.0710, -0.1287, -0.0134,  0.0879, -0.1292, -0.1150, -0.1518, -0.0175,
        -0.0132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.9070,  0.0032, -0.0801, -0.0348, -0.0821,  0.0230, -0.0462, -0.0447,
         0.0477, -0.2680, -0.0869, -0.0689,  0.0210, -0.0095, -0.0372, -0.0510,
        -0.3362, -0.7832, -0.0069, -0.2957, -0.0392, -0.5476, -0.7712,  0.0428,
        -0.1196,  0.2650, -0.0356, -0.1034, -0.0245, -0.0198, -0.0891,  0.1870,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5851e-01, -4.7407e-01, -9.2921e-01, -8.0762e-02, -1.5477e-01,
        -3.1996e-02, -1.8329e-01, -3.5863e-01, -9.1951e-02,  1.9608e-02,
         8.1317e-02,  8.3748e-02,  3.6836e-02, -4.3247e-02, -1.5585e-01,
         5.0744e-02, -4.2439e-05, -8.1969e-03, -2.3999e-01,  1.3393e-01,
        -1.2842e-01,  6.2732e-03, -2.5750e-01, -2.0213e-01,  1.2224e-01,
        -4.8979e-02, -2.3236e-01, -5.1577e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1637, -0.4074, -0.0289,  0.0387,  0.0534, -0.2360, -0.4341, -0.0477,
        -0.0853,  0.0607,  0.0603, -0.1489, -0.0484, -0.1657, -0.0571, -0.3618,
        -0.0801, -0.2714, -0.0624,  0.0902, -0.0412,  0.0126, -0.1193, -0.0121,
        -0.0837,  0.0649,  0.0427, -0.0318,  0.0260,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0180, -0.2523, -0.0935, -0.2266, -0.5503, -0.0582, -0.0612, -0.0789,
        -0.0382,  0.0278, -0.0064,  0.0102, -0.1112, -0.6510,  0.1024, -0.1613,
        -0.0441, -0.4016,  0.0062, -0.3458, -0.0101, -0.0555, -0.0570, -0.0515,
        -0.1227, -0.0504, -0.0928, -0.0447, -0.0076,  0.0157, -0.0109,  0.0162,
        -0.0067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1422, -0.6829, -0.0438, -0.1927, -0.0492, -0.3719, -0.0831, -0.1900,
        -0.1052, -0.1065, -0.2357, -0.0086, -0.0380, -0.0443, -0.1348,  0.0439,
        -0.1605,  0.0188, -0.3874, -0.0381, -0.3301,  0.0841, -0.1249, -0.0555,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1003, -0.8532, -0.3899, -0.0932, -0.2216,  0.0377, -0.1089, -0.2387,
         0.0182, -0.2924, -0.1051, -0.1634, -0.1740, -0.1152,  0.0090, -0.0865,
        -0.0594, -0.2608, -0.0355, -0.4663, -0.1066, -0.1393, -0.0231,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1554, -0.6590,  0.0247, -0.0073, -0.0436, -0.1771,  0.0102,  0.0008,
        -0.0946, -0.0133,  0.0713, -0.0317,  0.0074,  0.1133,  0.0745, -0.0859,
        -0.2004, -0.0809, -0.1478,  0.0042, -0.0250, -0.1722, -0.0277, -0.0603,
        -0.1851, -0.1098, -0.0285, -0.0765,  0.0249, -0.4861, -0.3053, -0.0305,
        -0.0193,  0.0505, -0.0731, -0.1849, -0.1018, -0.1460,  0.1133,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0454, -0.4438, -0.1104, -0.0606, -0.6666, -0.0830,  0.0247,  0.0183,
        -0.0188,  0.0053,  0.0205, -0.0022, -0.0241,  0.0045,  0.0247, -0.1852,
        -0.1559,  0.0509, -0.0645,  0.0271, -0.2179, -0.0054, -0.0122, -0.1323,
        -0.3462, -0.4561, -0.0800, -0.1246,  0.0048, -0.2127, -0.2063, -0.1163,
        -0.0560, -0.0528, -0.0178,  0.0120, -0.0381, -0.2220, -0.0479, -0.0059,
        -0.0565, -0.0859,  0.1081,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2186, -0.8208,  0.0575,  0.0282, -0.1010, -0.0312, -0.1400, -0.0145,
        -0.2310, -0.0422,  0.0237, -0.0165, -0.0902, -0.0185, -0.0235, -0.0516,
        -0.1644, -0.2110, -0.0233, -0.0541, -0.0059,  0.0191, -0.0797, -0.0126,
        -0.0082, -0.0288, -0.0434, -0.0507, -0.0374,  0.0443,  0.0103, -0.1063,
        -0.3369,  0.0483, -0.1209, -0.0280, -0.1120, -0.0998, -0.1817, -0.0534,
        -0.0773, -0.1210,  0.0019,  0.0820, -0.0112,  0.0722, -0.0084,  0.0099,
         0.0151, -0.0145,  0.0357,  0.0179,  0.0386, -0.0252, -0.0428,  0.0842,
        -0.0845], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3263, -1.9890, -0.0601, -0.0393, -0.1853, -0.0516, -0.3242, -0.3770,
        -0.0216,  0.0522, -0.1705,  0.0913, -0.3092,  0.2365,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1385, -0.5522, -0.2729,  0.2150, -0.0849, -0.0891, -0.1183, -0.0315,
        -0.0966, -0.0173, -0.1664, -0.6108,  0.0521, -0.2519, -0.3091,  0.0267,
        -0.0724, -0.2013,  0.0906, -0.2534, -0.3887, -0.1169, -0.1323, -0.0151,
         0.1650,  0.0855, -0.0059,  0.0546, -0.1934,  0.0794,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4404,  0.8991,  0.0966,  1.1316,  0.0091, -0.1009,  0.3996, -0.8659,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2613, -0.3547, -0.1380, -0.1023, -0.1191, -0.4533, -0.0091, -0.0863,
        -0.0092, -0.1274, -0.0209, -0.1054, -0.2206, -0.3566, -0.0826, -0.0412,
        -0.1225,  0.0061, -0.0298,  0.0132,  0.0891, -0.0511, -0.0632,  0.0527,
        -0.1350, -0.0395, -0.0993, -0.0825, -0.3273, -0.3403,  0.1567, -0.1764,
         0.0728,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4941, -0.7906, -0.2160, -0.2975,  0.0837, -0.0892, -0.0845, -0.1983,
         0.1571, -0.0835,  0.0130,  0.0153,  0.0189,  0.0356, -0.0226,  0.0140,
         0.0209,  0.0461, -0.0479,  0.0332, -0.0168,  0.0066, -0.0132,  0.0465,
         0.0805, -0.0071, -0.1066, -0.1110,  0.0421, -0.0054, -0.0049, -0.0641,
        -0.1975, -0.2591,  0.0973, -0.0224, -0.0863,  0.0289, -0.0012,  0.0124,
         0.0118, -0.0452, -0.0327, -0.1516,  0.0130, -0.0284,  0.0292,  0.0488,
        -0.0146, -0.0124, -0.0037, -0.0413, -0.0223, -0.0792, -0.0841,  0.0364,
        -0.0374,  0.0775, -0.0046, -0.0682, -0.0487, -0.0013, -0.2567, -0.1004,
        -0.0270, -0.0102, -0.0129,  0.1370,  0.0849,  0.0994,  0.0113],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6388e-02, -3.6879e-02, -1.0702e-01, -1.4606e-02, -1.7892e-01,
        -3.5178e-02, -9.5168e-01, -5.0633e-02, -3.3988e-03,  4.7512e-03,
         1.5311e-02,  8.8624e-03,  4.3290e-02, -2.3978e-02, -9.2100e-02,
        -2.4794e-03, -3.7470e-02,  3.6741e-02, -9.8523e-02, -5.1626e-01,
        -3.6411e-03,  8.7717e-02,  9.1014e-02, -2.6521e-01,  1.4155e-01,
        -2.2104e-01, -1.2826e-01, -5.4662e-04, -1.3106e-02, -1.6223e-01,
         1.0812e-02, -8.6290e-02, -3.3128e-01, -1.9272e-01,  1.2700e-01,
        -6.9374e-02, -2.0948e-01, -2.4197e-02, -4.3406e-03, -7.4762e-02,
         6.3542e-02, -6.2841e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3154, -0.8500, -0.1187, -0.0394, -0.0348,  0.0562,  0.0671,  0.0948,
        -0.0329, -0.5074, -0.1336, -0.0999, -0.4418, -0.0193,  0.0143,  0.0028,
         0.0431, -0.2088, -0.3579,  0.0214, -0.0706, -0.0564,  0.0669,  0.0908,
        -0.0204, -0.3060, -0.1199, -0.0723, -0.3701, -0.1144,  0.0572, -0.0167,
        -0.0136, -0.0525, -0.1306, -0.1749,  0.0389,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0471, -2.0716, -0.2092, -0.2226,  0.1031,  0.2095,  0.1509,  0.0417,
         0.0419, -0.0649, -0.1555,  0.0392, -0.3074, -0.2945, -0.0468,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2244,  0.2510,  0.0571,  0.1206,  0.2806,  0.5238,  0.2124,  0.2357,
         0.7697,  0.1392, -0.0836,  0.0898,  0.2842,  0.0247, -0.1300, -0.2225,
         0.0831, -0.0398, -0.1009, -0.2057,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8133e-03,  1.1983e-01, -7.5930e-02,  6.5753e-02, -6.2624e-01,
        -3.0987e-02, -5.3345e-02,  3.0009e-04, -9.7861e-03,  1.1476e-02,
        -2.4832e-01, -3.2538e-01, -1.6087e-01, -1.7222e-01, -9.6899e-02,
        -1.2643e-01,  3.5628e-02, -8.5411e-02,  1.2665e-01, -1.6448e-01,
        -5.7450e-02, -3.2521e-02, -9.0218e-02, -9.6059e-03, -6.4821e-03,
        -1.9173e-01, -3.4996e-01, -6.0147e-02, -7.3856e-02, -8.6841e-02,
        -4.0609e-01, -9.5239e-02, -2.0786e-02, -7.4765e-02, -1.7040e-01,
        -2.4233e-01, -8.6849e-02, -1.1013e-01,  3.1542e-02,  6.7211e-03,
         8.5725e-02, -3.8482e-02, -2.1613e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1993, -0.0570,  0.0141,  0.1840,  0.1702,  0.1575,  0.1339,  0.7425,
         0.0848,  0.0771, -0.1287, -0.1370,  0.0315,  0.2120,  0.5280,  0.0758,
         0.0488,  0.0713, -0.0314,  0.0039,  0.3445,  0.0756,  0.0347,  0.0161,
        -0.0742, -0.0278,  0.0439,  0.0277,  0.2725,  0.0852,  0.0490,  0.1419,
         0.1140,  0.2012,  0.2373,  0.1129, -0.2191,  0.2309,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0029,  1.4389,  0.4104,  0.5579, -0.2206,  0.2987, -0.1940,  0.0608,
         0.1372,  0.4123, -0.0164,  0.0903,  0.1639,  0.0365, -0.0171,  0.0842,
        -0.0192,  0.3038, -0.0513, -0.1981, -0.0635, -0.2060,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1547e-02, -1.2885e+00, -2.6517e-01, -9.8777e-02,  1.4559e-01,
        -5.6080e-02, -9.6854e-02, -1.0606e-01, -1.9585e-01, -5.4341e-01,
        -6.4327e-02, -3.5375e-02, -6.6105e-02, -3.2190e-02, -1.1138e-01,
        -1.7286e-01, -4.6007e-01,  2.2125e-02, -8.2577e-02, -2.9323e-01,
         4.6641e-02,  7.6196e-02, -2.2275e-01, -1.1200e-01, -1.0826e-01,
        -3.2476e-02,  2.7092e-03,  7.4592e-04, -3.9109e-02, -3.0783e-01,
        -2.0632e-02, -3.0089e-02, -7.4679e-02,  1.9920e-02, -1.9988e-01,
        -5.5846e-01,  4.4466e-02, -7.3309e-02,  1.8360e-02, -3.9291e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5823e-01, -3.4143e-02, -8.7565e-02, -9.0258e-02, -3.2399e-02,
        -1.2527e-01, -5.5988e-03, -7.4176e-02, -2.6003e-01, -5.0086e-02,
         2.3794e-02, -1.7308e-01, -5.0308e-03, -5.7252e-01, -4.3043e-01,
        -4.6740e-02, -1.1227e-02, -7.3181e-02,  6.0306e-03,  3.2694e-02,
        -6.4471e-02, -3.9313e-02,  4.0337e-04,  3.0413e-02, -6.7647e-02,
        -4.5324e-01, -6.2318e-03, -3.2082e-01, -3.4311e-01, -1.0950e-01,
         2.0325e-02, -9.7757e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2101, -1.5372, -0.0580,  0.1934, -0.1028, -0.0040, -0.0201, -0.3072,
        -0.4721, -0.0514,  0.0594, -0.1257,  0.0994, -0.0655,  0.0069,  0.0180,
        -0.2257,  0.0458,  0.0238, -0.0100, -0.0252, -0.1243, -0.0425, -0.3274,
         0.0232,  0.1212, -0.2053, -0.0144,  0.0709, -0.1690, -0.0022, -0.0036,
        -0.0276, -0.0989, -0.2109,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.5796, -0.4090, -0.4011, -0.4322, -0.0959, -0.2575, -0.5784, -0.5915,
         0.1147, -0.2328, -0.0714, -0.1638, -0.0343, -0.1952, -0.0667, -0.0445,
        -0.1594, -0.0307,  0.1145, -0.1347,  0.2449,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4285, -2.9828, -0.0092, -0.3427, -0.3137, -0.3411,  0.0214, -0.4272,
        -0.6552,  0.0567,  0.1118,  0.1337,  0.0926,  0.2079, -0.2042,  0.0845,
        -0.1480,  0.0231,  0.3988,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4325, -0.6436, -0.6640, -0.1078, -0.1604, -0.0102,  0.0356,  0.0769,
        -0.0998, -0.4800, -0.8367, -0.2455, -0.1163, -0.2479, -0.0314, -0.1543,
        -0.0998, -0.2788,  0.0701, -0.3858,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1496, -0.8930, -0.0636, -0.2839,  0.1093, -0.0258, -0.0480, -0.1617,
        -0.0579, -0.0376, -0.0813, -0.0382,  0.0544, -0.1649,  0.0695, -0.0898,
        -0.2125,  0.0055, -0.0799, -0.0402,  0.0225, -0.0636,  0.0545,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2564, -0.8445, -0.2080, -0.2298, -0.0825, -0.3232,  0.0045, -0.1895,
        -0.4371, -0.4139, -0.2107, -0.2741, -0.2261, -0.2574, -0.0333,  0.0762,
         0.0893, -0.1040, -0.1722,  0.0862, -0.1623,  0.0093, -0.0794, -0.0765,
        -0.0557,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1452, -1.9360, -0.1626, -0.1092, -0.1979, -0.0756,  0.0422, -0.0735,
        -0.0333,  0.1100, -0.2148, -0.0022,  0.0152,  0.1776,  0.0937,  0.0917,
        -0.0815,  0.0125,  0.0785, -0.1288, -0.2230, -0.3781, -0.2420, -0.1056,
        -0.0866, -0.1265, -0.1152,  0.0225, -0.0979, -0.0554,  0.0271,  0.0922,
        -0.1178, -0.1836,  0.1161,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1990, -0.0359,  0.0335, -0.3187, -0.1257,  0.1674, -0.2096, -0.1454,
        -0.6385, -0.1676, -0.2941, -0.0827,  0.0625, -0.2802, -0.2544, -0.4329,
        -0.0951, -0.3783,  0.0180,  0.1740, -0.0548,  0.0031,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2148,  0.1949, -0.0275,  0.0200,  0.2115,  0.1877,  0.0501,  0.2403,
        -0.1775,  0.7360, -0.0462,  0.2665, -0.2787,  0.1758,  0.2588,  0.2494,
        -0.0789,  0.3036, -0.1803, -0.2318, -0.0013,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0049, -1.9182, -0.1125, -0.0059, -0.0993,  0.1781, -0.4185, -0.6851,
         0.0387, -0.1103, -0.3240, -0.1846,  0.0028,  0.0324, -0.1277, -0.2024,
        -0.0417,  0.2346, -0.0490,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0735,  0.0997, -0.0089,  0.0623,  0.0326, -0.1565,  0.0366,  0.0011,
        -0.0024, -0.0285, -0.0314, -0.0374, -0.0013, -0.1318, -0.0134,  0.0486,
        -0.0982, -0.1978, -0.0811, -0.1183, -0.0226, -0.1348, -0.1402, -0.0411,
        -0.1681, -0.0120, -0.0655, -0.2750, -0.1747, -0.9683, -0.1227, -0.3832,
        -0.1472, -0.2343, -0.0415,  0.0304,  0.0330,  0.0529,  0.1365,  0.0174,
         0.0226,  0.0950,  0.0742,  0.0609,  0.0124, -0.1011, -0.1622],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3346e-01, -1.6154e+00, -6.9447e-01, -4.8243e-02,  1.2007e-03,
         2.8921e-03, -4.0415e-03,  1.1108e-01, -1.7905e-01,  5.3828e-02,
         1.7859e-02,  4.4545e-02,  1.1527e-01, -1.0466e-01,  2.0949e-02,
         9.2616e-02, -3.8674e-01, -1.0208e-02,  8.2177e-02, -2.7218e-02,
         1.5942e-01, -1.4020e-01, -1.9925e-01, -5.0913e-01, -1.8214e-02,
         2.8581e-01, -1.0402e-01,  6.1630e-02,  2.2256e-02, -3.0415e-02,
         8.0431e-03,  1.1714e-01, -3.9837e-01,  1.7409e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0035, -1.3161, -0.0952,  0.0402,  0.0154, -0.2443, -0.0295,  0.0114,
        -0.0337, -0.0133,  0.0423,  0.0253, -0.1777, -0.1693, -0.0190,  0.0850,
        -0.0627, -0.1216, -0.1097,  0.0529,  0.0329,  0.1044, -0.0016,  0.0434,
         0.0228,  0.0702,  0.0735, -0.0099,  0.0305, -0.0541,  0.0221,  0.1468,
        -0.0652,  0.0456,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.6009, -1.2728, -0.3001, -0.1445, -0.1844, -0.3218, -0.0526,  0.0588,
        -0.0357, -0.4319, -0.1603,  0.0253, -0.2523, -0.5307, -0.2141, -0.0664,
        -0.1093, -0.0608,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0163,  0.0204,  0.0766, -0.3213,  0.0111, -0.0031,  0.0695, -0.1018,
         0.8096,  1.3250,  0.0406, -0.1233, -0.0064,  0.3246, -0.0026, -0.0113,
        -0.0495, -0.0255,  0.1359,  0.7199, -0.0812, -0.0831, -0.0118, -0.1041,
        -0.0522, -0.1489,  0.2111,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0139, -0.2807,  0.0494,  0.0166,  0.0574,  0.0399,  0.1135,  0.0736,
        -0.4250,  0.0418, -0.0232, -0.0785, -0.7623,  0.0551,  0.0590, -0.0459,
        -0.0274, -0.0820,  0.0386, -0.8708, -0.0411, -0.0703, -0.0219,  0.0849,
         0.0199,  0.0978,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0253, -0.9856,  0.1107, -0.1909, -0.4859, -0.3357, -0.0158, -0.1202,
        -0.2971, -0.4142, -0.4113, -0.2348, -0.0945,  0.0902, -0.0619, -0.2096,
        -0.1556, -0.0210,  0.0029, -0.1321,  0.1403,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7939e-03, -1.3351e+00, -4.7646e-01, -7.0182e-01, -2.2735e-04,
        -3.1056e-02, -4.9694e-01, -8.0700e-02, -4.4560e-02,  1.9802e-01,
         2.8990e-03, -2.4716e-01,  1.6607e-01, -3.6663e-02,  2.6288e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3343,  0.0069, -0.1317,  0.0102,  0.1338,  0.1382,  0.0466,  0.2111,
        -0.0029,  0.0687,  0.0833, -0.1759,  1.2912,  0.1486,  0.0991,  0.0659,
         0.1108,  0.1662,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1484, -1.3370, -0.4307, -0.7057,  0.0166, -0.1810, -0.0062, -0.0842,
        -0.1339, -0.3589, -0.3021, -0.2295, -0.0650, -0.3368, -0.0202,  0.1711,
         0.0387, -0.1317,  0.2653,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0777, -0.6022, -0.3236, -0.0225, -0.1128, -0.0547,  0.1772,  0.0317,
        -0.0459, -0.0160,  0.0028, -0.0308, -0.0330,  0.0406,  0.0184,  0.0095,
        -0.1898,  0.0228,  0.0351,  0.0908, -0.2261,  0.0823, -0.2395, -0.0898,
        -0.3849, -0.0320, -0.1897,  0.0641,  0.0137, -0.1602,  0.0107, -0.0778,
        -0.1033, -0.0238, -0.0248, -0.0053, -0.1960,  0.0719, -0.0053,  0.0286,
         0.0273,  0.0821,  0.0800, -0.3461], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2649, -0.7727,  0.0242, -0.2240, -0.0329,  0.0196, -0.1046,  0.0293,
        -0.5253,  0.0160, -0.0164,  0.0039,  0.0022, -0.0151,  0.1348, -0.0295,
        -0.1834, -0.5321, -0.0646, -0.1423, -0.1654, -0.0497,  0.0368, -0.0314,
         0.0574, -0.0166, -0.1636, -0.2606,  0.0942, -0.0712, -0.0764, -0.0972,
        -0.0268, -0.0450,  0.2368,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3147, -1.9833, -0.0243, -0.1463, -0.0047, -0.0270, -0.5434, -0.3533,
         0.0257, -0.0454, -0.0346, -0.1401, -0.5682,  0.0994,  0.0517, -0.2755,
         0.0537, -0.0584,  0.0056,  0.0038,  0.0115,  0.0956, -0.0680,  0.4740,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9177e-01, -1.0351e+00,  2.5253e-02, -2.6135e-01, -2.0082e-01,
         1.2223e-03,  5.9541e-02, -1.2004e-01, -3.5507e-01, -3.3755e-04,
        -7.0872e-02,  1.4343e-01, -4.4892e-02, -6.3793e-02, -3.6257e-01,
         7.0922e-02,  7.4733e-02, -3.5231e-02, -3.3447e-01,  4.2813e-02,
        -2.0640e-02, -2.3106e-01,  8.1701e-02, -3.2104e-03,  1.3527e-02,
        -1.8321e-01, -9.8014e-02, -3.9065e-02, -1.7187e-02, -2.7107e-02,
        -1.0514e-01,  2.9253e-02, -2.2224e-02,  1.9498e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1490,  0.1412, -0.0270,  0.0679,  0.0390,  0.0753, -0.3522, -0.1151,
         0.0022,  0.0259,  0.0707,  0.0476, -0.3538, -0.0278, -0.3734, -1.0828,
        -0.0429,  0.0116, -0.0952,  0.0157,  0.0296, -0.1262, -0.0057, -0.2999,
        -0.2576, -0.1134, -0.3145, -0.1053,  0.0013, -0.0782,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.1484, -0.0582, -0.1745, -0.6882,  0.0249,  0.1610, -0.2030, -0.0589,
        -0.1587, -0.7681,  0.1971,  0.1617, -0.1863,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0316,  0.0955, -0.0747,  0.3456,  0.0719, -0.1903, -0.0686, -0.4857,
        -0.4397, -0.0466, -0.0305, -0.5415, -0.0151, -0.1958, -0.3951, -0.1052,
         0.0883, -0.1717,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1351, -0.8369, -1.1812,  0.3417, -0.4294,  0.0423, -0.1003,  0.5305,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0797, -1.0876, -0.0152, -0.1035, -0.1533, -0.0981, -0.0341, -0.0411,
         0.0565, -0.0050, -0.0401,  0.1362, -0.0362, -0.0389, -0.0091, -0.0070,
         0.0428,  0.0675,  0.0817, -0.0175,  0.0356, -0.0262, -0.1121, -0.0412,
        -0.0245, -0.1055, -0.0331, -0.1028, -0.5701, -0.1531,  0.1147, -0.3670,
        -0.1198, -0.7493, -0.0241, -0.3503, -0.1706, -0.0797,  0.0454,  0.0472,
        -0.0774, -0.0361, -0.0528,  0.0414,  0.0646, -0.1056,  0.1365],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3914, -0.4719,  0.5445, -1.3155,  0.3615, -0.2248,  0.3114,  0.3314,
         0.1781, -0.1142, -0.5512,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1146,  1.4602,  0.2267,  0.3884,  0.1554,  0.1128,  0.2818,  0.7281,
         0.1133,  0.3706,  0.0648,  0.0444,  0.5089, -0.0984, -0.1798,  0.1299,
         0.1950, -0.4823,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7453, -0.2072, -0.9892, -0.1181,  0.1858,  0.0543, -0.6432, -0.1244,
        -0.4302, -0.0496, -0.0106,  0.0321,  0.0048,  0.1571,  0.1602,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3051,  1.1229, -0.0620,  0.2208,  0.0528,  0.4911,  0.3924, -0.0306,
         0.1222,  0.4040,  0.2206, -0.0189, -0.0795, -0.1126, -0.0955,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3776,  0.0270, -0.0026,  0.1930,  0.0323, -0.0493, -0.0071, -0.0749,
        -0.1240, -0.0804, -0.0267, -0.3200, -0.6148, -0.2151, -0.0109, -0.0702,
        -0.2369, -0.0242, -0.1314, -0.0225, -0.3521, -0.5417,  0.0760,  0.3250,
        -0.2004, -0.2132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2010, -0.0135, -0.1285,  0.0452, -0.0133, -0.0531,  0.1749, -0.8414,
        -1.2355, -0.1495,  0.0146, -0.0152, -0.2930,  0.2316, -0.0988,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4387,  0.5197,  0.1059,  0.2854,  0.3429,  0.1971,  0.3436,  0.0180,
         0.0634,  1.1336,  0.1069,  0.0264,  0.2633,  0.1306, -0.0206,  0.0780,
         0.0901,  0.0711,  0.2213, -0.2643,  0.1937, -0.0108, -0.0050, -0.0889,
        -0.0949,  0.0954,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1068, -0.0706, -0.0153, -0.0087, -0.1577, -0.0530, -0.1210, -0.0856,
         0.0082, -0.1521, -0.4502, -0.1153, -0.0986, -0.1197, -0.3339, -0.2456,
        -0.0504, -0.1261, -0.0241, -0.3172, -0.0221, -0.1433, -0.0576, -0.2603,
        -0.0185, -0.4738,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-2.7264e-01, -1.8062e+00,  2.5983e-01,  1.2176e-01,  5.9072e-02,
         1.5876e-01, -9.5807e-02, -2.0125e-01,  8.5802e-04, -1.8426e-01,
        -1.1777e-02, -5.0832e-02, -2.1496e-01,  5.2110e-02, -1.1639e-01,
        -2.9911e-02, -3.8725e-02, -1.9636e-02, -1.1780e-01, -7.1448e-02,
        -4.7704e-04, -1.2990e-01, -1.9460e-02, -1.2674e-01, -3.5218e-01,
        -4.9096e-03, -7.5633e-02, -5.3399e-02,  1.9838e-02,  3.0049e-02,
        -9.9265e-03, -8.5335e-02, -1.2428e-02,  1.3450e-02,  2.0497e-02,
         1.7051e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4786, -0.3328, -0.1809, -0.0632, -0.1464, -0.1040, -0.1553, -0.2934,
         0.0569, -0.0404, -0.0648, -0.0387, -0.0014,  0.0124, -0.0541, -0.0525,
         0.0344, -0.0162,  0.0437,  0.0387, -0.0433, -0.2047, -0.1086, -0.0340,
        -0.2362, -0.3885,  0.0201, -0.2410, -0.2840, -0.0377, -0.0193, -0.2772,
        -0.2308,  0.0184, -0.1865,  0.0247,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3220, -0.2649,  0.1722, -0.1810, -0.2320, -0.2861,  0.0731,  0.0392,
         0.0049,  0.0627, -0.1230, -0.2127, -0.7855, -0.1216, -0.0443, -0.0464,
        -0.1813, -0.0335, -0.0253, -0.1311,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0416, -0.4650, -0.0548, -0.0184, -0.0179, -0.0593, -0.3085, -0.3612,
         0.0059, -0.0429,  0.0782, -0.0062, -0.0403, -0.0411, -0.5645, -0.2989,
        -0.1089, -0.2304,  0.0161,  0.0065, -0.0179, -0.0113,  0.0030,  0.0670,
        -0.2437,  0.0377, -0.0347, -0.0066,  0.0533,  0.0088, -0.1374, -0.0242,
        -0.0466, -0.3524, -0.0620,  0.0993, -0.3692, -0.0690,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1079e-02, -1.2494e+00, -4.1659e-01, -3.0095e-01, -5.5311e-02,
        -3.5893e-02,  7.1174e-04,  1.7836e-01, -1.1417e-01,  4.9422e-02,
        -1.7020e-01, -1.9348e-01, -6.7778e-02,  3.4566e-03, -8.2517e-03,
        -2.0109e-01,  1.3299e-02, -1.4307e-01,  6.2117e-03, -1.5770e-02,
        -6.7035e-03,  7.6581e-02,  6.4361e-02, -9.2164e-03,  8.0042e-03,
        -3.5090e-02, -6.6196e-02, -1.2990e-01, -3.4404e-01,  3.5237e-02,
        -8.1499e-02, -1.5717e-02,  7.1435e-02,  9.9222e-03, -1.2617e-01,
         4.0274e-02, -6.1173e-02, -1.1661e-03,  5.0454e-02, -2.7749e-01,
         1.3301e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2421, -1.3224,  0.0979, -0.0582, -0.0225, -0.1762, -0.0655, -0.1175,
        -0.1342,  0.0514, -0.0309, -0.3287, -0.2287, -0.0600, -0.0211, -0.3912,
        -0.0794, -0.4759,  0.0462, -0.0425, -0.1745,  0.0239,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1717,  0.0478,  0.0084, -0.0344,  0.0459, -0.0919, -0.0187,  0.0160,
        -0.0419, -0.0115, -0.0045, -0.1990, -0.0455, -0.1683, -0.0495, -0.2733,
         0.0268, -0.1209, -0.0098, -0.3307, -0.6241, -0.0422, -0.0783, -0.0778,
         0.0225,  0.0293,  0.0180, -0.1027, -0.2564, -0.0281, -0.1014, -0.1181,
        -0.1683,  0.2079, -0.0071,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7823e-01, -5.4330e-01, -1.0117e+00, -2.8315e-01, -4.2966e-01,
        -2.0980e-02,  1.4613e-02,  1.2339e-02,  1.4806e-01, -3.5244e-02,
        -7.8528e-02,  1.3448e-03, -5.7649e-03,  4.5907e-02,  1.4155e-02,
        -8.2955e-02,  1.3852e-01, -7.0128e-02, -1.9751e-01,  2.6507e-02,
        -1.0977e-01,  4.6805e-02,  6.8856e-04, -6.6155e-02, -7.3947e-03,
        -5.5069e-02, -2.8840e-01,  7.7681e-02,  3.7001e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1365, -1.8244, -0.4075, -0.4028, -0.1248, -0.5614, -0.0512,  0.0534,
        -0.1185, -0.0263, -0.0829, -0.2587,  0.0237, -0.1807, -0.0363, -0.2459,
        -0.0710, -0.1180, -0.1349, -0.0026,  0.0325,  0.0116,  0.1302,  0.0218,
         0.4011,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1576,  0.0665, -0.0869, -0.0479, -0.6320, -0.1641, -0.1734, -0.4844,
        -0.4301, -0.0676,  0.1292,  0.1861,  0.0796,  0.6363, -0.2881, -0.4152,
        -0.0261, -0.2150, -0.1263, -0.0793, -0.0434, -0.1714, -0.9869,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0507, -0.0111,  0.0797, -0.0211,  0.0822,  0.0300,  0.0256,  0.0158,
        -0.0257, -0.0446, -0.0276, -0.0815, -0.2713, -0.5312, -0.1651, -0.1740,
        -0.1847, -0.2222, -0.0130, -0.0992,  0.0657,  0.1078,  0.0048, -0.0122,
        -0.0647,  0.0103, -0.6528,  0.0282, -0.2809, -0.0843, -0.1808, -0.0221,
        -0.1270, -0.1314, -0.0546, -0.0370, -0.0936,  0.1674,  0.0639,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0761, -0.0692,  0.2479, -0.4590, -0.9561, -0.1371,  0.2615, -0.3227,
        -0.6698,  0.2922,  0.0899,  0.3089,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.6375, -0.5694, -1.3410, -0.0402, -0.0265, -0.1016, -0.2660,  0.1010,
        -0.0948, -0.0864,  0.0236,  0.0817, -0.0564,  0.0192, -0.0487,  0.0194,
        -0.0492, -0.1272, -0.0115, -0.1683, -0.0797, -0.1062,  0.0270, -0.0197,
        -0.0101, -0.0167,  0.0683, -0.0795,  0.0997,  0.0904, -0.0284,  0.0510,
        -0.0679,  0.0086, -0.0593, -0.0233, -0.0741,  0.0087, -0.0030,  0.0028,
         0.0024,  0.0347,  0.0527,  0.0750, -0.1332,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3287,  2.0091,  0.0470,  0.3663, -0.1407, -0.2243, -0.0168,  0.1949,
         0.5455,  0.1812,  0.1504, -0.1414,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8998, -2.7175, -0.2266, -0.9433, -0.1525,  0.1916, -0.3432,  0.5437,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9313e-02, -6.0535e-01, -5.6929e-02, -1.0561e-01, -2.4725e-01,
        -7.7312e-03, -2.2946e-02,  2.0632e-02, -1.5278e-01, -2.8234e-01,
        -9.1556e-02, -6.1801e-02, -2.7749e-02,  5.4757e-03,  2.0458e-02,
         1.4985e-02,  8.0835e-02, -3.1028e-03,  7.0633e-02, -4.0026e-02,
         1.9149e-02,  4.0369e-02, -6.4473e-02,  2.8729e-02, -2.1118e-02,
        -2.9628e-03,  1.5149e-02, -4.8475e-02, -1.3356e-01,  9.2453e-03,
        -3.3631e-02, -5.7655e-02, -3.0124e-03,  1.4157e-02, -2.1229e-02,
         2.4242e-02,  5.0711e-02, -1.2984e-02,  1.3619e-03,  1.7472e-02,
        -2.6371e-02,  2.5643e-03, -3.1659e-02, -4.0761e-05, -4.9493e-03,
         1.3861e-01, -2.7341e-01,  7.2969e-02, -2.0884e-01, -2.7206e-01,
        -6.6066e-02, -3.2719e-02, -7.8592e-02,  5.9087e-03, -8.7982e-02,
        -1.4198e-02, -9.4221e-02, -1.2204e-03,  5.6347e-03,  1.1320e-03,
         2.3949e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0896, -0.0790,  0.0611, -0.1600, -0.3974,  0.1555, -0.2127,  0.0214,
         0.0418,  0.0796,  0.0892, -0.3306, -0.0304, -0.2255, -0.3054,  0.0169,
         0.0058, -0.0127, -0.2748, -0.1060, -0.2228, -0.2740, -0.0687, -0.0331,
        -0.1753, -0.0434, -0.0025, -0.0519, -0.0926, -0.0831, -0.1326, -0.0818,
        -0.1190,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7452,  0.6064,  1.1889,  0.3726,  0.2123,  0.0953,  0.4224, -0.1001,
        -0.0667, -0.1485, -0.1703,  0.0141,  0.0771,  0.0084, -0.0081,  0.2434,
        -0.0084,  0.2609,  1.4516, -0.0268, -0.1336, -0.1780, -0.4609,  0.1418,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1386, -0.4721, -0.2255, -0.3842, -0.0963, -0.4553, -0.1047, -0.1795,
         0.0496, -0.2613, -0.2254, -0.2184, -0.6934, -0.0444,  0.0539, -0.0292,
         0.0597, -0.0557,  0.0967,  0.0638, -0.0952, -0.0463,  0.0847, -0.0798,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2347,  0.0554, -0.2018, -0.2062,  0.0511,  0.0531, -0.0271, -0.1412,
        -0.0038,  0.0157,  0.0560, -0.0819, -0.0273, -0.0203, -0.1707, -0.4140,
        -0.3412,  0.0319,  0.0285, -0.0182, -0.1345,  0.0677, -0.0244, -0.2957,
        -0.0808,  0.0307, -0.1581, -0.0361,  0.0170, -0.0022, -0.1122,  0.0347,
         0.1061,  0.0945,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2052, -0.1907, -0.2075,  0.1243, -0.4724,  0.3296,  0.0062,  0.3518,
         0.1033,  0.1010, -0.0964,  0.0748, -0.0427, -0.8572, -0.1119, -0.2283,
        -0.7408, -0.0054, -0.3755, -0.0356, -0.2578, -1.0035,  0.1334,  0.0390,
        -0.0394,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0999, -0.1981, -0.2221, -0.0310, -0.0898, -0.1749, -0.5032, -0.2685,
        -0.2067, -1.6335, -0.1821, -0.0132, -0.1139, -0.1024,  0.0411, -0.3224,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4215, -1.0760, -0.0751, -0.4521, -0.1378, -0.1102, -0.8581,  0.0767,
         0.1882,  0.0428, -0.0280, -0.5698, -0.2426, -0.0301,  0.0819, -0.0606,
        -0.0663, -0.0667, -0.0491, -0.0101,  0.0019,  0.4162, -0.0356,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1707,  0.2298,  0.3423,  1.2485,  1.8319,  0.6268, -0.2203,  0.2041,
         0.1700,  0.0848, -0.1657, -0.0856, -0.0766,  0.3550,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.1739, -0.5842, -0.3300, -0.8238, -0.1223, -0.0248, -0.1946, -0.0146,
        -0.1972, -0.1284, -0.4666, -0.3935, -0.3345, -0.0273, -0.0026, -0.0118,
         0.1547, -0.3494, -0.1103, -0.3275, -0.0521, -0.0340,  0.0915,  0.0505,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9565, -0.3318, -0.8132,  0.0127, -0.8798, -0.1211, -1.1180, -0.2757,
        -0.5330, -0.4492, -0.2443,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1473, -0.0470,  0.0525,  0.0017,  0.0572, -0.5330, -0.8884, -0.0863,
        -0.4900, -0.2350, -0.0618,  0.0236, -0.1387,  0.0373, -0.2438, -0.0101,
        -0.1077, -0.1818,  0.1884, -0.0389, -0.1128,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0879, -0.0480, -0.0362, -0.0921, -0.6024, -0.2282, -0.3046, -1.0437,
        -0.1549, -1.3025, -0.2353, -0.0740,  0.1166, -0.1618,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4995,  0.0489, -0.0369, -0.0147, -0.0380,  0.0295,  0.0316, -0.0384,
        -0.0612, -0.0504,  0.1482,  0.4032,  0.6876,  0.0399, -0.0170,  0.0638,
        -0.2747,  0.2174,  0.0096, -0.0540,  0.2902,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5555,  2.0261, -0.0581,  0.4084, -0.3805,  0.4536,  0.0502,  0.8223,
         0.0107,  0.2425,  0.0397, -0.1391, -0.2365,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2062e-01, -4.7829e-01, -5.6783e-02, -9.1789e-03, -6.1117e-04,
        -1.3403e-03, -1.3359e-01, -1.8474e-01, -2.0408e-02, -3.7056e-02,
        -1.6676e-04, -4.8113e-02,  3.7115e-02, -3.7127e-02, -3.5374e-02,
        -5.9391e-02,  3.2660e-02,  2.8100e-02, -2.6819e-02,  1.0012e-02,
        -2.2793e-03,  1.0626e-01,  1.5370e-02, -1.5934e-01, -3.9563e-01,
        -3.8951e-02, -5.3070e-01, -5.0834e-01, -3.1503e-01, -5.5643e-02,
        -5.5468e-03, -3.0199e-01, -2.1641e-02,  1.7235e-03, -6.0852e-02,
        -1.0688e-01, -1.8163e-01, -5.5158e-02,  3.1447e-02, -5.3384e-02,
        -1.4282e-01, -5.3687e-02,  8.1561e-03, -1.1685e-01,  1.9529e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0064, -0.1345, -0.3951, -0.0599, -0.0178,  0.1048,  0.0089, -0.0550,
        -0.0196, -0.0638, -0.3059, -0.0083,  0.0283, -0.0307, -0.1659, -0.0296,
        -0.7143, -0.0820, -0.0692,  0.0588, -0.1595, -0.4127,  0.0748, -0.3578,
        -0.0053,  0.0270, -0.1130, -0.1939, -0.3654, -0.2299, -0.0037, -0.1823,
        -0.0026,  0.0798,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6412e-01,  4.5600e-01, -1.6933e-01, -2.1360e-02, -4.7098e-02,
        -2.9050e-05, -5.3617e-02, -6.7010e-02, -1.6319e-01,  1.3893e-02,
        -9.2746e-02,  1.0957e-01, -3.1688e-02,  3.5025e-02, -1.1117e-01,
        -7.7719e-02, -1.0140e-01,  9.1952e-02,  4.5558e-02,  1.2140e+00,
         1.4739e+00,  3.3372e-02,  1.4068e-01,  3.3696e-01, -3.2565e-01,
         4.0355e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2723, -1.8851, -0.1886, -0.2576,  0.0939, -0.0412,  0.0377, -0.0985,
        -0.1347,  0.0180,  0.0109,  0.0124,  0.0077, -0.0595, -0.0331, -0.1982,
        -0.3587,  0.0267, -0.1454, -0.0260, -0.0838,  0.0397,  0.1835,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3929, -0.3243, -0.0114, -0.1644, -0.0614,  0.1016, -0.0991, -0.1876,
        -0.3373, -0.0590, -0.0151, -0.1209, -0.0012, -0.0715, -0.8265,  0.0399,
         0.0239, -0.1534, -0.1695, -0.1113,  0.0326,  0.0147,  0.0048,  0.0285,
         0.0427, -0.0234, -0.2590, -0.3298, -0.0896, -0.1485, -0.0566,  0.0963,
         0.0047, -0.0228, -0.0835,  0.0498,  0.0035,  0.1933, -0.0069,  0.0760,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0879, -0.1636, -0.0792, -0.0491, -0.3287, -0.0539, -0.0952, -0.3043,
         0.0276, -0.0029,  0.0083, -0.0456, -0.1343, -0.0362, -0.1251, -0.1259,
        -0.1866,  0.0377,  0.0220, -0.0711, -0.0254, -0.2775, -0.1610, -0.0502,
        -0.0159, -0.0326, -0.0267,  0.0096,  0.0054, -0.1113, -0.3209,  0.0728,
        -0.1838, -0.0396,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0924,  0.0717,  0.0609,  0.0080,  0.4291,  0.0251, -0.0199, -0.0477,
         0.0032, -0.0944,  0.0099,  0.0720, -0.2714,  0.1180,  0.9088, -0.0530,
        -0.1293,  0.0439,  0.3850,  0.0051, -0.0108,  0.0085,  0.0248, -0.0789,
         0.2087,  0.4175,  0.0050,  0.0602,  0.0544,  0.0822, -0.0037, -0.0038,
        -0.1128,  0.0115,  0.1571, -0.0132,  0.0781,  0.1144,  0.0032,  0.0402,
         0.0604,  0.0399,  0.1685,  0.0618, -0.0204, -0.0017, -0.0622, -0.0296,
         0.0434,  0.0606,  0.0114, -0.0822], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2547,  0.0344,  0.1191,  0.1855,  0.4096,  0.0599,  0.0739,  0.0327,
        -0.0156, -0.0142, -0.0202, -0.0773, -0.0503,  0.4463,  0.2177,  0.3715,
         0.6479,  0.4899,  0.2406,  0.0415,  0.1626,  0.0322,  0.2612,  0.6107,
        -0.0258, -0.0122,  0.0873, -0.1423, -0.0677,  0.0285, -0.1712,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1529e-01,  6.1883e-01, -7.0160e-02,  4.3476e-02, -4.0034e-02,
        -8.5873e-03,  5.4121e-02,  6.4060e-04,  1.4820e-02, -1.2309e-01,
         6.6613e-02,  1.9938e-01, -2.0418e-01, -3.0689e-03,  2.3808e-01,
         5.0227e-01,  9.6874e-02,  2.7761e-01,  8.4674e-01,  4.4918e-01,
         2.3026e-01,  1.5640e-01,  1.3040e-01, -2.5730e-01, -8.5393e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8179,  0.2347, -0.0520,  0.0255,  0.0832,  0.7779,  0.8809, -0.0267,
        -0.2798, -0.0244,  0.1460,  0.2959,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1181, -1.5530, -0.0961, -0.1743, -0.1303, -0.1572,  0.1453,  0.0047,
        -0.0745, -0.1140,  0.0441,  0.0602, -0.2472, -0.3872, -0.0704, -0.2486,
        -0.0813,  0.0778,  0.0231, -0.0207, -0.0813,  0.0169, -0.0470,  0.0698,
         0.0533, -0.5157, -0.0071,  0.0850,  0.0455,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2143, -1.2939, -0.1072,  0.1228, -0.2596, -0.3693, -1.0061,  0.0629,
        -0.0545, -0.1190, -0.0460, -0.2542,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0883, -1.4065, -0.3730, -0.5702,  0.0451, -0.5189, -0.6879, -0.0240,
        -0.1955, -0.1189, -0.0046, -0.0213,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2032,  0.2327, -0.4755,  0.0977, -0.3814,  0.0391,  0.0537, -0.1198,
        -0.8275, -0.0582, -0.1241, -0.3924, -0.0683, -0.1139, -0.0533, -0.1886,
         0.0341, -0.0405, -0.1895,  0.2647,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1047, -1.3769, -0.0464, -0.1737, -0.3098, -0.2752, -0.0652,  0.1003,
        -0.0062,  0.0391, -0.0262, -0.2071, -0.1167, -0.1174,  0.0326, -0.0786,
        -0.2546, -0.0470,  0.0292, -0.0169, -0.0314,  0.1065, -0.0361, -0.0334,
        -0.0761,  0.1046, -0.0520, -0.1881, -0.1470,  0.0219,  0.0849, -0.0172,
        -0.1089, -0.1185, -0.0628, -0.0713,  0.0574,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5532,  1.9109, -0.1083, -0.0596,  0.1720, -0.0605, -0.0273, -0.0180,
         0.0610,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2722,  0.2556,  0.1883, -0.4472, -0.0839,  0.0210, -0.2358, -0.2320,
        -0.0594,  0.0203, -0.0743, -0.0725, -0.2577, -0.5100,  0.0494, -0.0885,
         0.0987,  0.1385, -0.2849, -0.1561, -0.1142,  0.0736, -0.1138,  0.0445,
        -0.0116,  0.0810, -0.1698,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4605,  0.8896, -0.0379,  0.3040,  0.0043,  0.1079,  0.0226,  0.0226,
        -0.0044,  0.0888,  0.0023,  0.2963,  0.1579,  0.0440, -0.0388, -0.0737,
         0.3294,  0.0019, -0.0823,  0.0844, -0.0212,  0.3024,  0.2254, -0.1308,
         0.0487, -0.0519, -0.0195,  0.0649,  0.4148,  0.0544,  0.0444, -0.0043,
         0.0224, -0.4245,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.2114, -0.0218, -0.0318, -0.0349, -0.0935, -0.0464,  0.0524, -0.0374,
         0.0149,  0.0467, -0.0653, -0.4753,  0.0545, -0.0442, -0.0365, -0.3171,
        -0.7157, -0.1854, -0.1592,  0.0576,  0.0229, -0.0017, -0.1559, -0.0409,
        -0.0103, -0.0323, -0.0071, -0.0365,  0.0214, -0.0348, -0.1088, -0.1932,
        -0.3702, -0.0753, -0.1571, -0.1053, -0.1642, -0.1061,  0.0265, -0.0688,
        -0.0515, -0.1306,  0.1308,  0.0315,  0.0413,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3021,  0.0779, -0.0386,  0.0628, -0.3832, -0.6084, -1.0491, -0.0365,
         0.0470, -0.5687, -0.1358, -0.0104, -0.3465,  0.1519, -0.1218, -0.1557,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0858, -0.0346, -0.0147, -0.1421, -0.0666, -0.1359, -0.0095, -0.1635,
        -0.8084, -0.5764, -0.1483, -0.3548, -0.0541, -0.1789, -0.2275, -0.0051,
        -0.0979, -0.0222,  0.0428, -0.1077,  0.0180,  0.0144, -0.0118, -0.0915,
        -0.0993,  0.0269, -0.0576,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9496e-01,  3.4623e-02, -2.8560e-01, -2.8067e-02, -5.8856e-02,
        -5.0406e-03, -6.0812e-02, -1.7943e-02, -7.4645e-02, -2.9760e-02,
         1.1532e-02, -6.2633e-02, -6.9809e-02, -3.2648e-02, -5.6186e-02,
        -1.3793e-02, -1.5877e-01,  3.4086e-04, -4.7864e-02, -7.4503e-02,
         1.4071e-01, -9.1417e-02, -2.1811e-02, -2.4397e-01, -2.5723e-02,
         2.5594e-02, -1.5577e-01, -3.4712e-01, -5.9546e-04, -3.3829e-01,
         4.9446e-02, -1.2591e-01, -1.0673e-01,  2.0692e-02, -3.1422e-02,
        -8.7448e-02, -4.2970e-02, -1.8560e-01, -3.3413e-02,  1.1524e-02,
        -5.1374e-02, -1.2634e-01, -8.0325e-02,  3.9188e-03,  5.3855e-03,
         7.6361e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0456,  0.1136, -0.0728, -0.1054, -0.0952, -0.1831, -0.0633, -0.3798,
        -0.2142, -0.0657, -0.1087, -0.0423, -0.0591, -0.0303,  0.0232, -0.0871,
         0.0557, -0.1446, -0.5056, -0.7133, -0.0678, -0.3749, -0.0701, -0.1437,
         0.0281, -0.0207, -0.2701,  0.4943, -0.1132, -0.0466,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0416, -0.1003,  0.1896,  0.2387,  0.1006,  0.3595,  0.9837,  2.0635,
         0.2490,  0.3164, -0.1343,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1158, -0.5630,  0.1077,  0.2795, -0.3526, -0.0721, -0.4710, -0.1545,
         0.3380,  0.6363,  0.8226,  0.8096,  0.9549, -0.2201, -3.0135, -1.2855,
        -0.9796, -0.4721,  0.3041, -3.1336,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1960,  3.7212,  0.7311,  0.2149, -0.3798,  0.4446,  0.2805, -0.1869,
        -0.1002,  0.5525,  0.4983,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2247,  1.2292,  0.2558,  0.5261, -0.4647, -0.2736,  0.0475,  0.1921,
         0.3805,  0.1509,  0.0766,  0.1989, -0.1534,  0.0273,  0.0471,  0.1097,
         0.0110, -0.0364,  0.0254, -0.2168,  0.0210,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1722,  1.0031,  0.6865, -0.0193,  0.0068,  1.6852, -0.2926, -0.0737,
         0.1144,  0.2087,  0.0405,  0.1115,  0.0336,  0.0200,  0.0197, -0.0187,
        -0.3267,  0.3159,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0100, -0.2236, -0.8751,  0.1798,  0.1413,  0.1440,  0.0223,  0.0402,
        -0.0237, -0.0326,  0.0021,  0.0927, -0.1245, -0.3506, -0.5860,  0.0219,
        -0.0617, -0.2879, -0.0628,  0.0749, -0.0428, -0.1377, -0.3485, -0.0251,
        -0.1115,  0.0936, -0.1493, -0.0649, -0.1091, -0.0706,  0.0402, -0.0385,
        -0.0094,  0.0475, -0.0673, -0.0112,  0.0363,  0.0502, -0.1002,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1466e-01,  2.0692e+00, -4.7321e-02,  5.7242e-01,  3.8620e-03,
         6.8477e-02, -8.2418e-02,  1.6234e-01, -2.9873e-02,  4.6524e-02,
         7.9083e-02,  4.1008e-01, -1.0804e-01, -7.7510e-02, -2.3328e-01,
        -2.8823e-02,  5.5470e-01,  3.2967e-02,  1.3898e-02,  6.1707e-03,
         3.0053e-02, -9.3183e-02, -6.3519e-02, -2.9694e-02, -4.9898e-02,
         5.9472e-02,  1.2319e-01, -8.0770e-04,  3.4782e-01,  6.9267e-01,
         6.4879e-03,  6.2803e-02,  2.8065e-02,  1.4851e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.6178, -1.2874, -0.7625, -0.7958,  0.1460, -0.0990,  0.1688, -0.1221,
        -0.2113,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9330e-02, -7.6272e-01,  7.2747e-02, -2.2797e-01, -4.9824e-02,
        -3.3545e-01,  1.2292e-02, -8.3348e-02,  5.6569e-02,  3.8836e-02,
        -1.5054e-01, -4.2066e-02,  3.9789e-02, -5.0538e-02, -1.0081e-02,
        -3.0579e-01, -2.1492e-02, -4.4225e-01,  3.0791e-02,  1.0218e-01,
        -1.5064e-02,  6.2147e-02,  1.1689e-02,  2.8167e-02, -2.3201e-01,
         3.0371e-02, -6.8633e-02,  1.4352e-04,  7.2251e-02,  3.8203e-03,
        -5.8780e-02, -2.4118e-01, -1.6021e-02, -1.5730e-01, -5.6480e-02,
         3.2150e-02,  2.2570e-02, -1.1762e-01,  1.1660e-01, -6.8439e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4546e-01, -4.9376e-01, -7.9662e-01, -5.0922e-02, -2.7816e-01,
        -3.2792e-02,  2.7976e-02, -1.0751e-01, -2.0001e-01, -3.3226e-01,
         9.0434e-03, -9.3204e-03,  1.0831e-02,  1.9231e-02,  2.0167e-02,
         9.7816e-02,  4.7945e-02,  2.4947e-01, -1.7243e-01,  1.6824e-02,
        -1.0320e-01,  9.1402e-03, -1.8995e-02, -3.2726e-02,  6.6947e-02,
        -7.7360e-02, -1.7081e-01,  8.9338e-02, -1.6406e-01, -4.1493e-01,
         1.1115e-02, -7.5618e-02,  3.4848e-02, -1.5684e-02,  4.2101e-02,
         4.2809e-02, -1.4543e-01, -3.4722e-01,  3.4271e-02, -1.3176e-04,
         1.2080e-01,  7.0972e-02, -4.9871e-03, -8.1214e-04, -1.8392e-02,
        -1.2097e-02, -1.5121e-02,  5.7242e-02, -1.2723e-03,  6.9891e-02,
        -7.5848e-02,  4.9534e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1250, -1.9900, -0.1740, -0.0945, -0.1011, -0.1610, -0.3019, -0.0624,
        -0.1327,  0.0051,  0.0378, -0.0743, -0.4150,  0.0330, -0.0686, -0.5009,
        -0.3230,  0.0315, -0.2445, -0.0723,  0.0407,  0.3545,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4738,  2.6019, -0.0134,  0.0628,  0.0603,  0.0914, -0.0496,  0.3255,
         0.1508,  0.2876,  0.0984,  0.1435,  0.0500,  0.1737, -0.0254,  0.4171,
         0.0620, -0.1216, -0.1189,  0.1231,  0.0517,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4091e-02, -1.0524e+00, -4.9179e-02, -1.1821e-01,  7.9925e-02,
        -1.7745e-02, -1.0240e-02,  1.6692e-01, -2.1985e-02,  7.4650e-02,
         8.7685e-03, -6.0683e-03, -7.0679e-03, -9.4775e-02,  2.0219e-02,
        -3.6937e-03,  5.1236e-02,  7.1826e-02, -6.7072e-02,  2.1948e-02,
         4.0468e-02, -4.2067e-02, -3.3926e-02,  4.2531e-03,  2.9956e-02,
         9.5069e-02, -6.2124e-02, -1.4050e-01,  2.9222e-02, -1.4196e-01,
        -4.0405e-01, -3.7867e-01, -1.4729e-04, -5.6850e-02, -1.1129e-01,
         3.5747e-03,  3.0426e-02,  2.1909e-02,  5.9159e-04,  9.7129e-03,
        -2.2230e-01,  1.7505e-02, -2.1717e-02,  4.9411e-02, -9.7925e-02,
         4.9993e-02, -1.0659e-01, -2.7029e-02,  2.7469e-02,  3.3782e-02,
        -5.8603e-02,  4.9352e-02,  8.0383e-02,  1.3507e-01, -1.7988e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0504e-02, -7.9806e-03, -2.1392e-02, -1.0420e-01, -1.1926e-01,
        -1.8841e-01, -4.1024e-01, -3.9912e-02,  5.9550e-02, -2.3281e-02,
        -6.1431e-02,  5.9174e-03, -3.9692e-02, -8.4866e-02, -1.2971e-01,
         3.6077e-04,  8.5772e-04, -6.7722e-02, -1.2710e-02, -2.5894e-02,
        -3.4403e-02, -2.7756e-02, -2.2435e-01,  4.5595e-02,  4.0512e-02,
         2.5072e-02, -6.3372e-02, -3.6731e-01, -5.2930e-02, -2.3742e-02,
         6.4155e-02,  4.2566e-02, -9.2176e-02, -5.9509e-01, -5.3345e-03,
        -1.9325e-01,  2.2489e-02, -1.6914e-01, -1.9535e-01, -1.3073e-01,
        -4.9208e-02, -4.0282e-02,  2.7439e-02, -5.1568e-02, -1.9050e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0496, -0.9809, -0.3939, -0.4917,  0.0703, -0.0500, -0.0639, -0.0449,
        -0.3024,  0.0556,  0.0401, -0.2362, -0.1121, -0.3276, -0.3096,  0.1924,
        -0.1777, -0.1565, -0.0392, -0.0393, -0.3949,  0.0693, -0.1796,  0.1973,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2739, -0.0232,  0.0051,  0.1455,  0.0695,  0.0168, -0.1848, -0.1159,
         0.0840,  0.0046,  0.0290, -0.0121, -0.0637, -0.1754, -0.0288, -0.0507,
        -0.2537, -0.1680, -0.1952, -0.2626, -0.0366, -0.0196, -0.1892, -0.2424,
        -0.0301, -0.2471, -0.1659, -0.1080, -0.1278, -0.2122, -0.0423,  0.0437,
        -0.1690,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4456, -1.8811, -0.2646, -0.4600, -0.0470, -0.2543, -0.1793,  0.0746,
        -0.4789, -0.0377,  0.0451, -0.0802,  0.2219, -0.0972,  0.0679, -0.0364,
        -0.1535, -0.1541, -0.0199,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0396, -0.0803,  0.2219, -0.0960, -0.2681, -0.0464, -0.3210, -0.2509,
        -0.5025,  0.0796,  0.1025, -0.0911,  0.1050, -0.1604,  0.0043, -0.0254,
        -0.1035, -0.1878, -0.1003, -0.0853, -0.1730, -0.3990, -0.0915, -0.0713,
        -0.3160,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2127, -1.2633, -0.4694, -0.1922, -0.2092, -0.4707, -0.9375,  0.1545,
        -0.1016, -0.1094, -0.0443,  0.0477, -0.1996, -0.1168,  0.0691,  0.1052,
         0.2801,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.0144, -0.0420, -0.1305, -0.0053, -0.0931, -0.0568, -0.0230, -0.0312,
         0.0602, -0.2904, -0.1420, -0.1771,  0.0524, -0.0052,  0.1026, -0.1341,
        -0.3185, -0.6486, -0.0630, -0.2565, -0.0034, -0.7538, -0.6653, -0.0504,
         0.0211, -0.1138,  0.0549, -0.0182,  0.0905,  0.0572,  0.1062, -0.0108,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1719, -0.7265, -0.5888, -0.2168, -0.0565,  0.0124, -0.3807, -0.5726,
         0.0064, -0.0364, -0.0024, -0.0437, -0.0991, -0.0850, -0.0374,  0.1485,
        -0.0067, -0.0373, -0.3577, -0.0943, -0.1527,  0.0039, -0.1878, -0.3871,
        -0.0508, -0.0546,  0.0373,  0.0505,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3464,  0.6871,  0.0595, -0.1824,  0.1842,  0.2562,  0.5848,  0.2042,
         0.0139,  0.2626,  0.0522,  0.2371, -0.0542,  0.1345, -0.0339,  0.2969,
         0.0677,  0.3865,  0.0639, -0.0734,  0.0254,  0.0072,  0.0674, -0.0371,
         0.1856,  0.0487,  0.0608, -0.0419, -0.2157,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0555, -0.2011, -0.1496, -0.2007, -0.7084, -0.1210, -0.1464, -0.0242,
        -0.0630,  0.0343, -0.0010, -0.1218, -0.1669, -0.5791,  0.2465, -0.3029,
         0.0013, -0.4079, -0.1081, -0.0782,  0.0122,  0.0646, -0.1697, -0.0908,
        -0.0660, -0.0306, -0.0587,  0.0060, -0.0083, -0.0365,  0.0182,  0.0990,
        -0.0571,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0798, -1.2996, -0.0075, -0.1075,  0.0683, -0.2147,  0.2198, -0.0486,
        -0.1585, -0.2073, -0.2408, -0.0542, -0.0257, -0.0119, -0.0343,  0.0393,
        -0.1398, -0.0086, -0.3385,  0.1006, -0.1872, -0.1824, -0.1445,  0.3189,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5296, -0.9473, -0.6876, -0.0840, -0.2266, -0.2802, -0.1416, -0.2782,
         0.1824, -0.2866, -0.1331, -0.1121, -0.1237, -0.0530, -0.0048, -0.0024,
         0.0156, -0.1465, -0.0583, -0.7597, -0.1911, -0.0434,  0.2187,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6944e-01, -1.4779e+00, -1.1955e-01,  4.3961e-02, -1.3571e-01,
        -3.2955e-01,  1.1538e-01,  8.4218e-02, -5.6882e-02, -1.3803e-01,
         5.3138e-02, -5.3719e-03, -4.7533e-02,  1.5668e-01, -2.4411e-03,
         9.7439e-02, -2.0520e-01, -1.0102e-01, -3.3465e-02,  1.5521e-01,
         2.6590e-02, -1.3545e-03,  2.2844e-03, -5.2723e-02, -1.4399e-01,
        -2.4955e-02, -8.7732e-03,  9.7366e-03, -4.8968e-02, -3.9206e-01,
        -4.2626e-02, -1.0544e-01, -1.3930e-01, -2.4879e-02, -4.4459e-02,
        -2.2896e-01, -1.9173e-01,  1.0351e-01,  2.1381e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7274e-01, -3.7675e-01,  9.7593e-02, -9.1421e-02, -6.1195e-01,
        -4.6229e-02,  4.4685e-02,  3.1887e-02,  5.6898e-02,  7.7310e-02,
         4.7667e-04, -2.7264e-02,  1.8081e-02, -4.1767e-02, -2.6390e-02,
        -3.5974e-01, -1.8981e-01,  3.0335e-02, -1.3584e-02, -1.7479e-01,
        -3.4854e-01, -5.4076e-02, -1.3291e-01, -1.3492e-01, -2.5568e-01,
        -1.3765e-01, -5.2535e-02, -4.2918e-03, -6.4888e-02, -1.0654e-01,
        -1.5284e-01, -2.2843e-02, -4.2749e-02, -5.6391e-02,  1.2429e-02,
         1.7195e-02, -6.0642e-02, -2.8560e-01, -2.1069e-02, -5.2164e-02,
        -7.0421e-02,  4.5672e-02,  6.7627e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2916, -1.3872,  0.0848,  0.0929, -0.0791,  0.1460,  0.0650, -0.0231,
        -0.3454,  0.0499,  0.2577, -0.0187, -0.1330, -0.0152,  0.0270,  0.0629,
        -0.2341, -0.4189, -0.0589, -0.0407,  0.0306, -0.0505, -0.0812,  0.0126,
         0.0262,  0.0191,  0.0598,  0.0161,  0.0239,  0.0622,  0.2153, -0.1781,
        -0.4566,  0.1470, -0.0742, -0.0067, -0.0484, -0.0113, -0.1784, -0.0817,
        -0.1761, -0.3194, -0.0267,  0.0918,  0.0289, -0.0772, -0.0050, -0.0054,
        -0.0192,  0.0168,  0.0579, -0.0174, -0.0424,  0.0298, -0.0247,  0.1961,
        -0.1275], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1224,  2.1578,  0.2850, -0.0846,  0.2429, -0.0077,  0.2017,  0.5382,
         0.0252,  0.0586,  0.2963, -0.2086, -0.2429, -0.0680,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2191, -0.7683,  0.2536,  0.0938, -0.0338, -0.2483, -0.1122, -0.0164,
        -0.0967, -0.0108, -0.2541, -0.5567,  0.1481, -0.0783, -0.1149,  0.0157,
        -0.0354, -0.0514,  0.1049, -0.2266, -0.4307, -0.0542, -0.1249,  0.0037,
         0.1175,  0.0631,  0.0119, -0.0447, -0.0228, -0.0559,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5633,  0.5472,  0.2168,  2.7913,  0.6268,  0.4852,  0.4030, -0.2746,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.3222, -0.6600,  0.0653, -0.0838, -0.2361, -0.3936, -0.0625,  0.1014,
        -0.0395, -0.0607,  0.0543,  0.0317, -0.1757, -0.3632, -0.0378,  0.0025,
         0.0642, -0.0207, -0.0087,  0.0438, -0.0234,  0.0173,  0.0503,  0.0545,
        -0.1545, -0.1284, -0.2117, -0.0583, -0.3570, -0.3814, -0.0480, -0.0157,
        -0.1074,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3845, -1.6787, -0.6704, -0.4219, -0.0209, -0.0141, -0.1690, -0.0775,
        -0.0453,  0.0140, -0.0590,  0.0279, -0.0058, -0.0366, -0.0214, -0.1074,
         0.0315, -0.0164, -0.0228,  0.0442,  0.1052, -0.0326,  0.0144,  0.0587,
         0.0514, -0.0743, -0.1137, -0.1487, -0.0070,  0.0444, -0.0352, -0.0601,
        -0.2245, -0.2436, -0.0186, -0.0967, -0.1392, -0.0495, -0.0261,  0.0231,
         0.0381, -0.0522, -0.1662, -0.0997, -0.0161, -0.0642, -0.0494,  0.1324,
        -0.0602,  0.0320, -0.1786, -0.0339,  0.0090, -0.0590, -0.0049, -0.0059,
        -0.0571, -0.1286, -0.0294, -0.1790, -0.0521, -0.0180, -0.1774,  0.0669,
        -0.0188,  0.0023, -0.0475, -0.1153,  0.0285, -0.4202, -0.3440],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4079, -0.0479, -0.1291, -0.0141, -0.2298, -0.0568, -0.5746, -0.0675,
         0.0383,  0.0462, -0.0740, -0.0092,  0.0092, -0.0725, -0.0761, -0.0332,
        -0.0705, -0.0354, -0.0709, -0.3744, -0.1463, -0.0161,  0.1264, -0.1964,
         0.0703, -0.1653, -0.1024, -0.0337, -0.0579, -0.0854, -0.0586, -0.1111,
        -0.2073, -0.2132, -0.0880, -0.0911, -0.2533, -0.0243, -0.0127, -0.0766,
        -0.0985,  0.1081,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3164, -0.9324,  0.1036,  0.0409, -0.0234,  0.0882,  0.1142,  0.0263,
        -0.1628, -0.3100, -0.1722, -0.1845, -0.3468,  0.0051,  0.0116, -0.0367,
         0.0574, -0.2793, -0.2925, -0.0272, -0.0134, -0.0838, -0.0119, -0.0931,
         0.0275, -0.5238, -0.2683, -0.0908, -0.2990, -0.0140,  0.0686, -0.0523,
        -0.0346, -0.0487, -0.0683,  0.0125, -0.2642,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5369, -1.9246,  0.2206, -0.1691,  0.0408,  0.2964, -0.0839,  0.0708,
        -0.1304, -0.0726, -0.0635,  0.0278, -0.2107, -0.0950,  0.1914,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1903, -0.3141,  0.1439,  0.1393, -0.4958, -0.6194, -0.0815, -0.4023,
        -0.7138, -0.1349,  0.1224, -0.1452, -0.1314, -0.1250,  0.4767,  0.1636,
        -0.1422, -0.0112,  0.0797,  0.3515,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0541,  0.0133,  0.0879, -0.0724, -0.5064, -0.0841, -0.0512, -0.0359,
        -0.0569, -0.0267, -0.1504, -0.2059, -0.1063, -0.0395, -0.1417, -0.1041,
         0.0128, -0.1069,  0.0707, -0.1613, -0.0061, -0.0679, -0.0560, -0.0017,
        -0.0511, -0.1814, -0.1596, -0.0207, -0.0136, -0.1527, -0.2000, -0.0634,
        -0.0477, -0.0410, -0.1281, -0.2201, -0.0255, -0.1174,  0.0179, -0.0040,
         0.0296,  0.0750, -0.1642,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6415,  0.1637, -0.0910,  0.0176, -0.1389, -0.2119, -0.3145, -0.6747,
        -0.1044,  0.0024, -0.0103, -0.0747, -0.0689, -0.4562, -0.4709, -0.0402,
         0.0538, -0.0100, -0.0590,  0.0351, -0.3311, -0.0766, -0.1563,  0.0857,
         0.0448, -0.0639,  0.0031, -0.0655, -0.2659, -0.0398, -0.0186, -0.1925,
        -0.0763, -0.2444, -0.1842, -0.0049,  0.1153,  0.1432,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3005, -1.3583, -0.3503, -0.3883,  0.0424, -0.3146, -0.0626,  0.1874,
        -0.1327, -0.4404, -0.0959, -0.0415, -0.2120,  0.0197,  0.0320, -0.1254,
        -0.0581, -0.3486,  0.0649,  0.1259, -0.0198, -0.0028,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6578, -0.7989, -0.0470,  0.0128,  0.0394, -0.0416, -0.0274,  0.1504,
         0.0184, -0.6600, -0.0943,  0.0079, -0.0435,  0.1379, -0.1271, -0.2936,
        -0.5786, -0.0085, -0.1379, -0.1563,  0.0808, -0.0215, -0.0944, -0.1008,
        -0.0542,  0.0530,  0.0073,  0.0179,  0.0397, -0.2177,  0.0040,  0.0371,
         0.0471,  0.0365, -0.0661, -0.0905,  0.1179, -0.0090, -0.0459, -0.0370,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1296,  0.1557, -0.0630,  0.0040, -0.1240, -0.0647,  0.0617, -0.0387,
        -0.3185,  0.0023, -0.1514, -0.0726, -0.0662, -0.4920, -0.5431, -0.0617,
        -0.0216,  0.0671, -0.2045, -0.0904, -0.0865,  0.0702, -0.1264,  0.0034,
        -0.0615, -0.3129, -0.0931, -0.2318, -0.3162,  0.0140,  0.1077, -0.1633,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3207, -1.6496, -0.0661,  0.0564, -0.1014,  0.0182, -0.1446, -0.5728,
        -0.6463, -0.1035, -0.0969, -0.2762, -0.1140, -0.0890,  0.1089, -0.0723,
        -0.2758, -0.0295, -0.0215,  0.0027,  0.0067,  0.0556, -0.0222, -0.3193,
        -0.0197,  0.0120, -0.1812, -0.0237,  0.0122, -0.1125, -0.0151,  0.0148,
        -0.0848,  0.0192, -0.1558,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.4967, -0.4284, -0.3335, -0.6040,  0.0282,  0.0485, -0.6911, -0.4487,
        -0.3871, -0.6271, -0.1226, -0.1860,  0.0216, -0.1850, -0.1697, -0.0640,
        -0.2613, -0.0785,  0.0263,  0.1964,  0.2498,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5639, -1.5836, -0.3885, -0.3771, -0.3870, -0.4808, -0.0106, -0.3591,
        -0.4014,  0.0358,  0.0742,  0.0173, -0.0297,  0.0744, -0.1735,  0.0449,
        -0.1512, -0.0285,  0.3212,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0501, -0.8541, -0.5529, -0.0442, -0.2051, -0.0059,  0.0065, -0.1754,
        -0.0122, -0.5503, -0.8125, -0.1793, -0.1457, -0.5044,  0.0474, -0.1558,
         0.0110, -0.2294, -0.0078, -0.1117,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5556e-01, -1.5466e+00, -5.9133e-02, -3.6947e-01, -8.8407e-04,
        -5.0144e-02, -4.1146e-02, -2.3314e-01, -4.2059e-02,  1.8276e-02,
        -3.2473e-02, -3.3123e-02,  1.5393e-02, -1.8361e-01, -6.9442e-02,
        -1.6158e-01, -3.1152e-01,  4.4140e-04, -1.5589e-01, -2.6200e-01,
         7.9506e-02,  4.0693e-01, -8.9972e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2064, -1.0721, -0.3802, -0.0519,  0.0367, -0.2921, -0.1384, -0.3390,
        -0.4761, -0.2103, -0.1790, -0.3815, -0.2057, -0.3123, -0.0611, -0.1420,
         0.0103, -0.0418, -0.0772,  0.1537, -0.0254,  0.0175,  0.0169, -0.4204,
         0.3293,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5370, -1.5386, -0.2004,  0.1815,  0.1865,  0.0229, -0.3960,  0.3139,
         0.7544, -0.8122, -1.2085, -0.0579, -0.1204, -0.3038,  0.0484, -0.4333,
         0.0836, -0.0087,  0.0350, -0.0693, -0.3646, -0.2517, -0.2791, -0.1417,
        -0.0725, -0.2319, -0.2134,  0.0977, -0.2910,  0.2799, -0.0161, -0.0353,
        -0.2104, -0.2346,  0.0466,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2195,  0.0191,  0.1573, -0.2567, -0.0356, -0.0626, -0.3240, -0.0824,
        -0.4542, -0.0655, -0.0486, -0.0089, -0.0779, -0.3583, -0.4245, -0.9202,
        -0.2700, -0.1854, -0.1661, -0.0077,  0.0368,  0.1050,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0776, -0.2845, -0.0303,  0.1291, -0.0518,  0.1137, -0.0107,  0.0871,
         0.2143,  1.3821,  0.4910, -0.2839,  0.2519,  0.3418,  0.3028,  0.5538,
         0.1228,  0.5723,  0.0144, -0.4643,  0.5630,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1996, -2.0253, -0.0362, -0.0872, -0.0215,  0.1285, -0.2581, -0.4924,
        -0.0185, -0.1039, -0.0579, -0.0087,  0.0748, -0.2870,  0.0255, -0.2190,
         0.0396,  0.0996, -0.2836,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2523, -0.1055, -0.1231,  0.0386, -0.0480, -0.2035, -0.0235,  0.0133,
         0.0433, -0.0120, -0.1100,  0.0031,  0.0038, -0.0379,  0.0198,  0.0372,
        -0.0537, -0.2091, -0.0135, -0.1483, -0.1115, -0.2246, -0.1823, -0.0105,
        -0.0947, -0.0985, -0.0059, -0.2368, -0.0438, -0.5222, -0.0330, -0.1244,
        -0.2737, -0.4346,  0.0503, -0.0067,  0.0107,  0.0262,  0.0555,  0.0384,
         0.1132,  0.1189, -0.0022,  0.0151,  0.0088,  0.0641,  0.0350],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1612, -0.8795, -0.3323, -0.0011,  0.0506,  0.0886, -0.1012,  0.0976,
        -0.2036,  0.0159,  0.0413, -0.0117,  0.1740, -0.0844,  0.0744, -0.0492,
        -0.4499, -0.0712, -0.0260, -0.0956,  0.0439, -0.3630, -0.5437, -0.6570,
        -0.0789,  0.2185,  0.0158, -0.0458,  0.0350,  0.0238,  0.0695,  0.1875,
        -0.1737,  0.2554,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4365, -1.5053,  0.0684,  0.0898,  0.0061, -0.1261,  0.1147,  0.1213,
        -0.0475, -0.1060,  0.0752,  0.0108, -0.2785, -0.3240, -0.0035, -0.0640,
        -0.0092,  0.0361, -0.1228, -0.2334, -0.0649, -0.2880, -0.0420, -0.1954,
        -0.0443, -0.0145, -0.0569, -0.1064, -0.0338, -0.1038, -0.0492,  0.0450,
         0.0827,  0.1185,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.0885, -2.2773, -0.1596,  0.1077, -0.1942, -0.4541, -0.0084,  0.3062,
         0.1397, -0.5753,  0.0430, -0.2440, -0.2234, -0.6037, -0.1211, -0.2195,
         0.1032, -0.1263,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2301, -0.0067, -0.0436, -0.1494, -0.0575,  0.1081, -0.0227, -0.1436,
        -0.8298, -1.5131, -0.1618, -0.3151, -0.0826,  0.0149,  0.1441,  0.0365,
        -0.0712,  0.0250, -0.0089, -0.7075,  0.1670, -0.0792, -0.0850, -0.0653,
        -0.0816,  0.0290, -0.4506,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0270, -0.0095, -0.0728,  0.0843, -0.1920, -0.0926,  0.0463,  0.1112,
         0.7033,  0.0367, -0.0334,  0.1937,  0.9815,  0.3082,  0.0796, -0.1537,
         0.0832, -0.0413, -0.0796,  0.9910,  0.1603, -0.0521,  0.0549,  0.0432,
         0.0049, -0.1403,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2887, -1.9983,  0.1494, -0.5309, -0.5708, -0.0537, -0.1756, -0.0129,
        -0.2544, -0.2552, -0.4229, -0.2340, -0.1234, -0.0781, -0.2479, -0.2274,
        -0.0807, -0.0615, -0.1013, -0.1752,  0.0353,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3202, -1.2086, -0.9010, -1.0350, -0.3014, -0.0805, -0.3353,  0.0743,
         0.0028,  0.0146,  0.0532, -0.1273,  0.0433, -0.0149, -0.1734,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5308, -0.0383,  0.0802,  0.1788,  0.1504, -0.0348,  0.0240, -0.1276,
         0.0033, -0.3774, -0.0716,  0.1114, -1.0037, -0.2951,  0.1428, -0.2795,
         0.2562,  0.0646,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1942, -2.1741, -0.6709, -0.7927, -0.4116, -0.0800, -0.2058, -0.2520,
        -0.2004, -0.4875, -0.4124, -0.2145, -0.0852, -0.3566,  0.0279,  0.1397,
        -0.0687, -0.0250, -0.0398,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1857, -0.6296, -0.0788, -0.0108, -0.1180, -0.1179, -0.0689,  0.0396,
        -0.0421,  0.0327, -0.0033,  0.0295, -0.0127,  0.0046,  0.0028,  0.0469,
        -0.1407,  0.0397,  0.0228, -0.1068, -0.3985,  0.0031, -0.1431, -0.1040,
        -0.4149, -0.0420, -0.2475, -0.0094, -0.0580, -0.0963, -0.0105,  0.0425,
        -0.1127, -0.0442, -0.1264, -0.0654, -0.1364,  0.0301, -0.0184, -0.0378,
        -0.0066,  0.0074,  0.0112, -0.0494], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1638, -0.8685, -0.0879, -0.2120, -0.2006, -0.0876, -0.0861,  0.0252,
        -0.3981, -0.0776, -0.0351,  0.0325, -0.1085, -0.0370,  0.0806,  0.0554,
         0.0439, -0.4095,  0.0789, -0.1956, -0.1500,  0.0218,  0.0203, -0.0266,
         0.0593, -0.0473, -0.0821, -0.5273,  0.1022, -0.0456, -0.0164, -0.0032,
        -0.1075,  0.0837,  0.1850,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0623, -2.2979, -0.1629, -0.2415, -0.0389, -0.0350, -0.3253, -0.2028,
        -0.0617,  0.0246,  0.0340, -0.1187, -0.4062,  0.0811,  0.0143, -0.3540,
        -0.0771, -0.0208,  0.0745,  0.0416,  0.0115, -0.0191,  0.3767,  0.8799,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2073, -1.0674, -0.0880, -0.1652, -0.0377, -0.0130, -0.0668, -0.2983,
        -0.4118, -0.1065, -0.1882, -0.1368,  0.1061, -0.1909, -0.2371, -0.0016,
         0.0901, -0.0508, -0.2242, -0.0151, -0.0587, -0.2349, -0.0624, -0.2040,
         0.0201, -0.2102, -0.1513, -0.1264, -0.0803, -0.1364, -0.1455, -0.0318,
        -0.1864, -0.4011,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0207,  0.1240, -0.0435, -0.1243,  0.0448,  0.0306, -0.7728, -0.1568,
         0.0909, -0.0811, -0.0009, -0.1646, -0.2334, -0.0703, -0.4654, -0.8556,
        -0.0037, -0.0585, -0.0314, -0.0513,  0.0422, -0.0084, -0.0840, -0.2287,
        -0.1089, -0.0698, -0.1706, -0.0131,  0.0359, -0.0337,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.2905,  0.0257, -0.1590, -0.8189, -0.1000, -0.1156, -0.1081, -0.1785,
        -0.1446, -0.4314,  0.1907, -0.2341, -0.4390,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5797,  0.0231,  0.3539,  0.3162,  0.0874, -0.3700, -0.5125, -0.3485,
        -0.8958,  0.1717, -0.0515, -0.3938, -0.1533, -0.1203, -0.3992, -0.1638,
         0.2665, -0.0392,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1034, -0.6219, -0.5988,  0.0713, -0.3321, -0.1707, -0.2128, -0.2699,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1660, -0.7972,  0.0241,  0.0191, -0.1011, -0.0585, -0.0278, -0.0417,
        -0.0379, -0.0157, -0.0309,  0.0116, -0.0556, -0.0825, -0.0016,  0.0024,
         0.0343,  0.0031,  0.0160,  0.0029,  0.0456, -0.0210, -0.1206, -0.0661,
        -0.0183, -0.0863,  0.0021,  0.0190, -0.4504, -0.1887,  0.0643, -0.1965,
         0.0262, -0.4048, -0.0052, -0.2172, -0.1593, -0.0597,  0.0971, -0.0129,
        -0.1010, -0.0444, -0.0567,  0.0279,  0.1048,  0.0015,  0.0932],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3075,  1.3276, -0.5824,  1.0900,  0.3088, -0.3038, -0.3142,  0.4720,
        -0.1455,  0.5921, -0.7243,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0391,  1.6929, -0.2941,  0.5832, -0.0101,  0.1522,  0.3392,  0.6559,
         0.0913,  0.1504,  0.0034, -0.0405,  0.1165, -0.0399,  0.3284,  0.2022,
         0.1639,  0.0857,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3940, -0.5297, -2.0369,  0.4370,  0.0599, -0.1216, -0.2685, -0.1258,
        -0.4497, -0.2026,  0.0167, -0.1115,  0.1894, -0.0777, -0.1144,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3347, -1.7033, -0.0301, -0.0711, -0.1389, -0.4318, -0.7589,  0.0465,
         0.0437, -0.4103, -0.4870,  0.0023,  0.0704, -0.0197, -0.1521,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1247, -0.0006,  0.0772,  0.2371, -0.0342,  0.1323,  0.0466,  0.0173,
        -0.2544, -0.1831, -0.0661, -0.4448, -0.5668, -0.0310, -0.0217, -0.1586,
        -0.2394,  0.0979, -0.0531, -0.0272, -0.2701, -0.4062, -0.0300,  0.1487,
        -0.1330, -0.2361,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0467,  0.0113,  0.1331, -0.1631,  0.0376, -0.0816,  0.1297, -0.7744,
        -1.3225,  0.0159, -0.0427, -0.0592,  0.0602,  0.1808, -0.0673,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0479, -2.2369, -0.1100, -0.2364, -0.2326, -0.1082, -0.2147, -0.0240,
        -0.0525, -0.8529,  0.0417, -0.1081, -0.0080, -0.0411,  0.1158, -0.0884,
        -0.1013,  0.0777, -0.2386,  0.1345, -0.1368,  0.1068,  0.0683,  0.0243,
         0.0525,  0.2406,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2184, -0.0707,  0.0122,  0.0333, -0.1061, -0.1825,  0.0181,  0.0462,
        -0.0674, -0.2078, -0.6163, -0.0948, -0.0051, -0.1709, -0.5995, -0.3320,
         0.0460, -0.2031, -0.0399, -0.2114,  0.0506, -0.0671, -0.0738, -0.1185,
         0.3774, -0.0979,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.0924, -1.2996, -0.0371,  0.0031, -0.0811, -0.0030, -0.1856, -0.2557,
        -0.0089, -0.2318, -0.0370, -0.1861, -0.2955,  0.1872, -0.1190, -0.0109,
        -0.0466,  0.0267, -0.1500,  0.0052, -0.0661, -0.1164, -0.0519, -0.1997,
        -0.2914,  0.0806, -0.1656, -0.0041, -0.0301, -0.0051, -0.0210, -0.1723,
        -0.0233,  0.0507,  0.1403, -0.1111,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0302, -0.4328, -0.2080, -0.1971, -0.1490, -0.0312, -0.3214, -0.2409,
         0.0109,  0.0612, -0.1256, -0.0447, -0.0839,  0.0643, -0.0289, -0.0179,
        -0.0206, -0.0082,  0.0951, -0.0028, -0.0902, -0.1883,  0.0225, -0.0165,
        -0.2035, -0.3374,  0.0069, -0.1072, -0.1603, -0.0994, -0.0372, -0.2062,
        -0.2898,  0.0300,  0.0279, -0.0745,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2874, -0.5044, -0.2006, -0.1016, -0.0566, -0.3941, -0.2524, -0.0039,
         0.0645, -0.0822, -0.1006, -0.0894, -0.8716, -0.0459,  0.5047,  0.0095,
        -0.0904,  0.1095, -0.0588, -0.1330,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2862, -0.3513, -0.0573, -0.1291,  0.0696,  0.0919, -0.1072, -0.1585,
        -0.0530, -0.3602, -0.0673,  0.0101, -0.0108, -0.0871, -0.5244, -0.0609,
        -0.1291, -0.2061,  0.0335,  0.0155,  0.0414,  0.0453,  0.0313,  0.0726,
        -0.0661,  0.0351, -0.0979, -0.0860, -0.0923, -0.2478, -0.1937, -0.1144,
        -0.1354, -0.5929, -0.0590,  0.2620, -0.1153,  0.2206,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0844, -1.0735, -0.2915, -0.4828, -0.0095,  0.0498, -0.0741,  0.1497,
        -0.0628, -0.0891, -0.2889, -0.3375,  0.0133,  0.0611,  0.0219, -0.2716,
         0.0280, -0.2579,  0.0093,  0.0143,  0.0589,  0.0857,  0.0688,  0.0353,
         0.0987, -0.0104, -0.0352, -0.0707, -0.2530, -0.0387, -0.0653, -0.0447,
         0.0478, -0.0262, -0.0712,  0.0561,  0.0379,  0.0106,  0.0205,  0.0356,
        -0.1893], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2190, -2.3160,  0.1083, -0.0718, -0.0886, -0.4554,  0.0363, -0.1417,
        -0.1135, -0.0197, -0.0366, -0.1893, -0.2425, -0.1034,  0.0468, -0.3024,
        -0.0165, -0.3773, -0.0353,  0.0434, -0.0243, -0.0267,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1703, -0.0029, -0.0156, -0.0306,  0.0708, -0.0281, -0.0180, -0.0220,
         0.0154,  0.0580, -0.0169, -0.3742, -0.0458, -0.0356, -0.0255, -0.2835,
         0.0166, -0.1925, -0.0030, -0.2829, -0.3559,  0.0359,  0.0989, -0.1619,
        -0.0078,  0.0333, -0.0428, -0.0827, -0.5885, -0.0450, -0.0599, -0.1680,
        -0.0209,  0.2786,  0.0337,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5225,  0.1678,  0.8526,  0.2495,  0.3883, -0.0566,  0.0332,  0.0077,
         0.2036,  0.0326,  0.1347, -0.0297,  0.1062, -0.0469, -0.0370,  0.0758,
        -0.0087,  0.0107,  0.2559,  0.0362,  0.1596,  0.0376,  0.1094,  0.2416,
         0.0278, -0.0873,  0.2644,  0.0670, -0.1010,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7117, -2.4093, -0.2796, -0.1169, -0.2613, -0.2532,  0.0046,  0.0736,
         0.0422,  0.2030, -0.0789, -0.1157, -0.0764,  0.0842,  0.1478, -0.2709,
        -0.1779,  0.0867, -0.1348, -0.0713, -0.0669, -0.0808, -0.0125,  0.0232,
         0.3982,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3265,  0.1570, -0.0399, -0.0842, -0.6190,  0.3079, -0.1454, -0.4761,
        -0.7254,  0.0286, -0.0518,  0.2341,  0.0770,  0.0426, -0.3810, -0.3788,
         0.0520, -0.0432,  0.1071,  0.0113, -0.0268, -0.0745, -0.0873,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1294, -0.0380,  0.0153,  0.0163,  0.0235,  0.0671,  0.0250,  0.0088,
         0.0128, -0.0225,  0.0295,  0.0018, -0.1483, -0.5791, -0.0180, -0.0979,
        -0.0357, -0.0732, -0.0134, -0.0222,  0.0328,  0.0289,  0.0329, -0.1239,
         0.0828, -0.0647, -0.5324, -0.1417, -0.3084, -0.1696, -0.2845, -0.0570,
        -0.1165, -0.1622, -0.0628, -0.0872,  0.0113, -0.0192,  0.0598,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3285, -0.3155,  0.2330, -0.6923, -1.1304, -0.2816, -0.0776, -0.8628,
        -0.6493,  0.1998, -0.4307,  0.0980,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-8.1653e-02, -5.5020e-01, -1.0384e+00, -1.5298e-01, -1.8865e-02,
        -1.7678e-01, -2.4336e-01, -1.4858e-02,  8.4238e-02, -2.7254e-02,
         4.0432e-02,  1.6507e-02, -1.1684e-02, -4.1519e-02, -5.6757e-02,
         4.4840e-02, -3.7064e-02, -1.7587e-01, -4.6981e-03,  6.9877e-03,
        -2.8371e-02, -8.4875e-02,  3.5130e-02, -1.7822e-02, -2.6382e-02,
        -1.0817e-02, -1.1228e-02,  9.7503e-02,  4.4910e-02, -1.0564e-01,
        -4.7670e-02,  1.1631e-01, -2.4706e-01, -2.0529e-02, -1.4195e-02,
        -2.7676e-02, -1.1620e-01, -4.8144e-03,  5.5444e-04, -2.3965e-02,
         5.2570e-02,  7.6097e-03, -1.1041e-03, -8.5225e-02,  7.8723e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2940, -1.7085, -0.5246, -0.4742, -0.5901, -0.0975,  0.0928, -0.1717,
        -0.9046, -0.1532,  0.1169, -0.1437,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3417, -2.5422, -0.6925, -0.8478, -0.4351,  0.0102, -0.0751, -0.2566,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0081e-02,  1.1017e+00,  7.9524e-02,  2.2375e-02,  1.4509e-01,
         5.2640e-02,  2.9492e-02,  5.3168e-02,  1.5097e-01,  3.5445e-01,
         2.9297e-02,  1.1443e-01,  6.0667e-02,  2.5394e-02,  1.9988e-02,
        -3.4133e-02, -7.5872e-02,  6.4857e-03, -1.0193e-01,  1.2044e-02,
         2.6690e-03, -2.4583e-02,  7.1296e-02, -9.2377e-03,  6.3139e-02,
         1.3482e-02,  7.2194e-04,  6.6865e-02,  1.3472e-01,  4.4003e-02,
         3.1097e-02,  4.7002e-02,  2.7382e-03, -1.5304e-02,  2.1940e-02,
        -1.1882e-02,  1.1738e-02,  2.1269e-02,  5.1684e-02, -4.4984e-03,
        -1.2015e-02,  7.0728e-02,  4.6466e-03,  6.6412e-02,  2.4190e-02,
        -2.3103e-01,  4.0592e-01,  9.3487e-02,  3.2212e-01,  4.1719e-01,
         2.3460e-01,  7.7278e-02,  1.7190e-01,  6.5763e-02,  9.7924e-02,
        -7.1715e-02,  4.3526e-02,  2.5845e-02, -2.2524e-02,  4.5387e-01,
        -1.8534e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0489, -0.0112, -0.0130, -0.0734, -0.1480,  0.0167, -0.0971,  0.0976,
         0.0496,  0.0650, -0.0026, -0.2841, -0.0388, -0.1432, -0.1961,  0.0293,
        -0.0771,  0.0279, -0.1998, -0.1257, -0.3171, -0.1866, -0.0756, -0.0431,
        -0.2019, -0.0327, -0.0154, -0.0560, -0.0936, -0.0648,  0.0808,  0.1916,
         0.0548,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2555, -0.1541, -1.5053, -0.4214, -0.2427, -0.1681, -0.4163, -0.0390,
         0.1158, -0.1287, -0.0086,  0.0613, -0.0478,  0.1181,  0.0091,  0.2958,
        -0.0093, -0.2520, -1.2505,  0.0907, -0.0838,  0.1438,  0.3309,  0.0918,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1269, -0.4047, -0.3125, -0.1690, -0.0438, -0.1377,  0.0321, -0.1999,
         0.0746, -0.1636, -0.3457, -0.3194, -0.8816,  0.1088,  0.0036,  0.0170,
         0.0585,  0.0029,  0.1630,  0.0652, -0.0779, -0.0903,  0.0786,  0.1878,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3640e-01,  6.6954e-02, -1.7271e-01, -1.8938e-01,  8.7298e-03,
        -6.0285e-02, -6.4367e-02, -3.8911e-01, -3.7573e-02, -6.8126e-02,
         6.5427e-02, -1.0063e-01,  9.7241e-02, -3.1945e-03, -2.4731e-01,
        -4.7521e-01, -3.4905e-01, -2.0964e-02, -2.9099e-02, -1.2256e-01,
        -1.5807e-01, -5.0486e-02, -1.8806e-01, -3.6018e-01,  8.9345e-02,
         1.6687e-02, -3.0431e-01, -6.7981e-02,  2.3766e-02, -2.7799e-04,
        -9.4565e-02, -1.0246e-01,  4.2787e-02, -3.1983e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3652,  0.0034, -0.0574,  0.0539,  0.2995,  0.0073,  0.0347, -0.0535,
        -0.1375,  0.0586, -0.0152, -0.1148,  0.2552,  0.5437, -0.2987, -0.3969,
         0.5292, -0.0062,  0.3542,  0.1537,  0.3169,  1.0566, -0.2705,  0.2137,
         0.0186,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4250, -0.0782,  0.1511,  0.0297,  0.1266,  0.1297,  0.5404,  0.2285,
         0.3723,  1.0970, -0.1126,  0.2069,  0.0998,  0.0628,  0.1518,  0.0083,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0293, -1.2300, -0.0190, -0.3757, -0.2792, -0.1950, -0.9584, -0.0521,
        -0.0729,  0.0320, -0.0091, -0.2235, -0.2202, -0.0454, -0.1104,  0.0801,
        -0.2490, -0.2698, -0.1086, -0.0223,  0.0534,  0.0211,  0.3920,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4941, -0.0416, -0.3256, -0.8320, -2.1896, -0.1997,  0.6397, -0.0726,
        -0.0735,  0.1306, -0.1944,  0.2555, -0.0871,  0.3724,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.2481, -0.6567, -0.3501, -0.6014, -0.0216,  0.0777, -0.2277,  0.1046,
        -0.1610, -0.1027, -0.2753, -0.2616, -0.0130, -0.0353, -0.0494,  0.1192,
         0.0032, -0.4318, -0.1150, -0.5077,  0.0647,  0.0174, -0.0902,  0.0259,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5775,  0.0254, -0.6098, -0.0015, -0.8873, -0.0950, -1.4388, -0.0081,
         0.1804,  0.1234, -0.3885,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4226, -0.0241,  0.2684, -0.0151,  0.0274, -0.2334, -0.4462, -0.0023,
        -0.4840, -0.2256, -0.2028,  0.0925, -0.1641,  0.1505, -0.3458, -0.0897,
        -0.0460, -0.3894,  0.0362,  0.1531,  0.1807,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4128,  0.3640, -0.0698,  0.2257,  0.0503, -0.1946, -0.3285, -0.7523,
        -0.5316, -1.1177, -0.0287,  0.3693,  0.1711, -0.4131,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0856e-01,  1.9711e-01,  8.8427e-02,  7.2748e-02,  4.6415e-02,
         5.9365e-02,  1.0109e-01,  8.1483e-03, -4.8910e-02,  6.8924e-04,
         6.3092e-01,  2.0929e-01,  1.2759e+00,  9.8824e-02,  4.1925e-02,
         7.5245e-02,  4.2784e-01, -7.9788e-02,  1.3524e-01,  7.1050e-02,
        -1.5649e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4048,  1.8640, -0.1998,  0.1781, -0.2220,  0.5232,  0.1972,  0.4432,
         0.1145,  0.1506,  0.3640, -0.5991,  0.0243,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1049e-01,  8.1532e-01,  1.2827e-01,  4.7834e-02,  1.0583e-01,
         9.5487e-02,  3.4999e-01,  3.7671e-01,  2.0881e-01,  2.5580e-02,
         5.2462e-02, -2.1637e-01,  8.0897e-04, -8.4831e-02, -2.8876e-02,
         6.9948e-02, -3.8255e-02,  6.5236e-02, -3.6233e-02, -2.4258e-01,
         3.5433e-02, -1.4181e-01, -2.2411e-01, -1.8180e-01,  1.4554e+00,
        -7.9230e-02,  9.3404e-01,  3.3093e-01,  3.1151e-01,  1.5358e-02,
         7.7093e-02, -1.1903e-02, -4.3656e-02, -4.6192e-03,  1.0564e-01,
         3.3664e-02,  1.6183e-01,  5.4718e-02,  1.9303e-02,  1.0815e-01,
         1.9402e-01,  3.1042e-02, -8.4120e-03, -2.2401e-01,  8.4340e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1041,  0.2971,  0.1985,  0.0569,  0.0788, -0.0642,  0.1324,  0.2332,
         0.0268,  0.0909,  0.3708, -0.1404, -0.0597,  0.0819, -0.0154,  0.2019,
         0.7159, -0.0763, -0.0101,  0.1066, -0.0210,  0.4186, -0.0684,  0.2424,
        -0.0779, -0.1494,  0.2079,  0.2488,  0.6460,  0.0694, -0.0252,  0.0688,
         0.3530,  0.1869,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3246, -0.4257,  0.3626,  0.5283,  0.1286, -0.0494, -0.1412,  0.0311,
        -0.5308,  0.0118, -0.0854,  0.0181, -0.1047, -0.0283, -0.0954, -0.2239,
         0.1063, -0.0299,  0.3199,  0.9901,  1.8025, -0.1627,  0.1149,  0.0410,
         0.0555,  0.5749,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1279, -1.2440,  0.2057, -0.3175,  0.0345,  0.0077,  0.0851, -0.1923,
        -0.1869, -0.2577, -0.0581,  0.0275, -0.0709, -0.0559, -0.0333, -0.3492,
        -0.6430,  0.0964, -0.1206, -0.0888, -0.0457, -0.0855,  0.5007,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2144, -0.4241, -0.0300, -0.1151, -0.0037, -0.0124, -0.1205, -0.0832,
        -0.2704, -0.0482,  0.0302, -0.0748,  0.0508, -0.0293, -0.9703, -0.1026,
        -0.0051, -0.1588, -0.2680, -0.0943, -0.0042,  0.0188,  0.0538,  0.0952,
         0.0774, -0.0030, -0.4015, -0.3953,  0.0281, -0.1222, -0.0920,  0.0162,
         0.0707, -0.0217,  0.0712, -0.1357, -0.0486,  0.1118, -0.0892,  0.1261,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3816,  0.2546,  0.1589,  0.1368,  0.3850,  0.1388,  0.1038,  0.5038,
        -0.2117,  0.0478, -0.0330,  0.0309,  0.2067,  0.0975,  0.1217,  0.4270,
         0.3027,  0.1438,  0.0673,  0.1132,  0.0578,  0.3137,  0.2611,  0.0165,
         0.0979,  0.0161, -0.0160,  0.1857,  0.2020,  0.1466,  0.2567,  0.2675,
         0.4722,  0.2035,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.0627, -0.1699, -0.0859, -0.0657, -0.3773,  0.0470,  0.0143, -0.0734,
         0.0092,  0.0422, -0.0213, -0.0177,  0.1524, -0.0193, -0.7224,  0.1864,
         0.0766,  0.0087, -0.2549, -0.0037,  0.0135,  0.0225,  0.0850,  0.0412,
        -0.0865,  0.0790,  0.0614,  0.0276, -0.0115,  0.0221,  0.1075,  0.0244,
         0.0229, -0.0983, -0.1334, -0.0421, -0.1142, -0.1555, -0.0758, -0.1790,
        -0.1552, -0.0117, -0.1142, -0.0525, -0.0274,  0.0092,  0.0012, -0.0125,
        -0.0404, -0.0316, -0.1169,  0.0199], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6851e-01,  2.2040e-02, -3.1699e-03, -8.7859e-02, -5.9732e-01,
        -4.0152e-02,  2.7370e-02,  1.0637e-01,  1.0037e-02, -2.4644e-03,
        -3.1760e-02, -5.0954e-02, -1.7799e-01, -8.3107e-01, -2.8002e-01,
        -2.6462e-01, -5.6518e-01,  5.7805e-02, -1.6826e-01,  2.0715e-01,
        -3.6584e-02, -1.4667e-01, -9.9602e-02, -1.8332e-01,  6.1501e-02,
         3.7047e-02,  5.5789e-02, -2.4466e-03,  4.1075e-04, -7.0634e-03,
        -1.5915e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0082, -1.2411, -0.0407, -0.0642,  0.0156,  0.0374, -0.0450,  0.0093,
        -0.0798,  0.1299, -0.0631, -0.3896, -0.1173, -0.0890, -0.1866, -0.3004,
        -0.0445, -0.1483, -0.4144, -0.3567, -0.1429, -0.0084, -0.1154, -0.0327,
         0.2302,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5454, -0.0360, -0.0378,  0.1204, -0.3153, -0.8192, -1.5994, -0.1405,
         0.0717, -0.0546, -0.0965, -0.0671,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2960,  1.9772,  0.1998,  0.2965,  0.3587,  0.0324,  0.1174, -0.0030,
         0.0174, -0.0999,  0.0414,  0.1130,  0.3034,  0.4790,  0.1501,  0.3844,
         0.1274,  0.0499, -0.0288, -0.1194,  0.0987,  0.0226, -0.0024, -0.1156,
        -0.0430,  0.2324,  0.0146,  0.0026, -0.1061,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0406,  2.0096, -0.0086, -0.2406, -0.0372,  0.5985,  0.9509, -0.0918,
         0.0037, -0.0155, -0.3042,  0.4534,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2266, -1.7686, -0.1869, -0.4831,  0.0676, -0.2772, -0.2685,  0.0879,
        -0.2226, -0.0718, -0.0231, -0.0178,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0254,  0.2312,  0.0419,  0.1033, -0.3233, -0.0536, -0.0934,  0.0460,
        -0.6508,  0.1917, -0.1403, -0.7495, -0.1211, -0.0579, -0.1713, -0.4434,
         0.1935, -0.1044, -0.1872,  0.8270,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1318e-01,  1.4549e+00,  4.0583e-02,  3.1916e-01,  1.2288e+00,
         2.9115e-01,  4.1892e-02, -3.4528e-01,  1.5844e-01,  4.3930e-02,
         4.3602e-02,  2.1078e-01,  1.3503e-01,  8.0321e-03, -3.3609e-02,
        -1.5791e-01,  2.4323e-01, -3.3667e-02,  1.6141e-02,  6.5363e-02,
        -1.4470e-02, -5.2605e-02, -4.4339e-03, -2.9143e-04, -3.1448e-02,
        -9.2546e-03, -2.4483e-01,  1.0746e-01,  1.3062e-01, -1.5354e-01,
        -2.4625e-02,  2.2738e-02,  1.8293e-01,  4.8485e-01,  7.4117e-02,
        -2.2099e-01, -1.9475e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2516, -3.7448, -0.0447, -0.0064,  0.0413,  0.0047, -0.1169,  0.5609,
        -0.2948,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0120,  0.0824,  0.1995, -0.6791, -0.0456,  0.1160, -0.4302, -0.3653,
         0.0934, -0.0214,  0.0236, -0.0084, -0.3481, -0.2863, -0.0891,  0.1095,
        -0.0536, -0.0654, -0.2510, -0.1965,  0.0945, -0.0109,  0.0216, -0.0109,
        -0.0843,  0.2694, -0.3574,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4027e-01,  1.3798e+00, -5.3298e-02,  2.4509e-01,  5.4877e-03,
         4.7296e-02, -1.3592e-02,  3.0545e-02,  1.3318e-02,  5.2892e-03,
         5.2233e-02,  2.8189e-01,  5.6139e-02,  2.5005e-02, -1.2594e-02,
         7.6533e-02,  5.5153e-01, -5.8000e-02, -2.6827e-02,  7.8528e-02,
        -1.4368e-01,  2.1536e-01,  2.4131e-01,  3.1779e-02, -1.8520e-02,
         6.6223e-03,  3.9620e-02,  1.4312e-01,  5.1281e-01,  6.2902e-04,
        -9.6918e-04,  2.7301e-02,  1.0985e-02,  6.1026e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0695, -0.0146, -0.0167, -0.0221, -0.0964, -0.0512,  0.0142,  0.0700,
         0.0143,  0.1015, -0.1799, -0.3582, -0.0461, -0.0272, -0.0293, -0.2548,
        -0.3691,  0.0699, -0.0797, -0.0286,  0.1283,  0.0350, -0.1897, -0.1065,
         0.0518, -0.0121, -0.0544, -0.0404, -0.0190, -0.0178, -0.0934, -0.2085,
        -0.3604, -0.1286, -0.0895, -0.0701, -0.2607, -0.0409,  0.0084,  0.0272,
        -0.0754, -0.2107,  0.0407, -0.0134, -0.1080,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0074, -0.1649, -0.1767, -0.0723, -0.2870, -0.4525, -1.1853, -0.0419,
        -0.2151, -0.5291, -0.3076, -0.0404, -0.2400, -0.0015, -0.1415, -0.1459,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1968, -0.0528,  0.1485,  0.1517,  0.1755, -0.0657, -0.1006,  0.2211,
         1.0995,  0.9610,  0.4134,  0.6367, -0.1507,  0.3020,  0.1908,  0.1019,
         0.0135, -0.0092, -0.0762,  0.0370,  0.0136,  0.0261, -0.0490,  0.0424,
        -0.1147, -0.0486, -0.0322,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2740e-02,  9.6465e-02, -8.9079e-02, -1.0229e-02, -9.9485e-02,
        -5.9276e-02, -7.9781e-02, -2.9826e-02, -1.1833e-01, -1.3294e-02,
         3.0658e-02, -4.7088e-02, -7.2257e-02, -1.2480e-02, -3.2737e-02,
        -2.7365e-02, -1.0750e-01, -2.6342e-02,  4.2071e-02, -2.0543e-02,
         9.1473e-02, -9.9825e-02, -9.3518e-02, -4.9725e-01,  7.0864e-02,
        -5.6545e-02, -1.1160e-01, -3.8255e-01, -6.5133e-02, -2.3147e-01,
         3.7422e-03, -1.4630e-01, -2.5020e-01, -2.2118e-02, -4.5585e-02,
        -2.7116e-02,  5.4921e-03, -1.6116e-01,  5.2661e-02,  4.4961e-02,
        -5.7702e-03, -4.4715e-02, -4.5664e-04,  2.2292e-02,  1.0177e-01,
         6.4753e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7144, -0.0469, -0.0096, -0.0357,  0.0272, -0.0948, -0.0813, -0.9350,
         0.1525, -0.0071, -0.1921, -0.0110,  0.1184, -0.1043, -0.0925,  0.0349,
         0.0013, -0.0511, -0.5262, -0.8852, -0.1917, -0.0670, -0.0035, -0.0106,
        -0.0106, -0.0088, -0.2781,  0.0465, -0.0210,  0.1190,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5642,  0.1019, -0.1695,  0.1704,  0.2422,  0.2922, -0.0655,  1.6871,
        -0.3068,  0.1816,  0.1154,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0673, -0.8424,  0.0089, -0.0835, -0.2649, -0.1434, -0.2307, -0.4133,
         0.0398,  0.0321,  0.0477,  0.0086, -0.1843, -0.0773, -0.0307, -0.2177,
        -0.0589, -0.0777, -0.0165, -0.0188,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1112, -2.3739, -0.6166, -0.2528,  0.4620, -0.3073, -0.5009,  0.0675,
         0.0438, -0.2062,  0.2245,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1941,  1.4753,  0.9407,  0.5433,  0.4448,  0.1431,  0.0806,  0.1837,
         0.2344, -0.0581,  0.0596,  0.1101, -0.0998,  0.0147, -0.0131,  0.1314,
        -0.0264,  0.0201, -0.0634,  0.0057, -0.2352,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8011, -1.0370, -0.8672, -0.0080,  0.1122, -1.6436,  0.3032, -0.0465,
         0.1114, -0.1499, -0.0040, -0.0042,  0.0414,  0.0654, -0.0317,  0.0640,
         0.5321, -0.2473,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1026, -0.3874, -0.6819,  0.0745,  0.0995,  0.0592,  0.0295,  0.0594,
        -0.0259,  0.0066, -0.0143,  0.0253, -0.0221, -0.1705, -0.2345, -0.0341,
        -0.0383, -0.1804, -0.1218,  0.0422, -0.0266, -0.2168, -0.3726,  0.0204,
        -0.3351, -0.0648, -0.2518, -0.1166, -0.1180, -0.2362, -0.0136, -0.0285,
         0.0008,  0.0203, -0.0332,  0.0572,  0.0554,  0.0566,  0.0721,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1851, -1.8801, -0.3221, -0.4354, -0.1274, -0.0101, -0.0463, -0.0177,
        -0.2164, -0.1024, -0.1346, -0.3990, -0.1035, -0.0601,  0.1574, -0.0414,
        -0.3324, -0.0284, -0.0094, -0.1128, -0.0666, -0.0187,  0.0394,  0.0588,
         0.0242, -0.0439, -0.0999, -0.0228, -0.2898, -0.4477, -0.0309,  0.0221,
        -0.1192, -0.3235,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.4556, -1.2532, -0.4835, -0.6738,  0.2463, -0.3364,  0.6590,  0.0240,
        -0.5093,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0483,  1.0303,  0.4361,  0.3837,  0.1698,  0.2407,  0.0587, -0.0030,
         0.0559, -0.1744,  0.2079,  0.0800, -0.0048,  0.0581,  0.0841,  0.3255,
         0.0701,  0.5398,  0.0953,  0.0484, -0.0810, -0.0183, -0.1006, -0.0338,
         0.1496,  0.1062,  0.0248, -0.0117,  0.0089, -0.0514,  0.1751,  0.2679,
        -0.0321,  0.2017, -0.1450, -0.0499, -0.0338,  0.0235,  0.1287, -0.0891,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0478, -0.3928, -0.5308, -0.0858, -0.2840, -0.0378,  0.0190, -0.0346,
        -0.2078, -0.2671, -0.0110,  0.0173, -0.0028, -0.0149, -0.0649,  0.0044,
        -0.0396,  0.1693, -0.0461,  0.1012, -0.0169,  0.0449,  0.0064, -0.1207,
         0.0705, -0.0961, -0.2039,  0.0530, -0.4020, -0.2941, -0.0704, -0.2482,
         0.0073, -0.0828,  0.0714, -0.1665, -0.1873, -0.2987, -0.0036,  0.0334,
         0.0227, -0.0212,  0.0127,  0.0229, -0.0318,  0.0398, -0.0743,  0.0751,
         0.0550, -0.0510,  0.1194, -0.0696,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3645,  1.8117,  0.0178,  0.0731,  0.0043, -0.0885,  0.2102,  0.0521,
        -0.0262, -0.1567, -0.1526, -0.0616,  0.3971,  0.0345,  0.1806,  0.3690,
         0.1462, -0.1086,  0.3092, -0.0891,  0.0126,  0.6136,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1171,  2.7534,  0.1341,  0.1109, -0.1189, -0.1247,  0.1434,  0.1288,
        -0.1381,  0.0934,  0.1327,  0.0516,  0.1901,  0.0898,  0.0248,  0.4807,
         0.2811,  0.1174,  0.0238,  0.0649, -0.1804,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1273, -1.1055, -0.1984, -0.1596, -0.0155, -0.0050, -0.0534,  0.0626,
        -0.0725,  0.0430, -0.0245, -0.0438,  0.0357,  0.0923,  0.0118,  0.0506,
         0.1196,  0.0232, -0.0653, -0.0085,  0.0315,  0.0318, -0.0135,  0.0958,
         0.0081,  0.0163, -0.1028, -0.1343, -0.0439, -0.1914, -0.3475, -0.3163,
        -0.0545, -0.2038, -0.1109, -0.0510, -0.0128, -0.0462, -0.0041, -0.0515,
        -0.3803, -0.0071,  0.0866, -0.0644, -0.1415,  0.0300, -0.0588, -0.0633,
         0.0243,  0.0773, -0.1856, -0.0563, -0.0234,  0.0598, -0.0428],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0285, -0.1710, -0.0052, -0.1979, -0.1072, -0.1042, -0.2405, -0.0085,
         0.0331, -0.0359, -0.0680,  0.0049, -0.0894, -0.1050, -0.0885, -0.0146,
         0.0650, -0.0619, -0.0275, -0.1458, -0.0279, -0.0550, -0.1189,  0.0568,
        -0.0570,  0.0539, -0.0750, -0.2238, -0.0376,  0.0587,  0.0906, -0.0034,
        -0.0766, -0.5234, -0.0312, -0.2261, -0.1224, -0.2520, -0.1905, -0.3433,
        -0.0965, -0.0954,  0.0008, -0.0559, -0.0518,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2884, -1.1043, -0.4367, -0.5416,  0.0918, -0.1390, -0.0826, -0.2259,
        -0.2023,  0.1185, -0.0639, -0.0207, -0.0497, -0.5379, -0.1564,  0.3842,
         0.0755, -0.3274,  0.0262,  0.0098, -0.3425,  0.0209,  0.2211,  0.1472,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1691,  0.0690,  0.0399,  0.2130, -0.0082, -0.0680, -0.2621, -0.3071,
         0.0455, -0.0133, -0.0647,  0.0483, -0.0782, -0.1544, -0.0026, -0.0506,
        -0.3237, -0.0615, -0.1793, -0.2079, -0.0681,  0.0186, -0.2030, -0.4237,
        -0.2836, -0.1858, -0.3061, -0.0082, -0.0696, -0.3665, -0.0429, -0.0877,
        -0.1525,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1192, -2.6549, -0.2330, -0.6962, -0.0602, -0.1590,  0.0521, -0.0591,
        -0.7167,  0.0940, -0.0616, -0.1127,  0.1736, -0.3426,  0.0823,  0.0574,
        -0.1660,  0.1342, -0.1261,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0800, -0.1406,  0.0516, -0.0459, -0.4830,  0.0566, -0.2626, -0.1941,
        -0.4400, -0.0257, -0.0145,  0.0012,  0.0236,  0.0503, -0.0006,  0.0256,
        -0.1550, -0.2083, -0.0861,  0.0975, -0.2901, -0.3759, -0.1559,  0.0081,
         0.2113,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4475, -2.2059, -0.5457, -0.2162, -0.4338, -0.9773, -1.1338,  0.1279,
        -0.0342,  0.0899,  0.0260, -0.0437, -0.3087, -0.0993, -0.0678,  0.1352,
         0.3058,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-9.8773e-02,  6.0213e-02, -2.6908e-01, -4.4655e-02, -3.3554e-03,
         2.5382e-05, -3.9407e-02, -3.5299e-02,  3.7215e-02, -4.2323e-01,
        -1.5296e-01, -2.3871e-02,  8.1107e-02,  1.0726e-02,  2.6790e-02,
         1.0581e-02, -4.1911e-01, -3.9435e-01, -4.4650e-02, -3.4927e-01,
        -1.1145e-01, -9.2783e-01, -1.3249e+00, -9.4533e-02, -1.9087e-01,
        -1.9129e-01, -6.2073e-03,  1.8332e-02, -1.1157e-01,  7.0992e-02,
         3.6954e-01, -2.5223e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2329,  0.4668,  0.3865,  0.2064,  0.0892, -0.0324,  0.3305,  0.6041,
         0.0974,  0.0415, -0.0288,  0.0441, -0.0188,  0.1012,  0.0905, -0.1065,
         0.0476,  0.1365,  0.1922,  0.0828,  0.1406, -0.0118,  0.3132,  0.4215,
        -0.0981,  0.0491, -0.1450,  0.0506,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4278, -0.6093, -0.0642, -0.0107, -0.0024, -0.2655, -0.7487, -0.1141,
        -0.1019, -0.1379, -0.0238, -0.2604,  0.0232, -0.1010,  0.0073, -0.3529,
        -0.1125, -0.4495, -0.1693,  0.0133, -0.0275, -0.0092, -0.0469, -0.0338,
        -0.2176, -0.0300, -0.0031,  0.0726,  0.4718,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5345,  0.1839, -0.0324,  0.2431,  0.8163,  0.3394,  0.0305,  0.0039,
         0.0330, -0.0597,  0.0249,  0.0704,  0.2761,  0.6843,  0.0773,  0.1777,
        -0.1606,  0.6157,  0.1215,  0.4478,  0.0198,  0.0864,  0.2192,  0.0337,
         0.1099,  0.1115,  0.0520,  0.0368, -0.0594,  0.0390, -0.0017,  0.0082,
         0.0516,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3123, -1.3261,  0.0121, -0.3281, -0.0346, -0.1836,  0.0143, -0.2989,
         0.0066, -0.1289, -0.1582,  0.0629,  0.0263,  0.0076, -0.0257,  0.0704,
        -0.1758,  0.0138, -0.5043,  0.0785, -0.3009, -0.0366, -0.2806,  0.2290,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3382, -0.8253, -0.6132, -0.0500, -0.3352, -0.0630,  0.0563, -0.1850,
         0.0407, -0.2785,  0.0142, -0.2735, -0.2841, -0.1140,  0.0396,  0.0237,
         0.1192, -0.1468, -0.0657, -0.6475, -0.1946, -0.0277,  0.2838,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0864, -0.5658,  0.0977,  0.0170, -0.0321, -0.1627,  0.0908,  0.0372,
         0.0451, -0.1204,  0.0808, -0.1288, -0.0810,  0.0094,  0.0074, -0.2545,
        -0.6453, -0.2891, -0.1877, -0.0397, -0.0682, -0.3230, -0.0870, -0.0270,
        -0.3121, -0.0064, -0.0173,  0.0100,  0.0352, -0.3684, -0.2214,  0.0409,
        -0.1363,  0.0281, -0.0929, -0.3632, -0.2179, -0.2793, -0.0744,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2789,  0.2046, -0.0093,  0.0549,  0.4050,  0.0604,  0.0224, -0.0557,
         0.0419, -0.0381, -0.0630, -0.0191, -0.0264, -0.0133, -0.1075,  0.3143,
        -0.0179, -0.0897,  0.0196, -0.0608,  0.3259, -0.0045,  0.0649,  0.1916,
         0.6850,  0.6948,  0.1966,  0.0707, -0.0901,  0.2456,  0.0498, -0.0468,
         0.0145,  0.0489,  0.0406, -0.0766,  0.0738,  0.3936,  0.0119,  0.0414,
         0.0851,  0.0398, -0.1569,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4620e-01, -9.5838e-01,  2.6479e-02,  3.7457e-02,  4.1164e-02,
         4.1074e-02, -8.3390e-02,  7.3585e-02, -3.1175e-01, -3.5855e-02,
        -1.1636e-03, -4.3289e-03, -5.6959e-02, -5.4232e-02, -3.1937e-02,
        -3.8151e-02, -1.3389e-01, -2.9943e-01, -3.5489e-02, -1.8233e-03,
         9.1129e-02,  1.4186e-01, -6.7081e-03,  8.0430e-03,  3.3411e-02,
         1.6956e-03, -3.2355e-02,  1.1380e-02,  3.0308e-02,  2.5348e-02,
         4.4061e-02, -2.9975e-01, -3.3557e-01, -1.1847e-01, -7.5589e-02,
        -2.2237e-01, -6.2430e-02, -3.0093e-01, -3.7635e-01, -1.7072e-01,
        -1.7507e-01, -2.1904e-01, -1.0994e-02,  1.4157e-03, -1.3765e-02,
        -3.2022e-03,  6.7600e-03,  3.1848e-02,  1.4158e-02,  5.0167e-04,
        -1.0880e-03,  4.5933e-02,  4.4579e-02,  3.1918e-02, -9.2169e-02,
         4.9401e-02, -8.2516e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3177, -2.1508,  0.1294, -0.2535, -0.3287,  0.3877, -0.4298, -1.1407,
        -0.4109, -0.1381, -0.3720,  0.0810, -0.1082,  0.1720,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4009,  0.6064,  0.3369,  0.0218,  0.0368,  0.0339, -0.0169, -0.0023,
         0.1609,  0.0087,  0.1287,  0.5695,  0.4852,  0.0649,  0.2979,  0.2194,
         0.2002,  0.2914, -0.1481,  0.2472,  0.4275,  0.0408,  0.1572, -0.0030,
        -0.0130,  0.0362,  0.0179, -0.0466,  0.0277, -0.2447,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1003,  1.3603, -0.2690,  1.3126,  1.0600,  0.4947,  0.2509, -0.1331,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.0123, -0.4661, -0.0676,  0.0208, -0.1667, -0.3995,  0.0259, -0.0012,
         0.0606,  0.0388,  0.0414, -0.0694, -0.2260, -0.3112, -0.0131, -0.0131,
        -0.0100, -0.0234,  0.0213,  0.0493, -0.0616, -0.0477,  0.0479, -0.0386,
        -0.2717, -0.1000, -0.2236, -0.0683, -0.2329, -0.3348, -0.0507, -0.1851,
        -0.1565,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7171e-01, -1.2326e+00, -4.6070e-01, -1.7045e-01,  1.9213e-02,
        -2.3322e-01, -1.0626e-01, -1.0604e-01, -3.6987e-02, -2.0018e-01,
        -3.3501e-02, -1.0369e-03, -2.4351e-02, -6.6979e-03,  9.7732e-02,
        -3.9423e-02,  8.9770e-03,  3.8478e-02, -6.3808e-02,  4.4565e-02,
        -3.9753e-02, -4.3359e-02,  1.1626e-02, -1.3536e-02,  8.0292e-03,
        -2.4156e-02, -1.2350e-02, -2.5498e-01,  1.9640e-02,  1.0325e-02,
        -3.4202e-02, -3.1538e-02, -3.4040e-01, -3.3693e-01,  2.4207e-02,
        -1.6788e-01, -8.0646e-02,  2.3597e-02,  4.0114e-03, -2.8008e-03,
         3.1185e-03,  5.1809e-02, -2.7216e-02, -5.2705e-02,  1.2932e-02,
         8.9859e-02, -1.1342e-02, -2.9099e-02, -1.3963e-02,  2.6936e-02,
        -1.6138e-02, -8.8966e-02, -1.0955e-02, -6.1744e-02, -6.5638e-02,
         1.0372e-02,  3.0169e-03, -3.7003e-02, -6.3160e-02, -9.5600e-02,
        -8.2443e-02, -8.6461e-03, -3.8404e-01, -5.7829e-02,  2.1508e-02,
        -8.6716e-03, -6.9022e-03,  2.4982e-02, -3.0494e-02, -1.1116e-01,
        -6.9711e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1637, -0.1153, -0.0807, -0.0293, -0.1381, -0.1875, -0.8510, -0.0414,
        -0.0276,  0.0450, -0.0090, -0.0547,  0.0317,  0.0196, -0.0297, -0.0147,
        -0.0198, -0.0507, -0.1130, -0.6138, -0.1494,  0.0451,  0.1028, -0.3045,
         0.0429, -0.2129, -0.1877, -0.0324, -0.0165, -0.0716,  0.0317, -0.0425,
        -0.2928, -0.2208, -0.1047, -0.0826, -0.3767, -0.0560,  0.0189,  0.0297,
         0.0817,  0.1477,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6234e-01, -1.2749e+00,  1.1153e-01,  4.3911e-02, -1.1875e-03,
         6.2826e-02,  8.4721e-02,  3.8785e-02,  2.8505e-03, -2.5985e-01,
        -1.9032e-01, -1.9289e-01, -5.4279e-01,  1.7513e-02, -3.8351e-02,
         1.0457e-01,  2.4146e-02, -5.2843e-01, -6.1795e-01,  1.8757e-03,
        -8.1687e-02, -1.9000e-01, -2.7226e-02, -9.4479e-02, -4.1327e-02,
        -3.8626e-01, -2.1914e-01, -5.1551e-03, -3.4989e-01, -9.1429e-02,
        -6.4946e-02, -3.2729e-02,  2.2861e-02, -1.5351e-02,  3.0122e-02,
        -2.5047e-02,  1.0119e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4957, -2.2427,  0.0087, -0.3327,  0.0565,  0.1601, -0.0401,  0.2056,
         0.1740, -0.0930, -0.1849,  0.1449, -0.5983, -0.2273, -0.1427,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0567,  0.1291, -0.0113,  0.1086,  0.5223,  0.5013, -0.0909,  0.1659,
         0.7136,  0.0338, -0.1917, -0.0655,  0.2883, -0.0810, -0.2692, -0.0063,
         0.1433, -0.2234, -0.0292,  0.1143,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0328,  0.0392,  0.0186,  0.0374, -0.7023, -0.1561,  0.0067, -0.0768,
        -0.1043, -0.0363, -0.1600, -0.3143, -0.0962, -0.0689, -0.1282, -0.0767,
        -0.0026, -0.0658,  0.0569, -0.1340, -0.0484,  0.0017, -0.0348,  0.0282,
        -0.0373, -0.1802, -0.3757, -0.0027,  0.0044, -0.1660, -0.3115, -0.0192,
        -0.0335, -0.0330, -0.0957, -0.1156,  0.0708, -0.0403, -0.0604,  0.0367,
         0.1027, -0.0533, -0.1703,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0330, -0.0517, -0.0928,  0.0613, -0.1434, -0.0636, -0.2207, -0.6686,
        -0.0502,  0.0671,  0.0729,  0.0796, -0.0149, -0.2854, -0.2989, -0.0392,
        -0.0028,  0.0710, -0.1087,  0.0179, -0.4590,  0.0021,  0.1342,  0.0900,
        -0.0217,  0.0045, -0.0165,  0.0408, -0.0630, -0.0279, -0.0434, -0.1346,
         0.0387, -0.2774, -0.2771, -0.1008,  0.0152, -0.0489,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0374, -2.0552, -0.7117, -0.5074,  0.0934, -0.3559,  0.1970, -0.0524,
        -0.1245, -0.5298, -0.1098,  0.0448, -0.3487, -0.0097,  0.0462, -0.2747,
         0.0473, -0.4274, -0.1171,  0.3413, -0.1284,  0.2800,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0304,  0.7655, -0.0557, -0.0163, -0.4575,  0.0725,  0.0289,  0.0800,
         0.2430,  1.0491,  0.2058, -0.0495,  0.2081,  0.1973,  0.1652,  0.2698,
         0.7518,  0.3407,  0.1160,  0.3823, -0.0379, -0.0349,  0.1350,  0.0625,
        -0.0249, -0.0439,  0.1073, -0.0990,  0.0872,  0.3491, -0.1288,  0.0119,
        -0.0782,  0.1144,  0.1925,  0.7197, -0.0582,  0.2197, -0.1737,  0.3272,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1733,  0.0321,  0.0492,  0.0329, -0.0397,  0.0443,  0.1320, -0.0843,
        -0.4328, -0.0382, -0.0443, -0.1086,  0.0088, -0.6130, -0.3627,  0.0087,
         0.0412, -0.0757, -0.1794,  0.1329,  0.0449,  0.0681, -0.0091, -0.0075,
        -0.0484, -0.3354, -0.1531, -0.2002, -0.3733,  0.0376,  0.1928, -0.2555,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0658, -1.7460, -0.1591, -0.0042, -0.1136, -0.0372, -0.0488, -0.2463,
        -0.4927,  0.0740, -0.0439, -0.2388, -0.1007, -0.0078,  0.0189, -0.0229,
        -0.1847,  0.0334, -0.0035,  0.0935, -0.0164,  0.0890,  0.0667, -0.3311,
         0.0143,  0.0848, -0.1029,  0.0275,  0.0369, -0.0685,  0.0326,  0.0076,
        -0.0777,  0.2533, -0.2352,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.5230, -0.3407, -0.6912, -0.3517, -0.0628, -0.1524, -1.1066, -0.5235,
         0.5266, -0.3344,  0.1461, -0.2675,  0.0702, -0.1870,  0.0120, -0.0144,
        -0.1673, -0.0177,  0.0989, -0.0249, -0.2685,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2086, -2.1544,  0.1041, -0.2226, -0.5007, -0.5641, -0.1078, -0.3049,
        -0.4941, -0.0229,  0.0126, -0.0698,  0.0166,  0.0282, -0.1418,  0.0836,
        -0.0692,  0.1205,  0.0866,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3924, -0.4146, -0.4599,  0.1094, -0.1210, -0.1082,  0.1764, -0.1087,
         0.0114, -0.2993, -0.7653, -0.0105, -0.1378, -0.4726, -0.1380, -0.0629,
        -0.1023, -0.2457,  0.6254, -0.0099,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1617, -1.4478, -0.0868, -0.4610,  0.0628, -0.0840, -0.0190, -0.1982,
        -0.0726, -0.0600, -0.0212,  0.0209, -0.0073, -0.1805, -0.0353, -0.0672,
        -0.1373, -0.1061, -0.1214, -0.1418,  0.0955, -0.0409,  0.1301,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1119, -0.9833, -0.2452, -0.2100,  0.0677, -0.3414, -0.0671, -0.3329,
        -0.3206, -0.6407, -0.0189, -0.4254, -0.2652, -0.7097,  0.2672,  0.0972,
        -0.0933, -0.1314, -0.2033, -0.0027, -0.1351,  0.0062, -0.0241, -0.1106,
         0.2032,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0040, -3.0913, -0.4980, -0.0374, -0.2115, -0.1784,  0.0095, -0.0297,
         0.3773, -0.1419, -0.7440,  0.0592, -0.0246,  0.0490, -0.0151,  0.1010,
        -0.0464,  0.0372,  0.0285, -0.0053, -0.2037, -0.2080, -0.1830, -0.1549,
        -0.0930, -0.0454,  0.0643,  0.0365, -0.1259, -0.0877,  0.0353,  0.1306,
        -0.0853,  0.0525,  0.5627,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2726,  0.0022, -0.0226, -0.1881, -0.0285,  0.1315, -0.3801, -0.1530,
        -0.5600, -0.1838, -0.2971, -0.0456,  0.0103, -0.2845, -0.2076, -0.4004,
        -0.1503, -0.4692,  0.1998,  0.1134,  0.0959,  0.1106,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3556,  0.0592,  0.3895,  0.0190,  0.1636,  0.3271,  0.0117,  0.0801,
        -0.0072,  1.0487,  0.6066,  0.2560,  0.1556,  0.1210,  0.0165,  0.1568,
         0.0254,  0.1366,  0.1332, -0.0412,  0.0172,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2933, -2.2064, -0.0477, -0.1094,  0.0915, -0.0657, -0.4466, -0.5614,
        -0.0182, -0.0083, -0.2371, -0.1959, -0.0086, -0.6021, -0.0199, -0.3401,
        -0.1711,  0.1620,  0.0047,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1539, -0.0416, -0.0518,  0.0604,  0.0345, -0.3359,  0.0077, -0.0405,
         0.0272, -0.0327, -0.0506, -0.1053, -0.0566, -0.0944, -0.0152, -0.0219,
        -0.1945, -0.1966,  0.0029, -0.1270, -0.0733, -0.0542, -0.1408, -0.0022,
        -0.0764,  0.0352, -0.0246, -0.2579, -0.1255, -0.3033,  0.0221, -0.0254,
        -0.0717, -0.2955, -0.0160, -0.0180,  0.0525,  0.0519,  0.0531,  0.0063,
         0.0009,  0.0300, -0.0114, -0.0143,  0.0264,  0.0375,  0.0308],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3031, -1.1539, -0.4395, -0.0944,  0.1335,  0.1288, -0.0210,  0.0272,
        -0.2676,  0.0299,  0.0530, -0.0760, -0.0210, -0.2856,  0.0093, -0.0341,
        -0.6933, -0.0091,  0.0258, -0.0546,  0.0923, -0.1374, -0.3915, -0.3190,
         0.0637,  0.1729, -0.1040, -0.0577, -0.0012, -0.0362, -0.0321,  0.1231,
        -0.6988,  0.0274,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1909, -2.1114,  0.0848,  0.0470, -0.1171, -0.1257, -0.0360,  0.1231,
        -0.0458, -0.0551, -0.0085, -0.0706, -0.2130, -0.3178,  0.0023, -0.0619,
        -0.0163,  0.1666, -0.0576, -0.4899, -0.0953,  0.0189, -0.0211,  0.0575,
         0.0247,  0.0535,  0.0464, -0.0938, -0.0593, -0.1733,  0.0416, -0.0094,
        -0.0341,  0.0046,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.7350,  1.9628,  0.2548,  0.0426,  0.4693,  0.7377,  0.3380, -0.0434,
         0.0685,  0.5464,  0.2285,  0.1484,  0.1847,  0.5123,  0.1208,  0.1630,
         0.0442,  0.1317,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5267,  0.0026, -0.0283,  0.1093, -0.0393,  0.0336,  0.0506, -0.0210,
        -0.2699, -1.3417, -0.0684, -0.0756, -0.0136, -0.1933, -0.0169, -0.0414,
        -0.1686,  0.1762, -0.0902, -0.9744,  0.1747, -0.1230, -0.2023,  0.0331,
        -0.0542,  0.2809, -0.0825,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3901e-01, -5.0198e-04, -1.8372e-01,  6.9013e-02,  1.1154e-01,
         3.8109e-02,  1.4449e-01, -5.0946e-02,  3.5850e-01,  5.3953e-02,
        -6.9143e-02,  1.5362e-01,  1.3636e+00,  2.7298e-01, -1.2364e-02,
         2.1117e-02,  1.5466e-01,  1.5788e-01, -1.1479e-01,  1.2262e+00,
         1.1639e-01, -4.6965e-02,  2.2780e-01, -1.1001e-01,  4.7613e-02,
        -9.8498e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0816, -0.9511,  0.1575, -0.2666, -0.5844, -0.0845,  0.0231,  0.0472,
        -0.2427, -0.1776, -0.2243, -0.1338,  0.0559,  0.1784,  0.0386, -0.2996,
        -0.0964, -0.0094, -0.1401,  0.0087,  0.0012,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5430, -1.1297, -0.8772, -0.8652, -0.1720, -0.0507, -0.6786, -0.0274,
        -0.1224,  0.1832,  0.0035, -0.1545,  0.1502,  0.0394, -0.2692,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2771, -0.0530, -0.0552, -0.0032, -0.1114,  0.0920,  0.0534,  0.2719,
         0.1754, -0.1055, -0.0589,  0.1564,  0.8813,  0.2662, -0.1875, -0.0390,
        -0.0232,  0.1550,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0382,  1.9041,  0.7020,  0.8503,  0.0533,  0.2388,  0.1093,  0.2395,
         0.1120,  0.4085,  0.4770,  0.1643, -0.2505,  0.3342,  0.1076, -0.1349,
        -0.0685,  0.1059, -0.7576,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3478, -0.5398, -0.2020,  0.0119, -0.0637, -0.1240,  0.0404, -0.0078,
        -0.0334,  0.0084, -0.0035,  0.0835,  0.0124, -0.0347, -0.0305,  0.0744,
        -0.3070, -0.0236,  0.0386, -0.6475, -0.2411, -0.0528, -0.2668, -0.0419,
        -0.3774, -0.0360, -0.3086,  0.0811, -0.0200, -0.2573, -0.0495,  0.2095,
        -0.1347, -0.0345, -0.0548, -0.0347, -0.4060, -0.0051,  0.0390, -0.0402,
         0.0244,  0.0515,  0.0281, -0.4238], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1833e-01, -1.5882e+00, -2.1608e-01, -1.9848e-01, -5.9059e-02,
        -4.7154e-02, -9.8038e-02,  6.1961e-02, -3.0861e-01, -7.7929e-02,
        -2.3108e-01,  6.0127e-02, -1.2507e-01, -1.3056e-02,  7.5262e-02,
         5.7860e-02,  4.3456e-02, -5.0066e-01,  2.2422e-02, -5.9893e-02,
        -2.0468e-01,  1.2249e-03,  2.4134e-02, -6.6357e-03, -1.2741e-01,
         3.4644e-02,  3.2433e-02, -3.8204e-01, -1.0240e-01, -8.4673e-02,
        -4.2920e-02, -9.8368e-02, -9.4288e-05, -2.1246e-01, -1.9616e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1407,  2.7424, -0.1088,  0.0864,  0.0709, -0.1603,  0.4713,  0.3166,
        -0.2207, -0.0345,  0.0396,  0.1921,  0.2700,  0.0036,  0.0192,  0.4306,
         0.0755,  0.0170, -0.1576, -0.0283, -0.0924,  0.1232, -0.2681, -0.6690,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6947e-01, -1.3303e+00, -1.7746e-01, -2.6330e-01, -1.4640e-01,
        -1.3070e-01, -1.0851e-01, -1.1957e-01, -5.5180e-01, -5.6152e-02,
        -1.1967e-03,  3.1402e-02, -1.8335e-02, -2.0301e-01, -3.0605e-01,
        -1.0418e-02,  7.6686e-02, -8.5179e-03, -4.0362e-01,  1.4744e-01,
         7.3381e-02, -2.0409e-01, -1.1572e-02, -1.4564e-01,  3.8963e-02,
        -2.5456e-01, -1.4939e-01, -8.3781e-02, -1.0449e-01, -2.9284e-02,
        -2.4547e-01, -3.6389e-02, -1.7744e-01, -1.7715e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2276, -0.1001,  0.0805,  0.1617, -0.1152, -0.1795,  0.9263,  0.0356,
        -0.0939, -0.0118,  0.0402,  0.0966,  0.5679,  0.1165,  0.1848,  0.8601,
         0.1290,  0.0348,  0.0119, -0.1567, -0.1566, -0.0271, -0.1400,  0.3473,
         0.4223,  0.1192,  0.2310,  0.0165, -0.2155,  0.2281,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.4616,  0.2371, -0.1341, -0.7456, -0.1238, -0.0211, -0.1214, -0.0668,
        -0.2814, -0.4090, -0.1266, -0.0474, -0.3540,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2489,  0.0479, -0.0464,  0.3078,  0.0112, -0.1221, -0.2647, -0.3900,
        -0.6694,  0.0608, -0.0011, -0.3235,  0.2549, -0.3060, -0.5936, -0.0314,
         0.1624, -0.1151,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2444,  0.7214,  1.3531,  0.1744,  1.0604, -0.2742, -0.1715,  0.5217,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3439e-02, -8.9526e-01,  2.4526e-02,  1.4280e-02, -1.1443e-01,
        -5.6144e-02, -5.5951e-02, -1.7307e-02,  5.2281e-02,  3.7380e-02,
         3.1444e-02,  1.0155e-01, -6.4824e-02, -7.0032e-03, -2.6072e-02,
        -3.2740e-02, -1.3530e-02,  6.6975e-04,  2.8236e-03, -6.4992e-02,
         6.9402e-02, -3.2190e-02, -2.2589e-01, -1.8705e-01, -6.1267e-02,
        -1.1875e-01, -5.3152e-02, -4.3561e-02, -4.4061e-01, -1.6305e-01,
        -5.6530e-02, -2.0043e-01,  1.1057e-02, -5.1613e-01, -1.4476e-02,
        -2.1094e-01, -1.5697e-01, -5.9891e-02,  8.1001e-02,  5.3163e-03,
        -3.5243e-02, -1.4574e-02, -3.7793e-02,  3.7173e-03,  1.1480e-01,
         5.4923e-02, -2.3261e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6234,  0.8608,  0.3614,  1.0831, -0.1470,  0.7902,  0.9847,  0.8711,
         0.6586,  0.0176, -1.2256,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0766,  2.1652,  0.0025,  0.6546, -0.2176, -0.1438, -0.0819,  0.4291,
         0.1206,  0.4048,  0.1316,  0.0271,  0.6138,  0.0877, -0.2764,  0.3192,
         0.5856, -0.2905,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1031, -0.3518, -1.5079,  0.1718,  0.1660, -0.1717, -0.9995, -0.1966,
        -0.2322, -0.0544,  0.1137,  0.0417, -0.1468, -0.0918, -0.1196,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7498, -0.9339,  0.0901, -0.0533,  0.0107, -0.5296, -1.2296, -0.2295,
        -0.1155, -0.4535, -0.6507,  0.0632,  0.0301,  0.1833, -0.2987,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4574,  0.1066, -0.0997,  0.4777,  0.1884, -0.1234, -0.2421, -0.1514,
        -0.1902, -0.0421,  0.0788, -0.2876, -0.6593, -0.0763,  0.0288, -0.2723,
        -0.2381,  0.1681,  0.0146,  0.0548, -0.4515, -0.5099,  0.0524,  0.1534,
        -0.0880,  0.2169,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6182,  0.2027,  0.0153, -0.0725, -0.0573,  0.1717,  0.2272,  0.6534,
         0.9391,  0.0037,  0.1173, -0.0305,  0.1302, -0.1328,  0.0830,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0269, -1.0584, -0.0659, -0.3344, -0.4621, -0.1117, -0.3596, -0.0182,
        -0.0476, -0.5529,  0.0587, -0.1263, -0.0648, -0.0872, -0.0031, -0.0076,
        -0.1192,  0.0048, -0.1824,  0.2289, -0.1895,  0.1316,  0.0621, -0.0379,
         0.0354,  0.0980,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1117,  0.0120,  0.1579,  0.0542, -0.0265, -0.0803,  0.0078,  0.0707,
        -0.0855, -0.1677, -1.0528, -0.1181, -0.1816,  0.1738, -0.3671, -0.2918,
        -0.1152, -0.0920, -0.0189, -0.2052, -0.0933, -0.0797,  0.0865, -0.2364,
        -0.0963,  0.1390,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.0117e-01, -1.6212e+00,  4.1904e-02,  7.2127e-02, -2.3156e-02,
        -3.3617e-04, -1.7965e-01, -1.2423e-01, -1.3944e-02, -1.3033e-01,
        -1.6011e-02, -1.1002e-01, -3.0470e-01,  8.0672e-02, -1.8016e-01,
         6.2208e-02,  5.1944e-03,  1.0559e-01, -1.4474e-01, -1.5447e-01,
        -1.0651e-01, -1.8180e-01, -9.9780e-02, -4.1749e-01, -3.2298e-01,
         5.5814e-02, -1.2980e-01, -7.0593e-02, -1.1893e-01,  1.2728e-03,
         4.5677e-02, -1.1312e-01, -1.7026e-02,  1.1710e-02, -7.3997e-02,
         1.5246e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0372, -0.7420, -0.3216,  0.0563, -0.3583, -0.1487, -0.4776, -0.4854,
        -0.0246,  0.0684, -0.1115,  0.0241, -0.0842, -0.0011, -0.0436, -0.0173,
         0.1596, -0.0045,  0.0738,  0.0838, -0.0353, -0.2295, -0.0551, -0.0029,
        -0.1891, -0.3659,  0.0310, -0.2507, -0.3163, -0.0907,  0.0647, -0.2838,
        -0.4195,  0.1038, -0.0016, -0.1149,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1854, -0.0647, -0.1845, -0.1057, -0.0531, -1.3034, -0.5047, -0.1740,
         0.2016,  0.1853, -0.0579, -0.1783, -0.5040,  0.0359,  0.0501, -0.0946,
        -0.0425, -0.0776, -0.2250, -0.0025,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0153, -0.4993,  0.0119, -0.0884, -0.0191, -0.0750, -0.0241, -0.1579,
         0.0064, -0.1936, -0.0377, -0.0094, -0.0544, -0.0032, -0.2404, -0.0538,
        -0.0724, -0.2364, -0.0512, -0.0155,  0.0428,  0.0330,  0.0129, -0.1125,
        -0.0597, -0.0097, -0.0081, -0.0432, -0.0550, -0.1448, -0.3511, -0.2620,
        -0.1760, -0.4403,  0.0398, -0.1062, -0.1900, -0.1749,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2101, -1.0875, -0.3218, -0.3903, -0.0282,  0.0661, -0.1703,  0.0694,
        -0.1974, -0.0825, -0.3264, -0.2773, -0.0276,  0.0909,  0.0397, -0.2627,
         0.0760, -0.2085, -0.0317,  0.0537,  0.0629,  0.0605,  0.0784, -0.0286,
        -0.0706, -0.0089, -0.0368, -0.1385, -0.4524,  0.0220, -0.1840,  0.0082,
         0.1094, -0.0187, -0.1025,  0.0337, -0.0163,  0.0713, -0.0036, -0.1290,
        -0.1953], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1106, -1.9666,  0.0988, -0.2081,  0.2021, -0.2687,  0.0600, -0.2219,
        -0.1553, -0.1129, -0.0659, -0.2991, -0.3775, -0.1426,  0.1018, -0.2576,
        -0.0030, -0.3550,  0.0493, -0.0973, -0.0643,  0.1955,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1046,  0.0133,  0.0194,  0.0054, -0.0433, -0.1649,  0.0055, -0.0517,
        -0.0376,  0.0368,  0.0183, -0.2509, -0.0349, -0.0767,  0.0757, -0.3599,
        -0.0074,  0.0022, -0.1338, -0.1740, -0.5100,  0.0993,  0.2343, -0.2089,
         0.0243,  0.0939, -0.0362, -0.1631, -0.4834, -0.1418, -0.2038, -0.2049,
        -0.0415, -0.0731,  0.0920,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8166,  0.1425,  1.5197,  0.2849,  0.4258,  0.0233, -0.0119, -0.1192,
        -0.0256,  0.0734,  0.0901,  0.0200,  0.1199,  0.0072,  0.0514,  0.2914,
        -0.2001, -0.0565,  0.2949, -0.0278,  0.2887, -0.0537,  0.0527,  0.1816,
         0.0662, -0.0202,  0.2943,  0.1290, -0.3620,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5014, -1.8284, -0.6125, -0.2714,  0.0218, -0.4141, -0.0927,  0.0232,
         0.0904, -0.2632, -0.0930, -0.1633,  0.0266,  0.0603,  0.0283, -0.1729,
        -0.2233, -0.0625, -0.1479,  0.1192,  0.0138,  0.0055,  0.1794,  0.3147,
         0.3258,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5637,  0.0354,  0.0602, -0.0820, -0.6456, -0.0513, -0.0653, -0.3891,
        -0.3873,  0.0517,  0.0821,  0.0437,  0.0057, -0.0547, -0.2655, -0.5980,
         0.0750, -0.1341,  0.0607,  0.0429, -0.0574, -0.0910,  0.1102,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3484, -0.0708, -0.0267,  0.0007,  0.0446,  0.1412,  0.0907,  0.0476,
        -0.0334,  0.0409,  0.0198,  0.0070, -0.1232, -0.4778,  0.0044, -0.1714,
        -0.0626, -0.2514, -0.0562, -0.0770, -0.0245, -0.0331, -0.0845,  0.0554,
        -0.3422, -0.0593, -0.6442, -0.2308, -0.5484, -0.1254, -0.2026, -0.1638,
        -0.1980, -0.1111, -0.2118, -0.0201, -0.1196,  0.0445, -0.1523,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0802,  0.2524,  0.0395, -0.9129, -1.1237,  0.0972,  0.1051, -0.4794,
        -0.4754,  0.1564, -0.1890,  0.1078,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.0749e-01,  2.5241e-01,  1.2526e+00, -3.4339e-02,  1.0910e-01,
         3.5103e-01,  2.0089e-01, -1.4023e-01,  1.6333e-02,  1.1321e-01,
        -2.2036e-02, -1.3321e-01,  3.5163e-02, -5.5807e-02,  1.9623e-02,
        -7.1571e-02,  5.0445e-02,  2.4582e-01, -6.7783e-02,  1.3471e-01,
         1.2561e-01,  1.7369e-01,  1.7034e-02,  3.0800e-02,  2.8920e-02,
         1.9204e-02,  2.8431e-02,  6.8419e-02,  4.6269e-02, -2.4676e-02,
        -1.5014e-02, -2.7251e-02,  2.1156e-01,  4.5534e-04,  5.0553e-04,
         1.2921e-02,  6.7230e-02,  2.3652e-03,  2.2796e-02, -1.0410e-02,
        -6.9869e-02, -5.4879e-02, -8.4584e-02,  7.2378e-02, -1.7679e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0065e-04,  2.6648e+00,  8.4500e-02,  5.4103e-01, -2.3592e-01,
         2.4378e-01, -4.0737e-01,  2.8742e-01,  1.1384e+00, -1.1752e-01,
        -3.1370e-01, -3.5836e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0588,  2.7720,  0.5664,  0.8448,  0.1476, -0.1357,  0.1732, -0.2068,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0634, -0.8740, -0.0103, -0.0752, -0.2142,  0.0292, -0.0486, -0.0695,
         0.0059, -0.2901,  0.0667, -0.0423, -0.0378,  0.0198,  0.0425,  0.0578,
         0.0693, -0.0150,  0.0151, -0.0565,  0.0459,  0.0480, -0.0642,  0.0237,
        -0.0498,  0.0320, -0.0015, -0.0870, -0.2066, -0.0915, -0.0235, -0.0347,
         0.0508, -0.0011, -0.0295, -0.0053,  0.0046, -0.0276,  0.0258, -0.0027,
        -0.0595,  0.0065, -0.0121, -0.0406,  0.0387,  0.2897, -0.4224,  0.1239,
        -0.2636, -0.3811, -0.1924, -0.0150, -0.1121,  0.0385, -0.0541,  0.0133,
        -0.0222, -0.0175,  0.0892,  0.0277,  0.0304], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4222e-02, -2.7078e-02, -1.0748e-03,  1.6070e-01,  6.5240e-01,
         1.2947e-02,  1.8624e-01, -4.6261e-02,  3.8286e-02, -7.3718e-02,
         8.6764e-02,  5.4552e-01, -4.7023e-02,  3.2330e-01,  3.5874e-01,
         4.3760e-04, -5.6146e-04,  4.7691e-02,  9.4718e-02,  1.0886e-01,
         2.5662e-01,  6.6478e-02,  2.1831e-01, -1.0162e-01,  2.5634e-01,
         1.1767e-01,  1.6057e-01,  8.1691e-02,  1.4857e-01,  1.4029e-01,
         1.0433e-01, -3.6921e-01,  1.9486e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0429,  0.3823,  0.9284,  0.4314,  0.0135,  0.1428,  0.2906,  0.0277,
         0.0228,  0.0974, -0.0424,  0.0161,  0.1294,  0.0421,  0.0822, -0.1043,
         0.1414,  0.1477,  0.6637, -0.0642, -0.2058,  0.1727, -0.1610, -0.1620,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0321, -0.5286, -0.3386, -0.1594, -0.0140, -0.3171, -0.0648, -0.2150,
        -0.0498, -0.1798, -0.3061, -0.2123, -0.5653,  0.0421,  0.0111,  0.0097,
        -0.0198, -0.0237,  0.3263,  0.2061, -0.0994,  0.0676,  0.1462,  0.2316,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2139, -0.0036, -0.2705, -0.6286, -0.0541, -0.1421, -0.2010, -0.4818,
         0.0615, -0.1496,  0.0616, -0.0743, -0.0321,  0.0249, -0.3964, -0.3018,
        -0.3570,  0.0938, -0.0463, -0.2820, -0.2667, -0.0282, -0.1922, -0.3465,
         0.0228, -0.0539, -0.2102, -0.0417, -0.0353,  0.0400, -0.1386, -0.1451,
         0.1194, -0.1127,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2930, -0.0961,  0.0987,  0.0389, -0.4000,  0.0402, -0.0608, -0.0430,
        -0.0972, -0.1332, -0.1015,  0.2221,  0.1656, -0.6937, -0.1992, -0.2017,
        -0.6000,  0.1112, -0.2638, -0.0800, -0.3163, -0.2324,  0.1144, -0.3315,
        -0.2895,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4548, -0.1143, -0.0891, -0.0084, -0.1141,  0.1284, -0.3925, -0.2308,
        -0.0569, -1.5612,  0.0868, -0.1074, -0.0196,  0.0333, -0.0120,  0.1626,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1795,  1.3957,  0.0691,  0.5911,  0.3447,  0.1445,  0.8154,  0.1646,
        -0.1013, -0.0370, -0.0456,  0.1195,  0.1451, -0.0876, -0.0688,  0.0020,
         0.1520,  0.1671,  0.0402, -0.0187, -0.0399,  0.1566,  0.2254,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5529,  0.2646,  0.3250,  0.7369,  1.6302,  0.2072,  0.0598,  0.1375,
         0.1951, -0.0261, -0.0547,  0.0280, -0.1196,  0.2969,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.0197,  0.3514,  0.4211,  0.9723,  0.1598, -0.2873,  0.2399, -0.1254,
         0.3384,  0.2264,  0.3788,  0.5074,  0.1188,  0.0279, -0.0163,  0.0680,
        -0.1710,  0.4426,  0.1328,  0.6189,  0.1131, -0.0764, -0.1620, -0.4802,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7230, -0.4394,  1.0568, -0.1551,  0.5693,  0.4462,  1.1409, -0.1695,
         0.0319, -0.1261,  0.1948,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2496,  0.0851,  0.0737, -0.0081, -0.1097, -0.3730, -0.4296,  0.1235,
        -0.9907, -0.2142, -0.0893, -0.0079, -0.0467,  0.2232, -0.1604, -0.0335,
         0.0499, -0.1315,  0.0039,  0.2014, -0.0187,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4992, -0.1271,  0.1462,  0.3152,  0.0961,  0.0426, -0.0505, -0.8046,
         0.0168, -0.6435, -0.0647,  0.0817,  0.0082, -0.4742,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4351,  0.2096,  0.2201, -0.0628,  0.0331,  0.0736,  0.0250,  0.0283,
        -0.0141,  0.2053,  0.2394, -0.1318,  1.0285,  0.2423,  0.0834, -0.0959,
         0.1739,  0.0836,  0.1491,  0.0912,  0.1074,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0053, -1.9307, -0.2774, -0.1724,  0.7956, -0.5123,  0.1278, -0.3962,
        -0.1146, -0.4791, -0.1408,  0.0134,  0.2286,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1043, -0.3476,  0.0056, -0.0661, -0.0502, -0.0158, -0.1282, -0.3654,
        -0.2449, -0.0390, -0.0430,  0.1068,  0.1045,  0.0466,  0.0475, -0.0792,
         0.0349, -0.0482,  0.0552,  0.1323,  0.0080, -0.0091,  0.0091, -0.0053,
        -0.7732,  0.0513, -0.5863, -0.3104, -0.3078, -0.1353,  0.0357,  0.0364,
        -0.0791,  0.0075,  0.0451,  0.0424, -0.1721,  0.0012,  0.0320, -0.0378,
        -0.1088, -0.0265,  0.0021,  0.1161, -0.0937], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0901, -0.2797, -0.4146, -0.1822, -0.0470,  0.0349, -0.0811, -0.1947,
        -0.0273, -0.0266, -0.2157, -0.0061,  0.0817, -0.0647, -0.1002, -0.0781,
        -0.6563,  0.0482, -0.0009,  0.0303,  0.0117, -0.1676, -0.0231, -0.2790,
         0.1121,  0.0060, -0.0526, -0.3504, -0.5839, -0.0397,  0.0422, -0.0325,
        -0.0316, -0.1125,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1598,  0.5268,  0.0327, -0.1896, -0.0535,  0.0403,  0.0469, -0.0459,
         0.1930, -0.0643, -0.0324, -0.0335,  0.2243, -0.0335, -0.0311, -0.0806,
         0.1213,  0.0972, -0.3469, -1.1070, -1.1562,  0.1193,  0.0552, -0.0980,
         0.0047, -0.3046,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1452, -1.6445, -0.1223, -0.4453,  0.2210, -0.2292,  0.0141, -0.2209,
        -0.0925, -0.1311,  0.0849,  0.0093,  0.0531,  0.1019, -0.1665, -0.5607,
        -0.5785, -0.0305, -0.2510, -0.0675, -0.1923, -0.1848, -0.1295,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8925e-01, -9.4545e-01, -1.9895e-01, -1.7355e-01,  1.7416e-02,
        -6.1097e-02, -1.0137e-01, -2.1184e-01, -4.5379e-01, -8.0740e-02,
         3.4883e-02,  9.4081e-02,  2.2862e-02, -4.6323e-02, -8.0478e-01,
         2.3552e-02, -5.8193e-02, -1.0504e-01, -2.2429e-01, -8.7762e-02,
         1.0369e-02, -1.0483e-02,  5.4164e-02,  1.4912e-02,  2.7720e-02,
         3.8107e-03, -2.8177e-01, -1.6101e-01, -8.4221e-02, -1.0291e-01,
         1.5965e-04,  9.8069e-02, -3.2232e-02,  4.0584e-02, -1.6481e-02,
         2.8544e-02, -7.6101e-02,  1.3430e-02,  2.4050e-02,  1.3617e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2935, -0.0143, -0.1132, -0.1086, -0.2083, -0.0131,  0.0217, -0.1791,
         0.0245, -0.0013, -0.0495, -0.0553, -0.1242, -0.0542, -0.1751, -0.2396,
        -0.2621, -0.0466, -0.0872, -0.1040,  0.0169, -0.2817, -0.1963,  0.0022,
        -0.1262, -0.0550, -0.0451, -0.2543, -0.0048, -0.0383, -0.1635,  0.0088,
        -0.1931, -0.0261,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.4386, -0.2330, -0.0085, -0.0545, -0.3604, -0.0451,  0.0932, -0.0178,
         0.0459,  0.0101, -0.0356, -0.1193,  0.1045,  0.0324, -0.3522,  0.1117,
         0.0290, -0.0300, -0.1076,  0.0141, -0.0047,  0.0231, -0.0085,  0.0599,
        -0.2278, -0.3210, -0.0625, -0.1137, -0.0916,  0.0712,  0.0287, -0.0039,
         0.0494, -0.1186, -0.1919, -0.0625, -0.1439, -0.3141, -0.0803, -0.2758,
        -0.1967, -0.1280, -0.1528, -0.0519, -0.0072, -0.0181, -0.0124, -0.0089,
        -0.0343, -0.0109,  0.0364, -0.0661], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1281, -0.0504, -0.0209, -0.0635, -0.6224,  0.0220, -0.0852,  0.2457,
        -0.0356, -0.1232,  0.0311,  0.0681,  0.0079, -0.7419, -0.0718, -0.1540,
        -0.4249,  0.0970, -0.2394, -0.0152, -0.1609, -0.0350, -0.1104, -0.5092,
         0.0091,  0.0103, -0.0192, -0.0606, -0.1053, -0.0562,  0.1788,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3136, -0.7733,  0.0976,  0.0860,  0.0809,  0.0375, -0.0350,  0.0372,
        -0.0601,  0.2912, -0.0398, -0.3576,  0.1225, -0.0859, -0.4178, -0.3806,
         0.1238, -0.3039, -0.5829, -0.5417, -0.1269,  0.0874, -0.2941, -0.0112,
        -0.1417,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0078,  0.2113,  0.3599,  0.1932,  0.3369,  0.4495,  1.9684,  0.4138,
         0.0881, -0.0187,  0.1299,  0.1703,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8605e-03, -1.4965e+00,  7.2605e-03, -1.6676e-01, -3.6543e-01,
        -1.1126e-01, -7.7632e-02, -5.2623e-02,  5.3793e-03,  4.3523e-03,
         4.1147e-03, -7.0908e-02, -2.4791e-01, -1.8542e-01, -1.0740e-01,
        -1.8641e-01, -6.6924e-02, -8.8187e-02, -1.1803e-04,  7.1971e-02,
        -1.5242e-01, -4.5987e-02,  1.4475e-02, -1.4977e-01,  9.1173e-02,
        -2.8555e-01, -6.9352e-02, -1.5158e-01, -7.2215e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0563, -1.4788,  0.0095, -0.2086, -0.0444, -0.4625, -1.0269,  0.5340,
        -0.0054, -0.0811,  0.1762, -0.1565,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1306, -1.7397,  0.4833, -0.3906,  0.2451, -0.6572, -0.7607,  0.0487,
        -0.3113, -0.2058,  0.0045, -0.1605,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1154,  0.3176, -0.2486, -0.0204, -0.2835,  0.0043,  0.0201, -0.0666,
        -0.9098, -0.0307, -0.0836, -0.9779, -0.0995, -0.0727, -0.1357, -0.5100,
        -0.0341,  0.0285, -0.1166, -0.1607,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9896e-01, -1.4600e+00, -5.4514e-02, -4.0993e-01, -7.0157e-01,
        -8.3589e-02, -2.2744e-01, -5.3539e-02, -9.5365e-02, -8.8721e-03,
        -6.8501e-03, -2.4505e-01, -3.8089e-02, -1.6648e-02, -9.8279e-03,
         5.3241e-03, -7.7742e-02,  5.7904e-02,  1.4285e-02, -3.4884e-02,
        -7.3189e-02,  1.3066e-01, -5.3398e-02, -1.0531e-02, -7.6740e-02,
         1.0530e-01, -1.3535e-02, -2.0311e-01, -1.8818e-01,  3.8574e-02,
        -1.7350e-02, -3.6361e-02, -6.0552e-02, -3.0648e-01, -1.1296e-02,
        -7.9483e-02, -8.8576e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4539,  4.2367, -0.1672,  0.1104, -0.2668,  0.2367,  0.2524,  0.1816,
        -0.1281,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3378, -0.0292,  0.0914, -0.6461, -0.1879, -0.2958, -0.4012, -0.3966,
        -0.0467,  0.0069,  0.0196, -0.1054, -0.2971, -0.4831,  0.0530, -0.0390,
         0.0832,  0.0984, -0.2047, -0.2817,  0.2170,  0.0777,  0.0584,  0.0337,
         0.0800,  0.1256, -0.0946,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8772e-03, -2.2122e+00,  8.6241e-02, -3.4592e-01, -1.2346e-01,
        -7.3514e-02, -5.0654e-03,  5.4938e-02, -4.5724e-02,  1.2806e-01,
         1.9733e-01, -5.6567e-01, -1.0296e-01, -1.1300e-02, -4.7107e-02,
        -4.6685e-02, -3.5030e-01,  4.9577e-02, -1.2156e-01,  5.7776e-02,
         1.1637e-02, -9.2181e-02, -3.2503e-01,  4.7847e-02, -1.2322e-01,
        -3.5449e-01, -3.4611e-02, -9.6611e-02, -7.6103e-01, -8.5160e-02,
        -7.2824e-05, -1.1910e-01,  2.0079e-01, -1.9998e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 3.1688e-01, -7.7599e-03, -5.3720e-02, -3.9328e-02, -8.9344e-02,
         3.2876e-02, -5.3006e-03, -4.9568e-03,  6.5026e-02,  1.2806e-01,
        -1.3515e-01, -3.9828e-01,  5.5495e-02, -8.5415e-03,  4.0646e-02,
        -1.7024e-01, -3.9576e-01, -1.0186e-01, -5.9900e-02,  1.6507e-02,
         7.0543e-02,  4.1487e-02, -9.1881e-02, -2.8054e-02,  4.5617e-02,
         3.1238e-02,  5.2361e-03,  4.0367e-04, -5.4173e-03, -1.9100e-02,
         1.0175e-02, -2.6062e-01, -4.5274e-01, -6.2520e-02, -9.9633e-02,
        -1.2455e-01, -2.5256e-01, -1.4237e-02,  5.1845e-02,  1.0637e-02,
        -4.8653e-02, -3.1306e-01,  4.2039e-02,  5.3257e-02, -6.7267e-02,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3023, -0.0458, -0.1060, -0.0585, -0.2806, -0.6891, -1.0045,  0.0515,
         0.0295, -0.5152, -0.0882, -0.0587, -0.4246,  0.1652, -0.0251, -0.0196,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0776, -0.1791,  0.0870,  0.1524,  0.0889,  0.1615, -0.0975,  0.2314,
         1.0227,  0.8802,  0.1338,  0.6147,  0.0849,  0.2745,  0.2226, -0.0642,
         0.0229,  0.0163, -0.1155,  0.0773,  0.0266, -0.0971, -0.0775, -0.1345,
         0.0444, -0.0912,  0.0036,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0011, -0.0293, -0.2647,  0.0090, -0.0347,  0.0662, -0.0657, -0.0120,
        -0.0846, -0.0497, -0.0099, -0.0416, -0.1682, -0.0536, -0.0169, -0.0390,
        -0.1182,  0.0477, -0.0433, -0.1709,  0.0851, -0.1084, -0.0810, -0.2964,
        -0.0222, -0.0062, -0.1263, -0.3157, -0.0564, -0.3399,  0.0540, -0.2247,
        -0.1346,  0.0696, -0.0771, -0.0099, -0.0411, -0.1530,  0.0340,  0.1091,
        -0.0221, -0.1200,  0.0080,  0.0096,  0.0217, -0.0129], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5305, -0.1159,  0.0050,  0.1050, -0.0325,  0.0816,  0.0279,  0.7147,
        -0.2105,  0.1079, -0.2185, -0.3101, -0.0637, -0.0048, -0.0744,  0.1185,
         0.0173,  0.2393,  0.7188,  1.3049,  0.1703, -0.0924, -0.0501,  0.1502,
         0.0096,  0.2774,  0.3956,  0.2615, -0.1545, -0.1168,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2495,  0.2584,  0.1323, -0.1070, -0.1226,  0.0805,  0.1007,  1.3735,
         0.0198,  0.3784, -0.0857,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8101e-02, -1.4120e+00, -7.1602e-02, -1.5278e-01, -1.8032e-01,
        -4.5039e-02, -1.5738e-01, -4.7871e-01, -3.6255e-02, -1.3199e-01,
        -2.5904e-02,  1.9558e-04, -8.6276e-02,  1.8694e-03, -8.6191e-03,
        -3.5633e-01, -1.9139e-01, -1.6301e-01, -1.4428e-01, -3.6715e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8747,  2.0982,  1.2717,  0.5039, -0.2320,  0.1564,  0.7015, -0.1059,
         0.1751, -0.0052,  0.0695,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2134, -1.2919, -0.4867, -0.8679,  0.1808,  0.0377, -0.0657, -0.2759,
        -0.4835, -0.2275, -0.0915, -0.0749, -0.0541, -0.0803, -0.0499, -0.2667,
         0.0048, -0.1426, -0.0669,  0.0565,  0.3304,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3964,  1.8218,  0.3788,  0.1482,  0.1168,  1.3052, -0.3697,  0.0151,
        -0.0295, -0.0271,  0.0335,  0.0328, -0.0395,  0.0038, -0.0804, -0.0811,
        -0.0464,  0.0449,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0073, -0.4833, -0.9390, -0.0335,  0.0108,  0.0182, -0.0182,  0.0284,
         0.0169, -0.0108,  0.0510,  0.0799,  0.0158, -0.2134, -0.3114, -0.0738,
        -0.1549, -0.1950, -0.1346, -0.1067, -0.0494, -0.2483, -0.5612,  0.0411,
        -0.1922, -0.0350, -0.1376, -0.0783, -0.0524, -0.3611, -0.0597, -0.0515,
        -0.0709,  0.0150, -0.1001, -0.0151, -0.0155, -0.0262,  0.0714,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3019, -2.0069, -0.1037, -0.5198, -0.0522, -0.0262,  0.0736, -0.0712,
        -0.0299, -0.1189, -0.1402, -0.4282,  0.0105,  0.0508, -0.0378, -0.0760,
        -0.2447, -0.0463, -0.0410, -0.0558,  0.0080, -0.0061,  0.0113,  0.0104,
        -0.0199,  0.0070, -0.1444, -0.0076, -0.1910, -0.5192, -0.0408, -0.1368,
         0.0640, -0.0172,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.0539,  0.8877,  0.8478,  0.9655,  0.1214,  0.1050,  0.2058, -0.0239,
         0.4124,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3369e-02, -7.7130e-01, -2.4696e-02, -2.2486e-01, -6.2340e-02,
        -3.9027e-01, -7.4505e-03, -1.4147e-01, -7.0160e-03, -2.5199e-03,
        -6.3458e-02, -4.1061e-02,  2.0866e-03, -5.1317e-02, -7.8436e-03,
        -2.9375e-01,  2.1882e-03, -6.4582e-01, -3.0862e-02, -8.5438e-02,
         2.1970e-02,  1.4599e-02,  4.6223e-02,  3.2982e-02, -1.9259e-01,
        -1.1003e-02,  2.1373e-02, -3.2677e-02, -7.2598e-05, -4.0703e-02,
        -1.0475e-01, -2.9442e-01,  2.8690e-02, -2.4600e-01,  6.3998e-02,
        -9.4997e-03,  1.2396e-02,  1.5044e-01,  9.8455e-03, -8.4387e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4478, -0.6762, -2.0909, -0.1267, -0.2240, -0.1068, -0.0191, -0.0569,
        -0.1507, -0.4726, -0.0054,  0.0547,  0.0154,  0.0219, -0.0386,  0.0748,
         0.0187, -0.0366, -0.2539,  0.0269, -0.1122,  0.0431, -0.0701, -0.0563,
        -0.0281, -0.2519, -0.2442, -0.0743, -0.1676, -0.5210, -0.1192,  0.0268,
         0.0828,  0.0109,  0.0085, -0.0098, -0.1171, -0.0476,  0.0186,  0.0282,
         0.1359, -0.1014,  0.0330,  0.0659,  0.0252, -0.0201,  0.0307, -0.0358,
         0.0706, -0.0249,  0.2446, -0.1213,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1677, -1.6151, -0.2111, -0.1137, -0.3305, -0.1009, -0.3773, -0.0878,
        -0.2070,  0.0362, -0.1659, -0.1512, -0.3810, -0.1780, -0.0666, -0.3553,
        -0.1935, -0.0801, -0.3002,  0.0488,  0.0110,  0.1897,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2812, -2.8903,  0.1597, -0.1731,  0.1259, -0.0845,  0.1022, -0.1891,
        -0.0514, -0.1674, -0.0811, -0.4232, -0.0652, -0.1440, -0.0171, -0.4939,
        -0.0341, -0.0448,  0.1979,  0.0076, -0.0155,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0374, -0.5952, -0.1817, -0.1353,  0.0371, -0.0261, -0.1028,  0.0944,
        -0.1007, -0.0405,  0.0040, -0.0250,  0.0179,  0.0055, -0.0058, -0.0160,
         0.0699, -0.0382, -0.1850, -0.0189, -0.0025, -0.0027, -0.0802,  0.0054,
         0.0163, -0.0700, -0.1976, -0.2426, -0.0678, -0.1922, -0.3522, -0.2005,
        -0.0135, -0.2332, -0.1160,  0.0213,  0.0104, -0.0153, -0.0251,  0.0357,
        -0.5181, -0.0120, -0.0308, -0.0292, -0.2384,  0.0114, -0.1596, -0.0169,
        -0.0082,  0.0561, -0.1340,  0.0140, -0.0391, -0.1377, -0.1229],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4854, -0.0073,  0.1303, -0.1125,  0.0222, -0.0887, -0.3083,  0.0250,
         0.0321,  0.0155, -0.0835,  0.0211, -0.0339, -0.1035, -0.1153, -0.0202,
        -0.0042, -0.1433, -0.0958, -0.1970, -0.0634, -0.0581, -0.2126, -0.0111,
         0.1045,  0.0576, -0.1393, -0.4106, -0.0342, -0.0263,  0.0050,  0.0007,
        -0.0320, -0.5044,  0.0562, -0.0392, -0.0448, -0.1998, -0.1696, -0.1053,
        -0.0137,  0.0054,  0.1227, -0.1401,  0.1282,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2573, -1.2724, -0.6463, -0.6114, -0.0666, -0.0598, -0.1873, -0.1847,
        -0.4526, -0.0699,  0.0070, -0.0707,  0.1189, -0.2604, -0.2483, -0.0239,
        -0.0764,  0.0598, -0.0744, -0.0480, -0.2327, -0.0715,  0.1066,  0.1387,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2450,  0.1517,  0.0278,  0.0393,  0.0026,  0.0193, -0.1449, -0.2333,
         0.1323,  0.0139, -0.0358,  0.0108, -0.0325, -0.2072, -0.0163,  0.1478,
        -0.1050, -0.1317, -0.1253, -0.2276,  0.0320, -0.0718, -0.3992, -0.4212,
        -0.0015, -0.2414, -0.3208, -0.0551, -0.2016, -0.3549, -0.1578, -0.1471,
        -0.2489,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0941, -1.7257, -0.0198, -0.5233, -0.0178, -0.2671, -0.0266, -0.3305,
        -0.6040, -0.1685,  0.0377, -0.0283,  0.0907, -0.3076, -0.1380, -0.0423,
        -0.0144, -0.1192, -0.0048,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1148e-01,  3.1079e-04, -8.4587e-02, -8.0908e-03, -4.5638e-01,
         5.2301e-02, -3.4955e-01, -2.2826e-01, -6.4386e-02, -8.5133e-03,
         1.2516e-02,  1.1100e-01, -4.3447e-02,  1.5990e-01,  2.6753e-02,
        -5.6539e-02, -4.1795e-02, -1.2749e-01, -9.4017e-02, -5.1299e-02,
        -2.3346e-01, -9.0325e-01, -2.6755e-02, -5.8053e-02, -2.5531e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4075, -1.3365, -0.3941, -0.0237, -0.1476, -0.6083, -0.6280,  0.2256,
        -0.1749, -0.1537,  0.0156,  0.0506, -0.2881, -0.0495, -0.0841, -0.2156,
        -0.0166,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.2673,  0.1139, -0.2240, -0.0024,  0.0775, -0.0556,  0.0925, -0.1128,
         0.0075, -0.6721, -0.1419, -0.1451,  0.0337, -0.1488,  0.1006,  0.0383,
        -0.3946, -0.4604, -0.0094, -0.3074, -0.0230, -0.4774, -0.7607,  0.0611,
        -0.1772, -0.0482, -0.0801,  0.0083, -0.0459,  0.0571,  0.0120,  0.2110,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2832, -0.5504, -0.7104, -0.0581, -0.1353, -0.0086, -0.2870, -0.5005,
         0.0143,  0.0762,  0.0373,  0.0989, -0.2476, -0.1175,  0.0104,  0.1171,
        -0.0130,  0.0107, -0.3869, -0.1106, -0.0158, -0.0083, -0.1422, -0.4678,
        -0.1350, -0.1869, -0.0865, -0.1328,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0619, -0.3979, -0.0026,  0.1055, -0.0059, -0.4431, -0.7648, -0.2553,
        -0.1148,  0.0109, -0.0070, -0.7686,  0.0097, -0.1689, -0.0748, -0.2085,
         0.0227, -0.3534, -0.0550,  0.1529, -0.1020, -0.0074, -0.0472, -0.0277,
        -0.1832,  0.0383, -0.0761,  0.0748, -0.1780,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1249, -0.0781, -0.0114, -0.1944, -0.7147,  0.1322, -0.0272,  0.0367,
        -0.0167,  0.0800,  0.2491,  0.0045, -0.3025, -0.6755,  0.3644, -0.1621,
        -0.0553, -0.3829, -0.0839, -0.1929,  0.2124,  0.0340, -0.2384, -0.0239,
        -0.0738, -0.0683, -0.1186, -0.0386, -0.0375, -0.0670,  0.0541,  0.0262,
         0.0402,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2110, -1.4913, -0.0239, -0.3907, -0.2300, -0.1588, -0.0712, -0.0647,
        -0.1027, -0.2616, -0.2913, -0.0145,  0.0531,  0.0476, -0.0227, -0.0562,
        -0.1374, -0.0363, -0.2955, -0.1614, -0.4172, -0.1521,  0.0304, -0.0714,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1901, -0.7463, -0.4728,  0.0419, -0.1534,  0.0869,  0.0137, -0.1280,
         0.1212, -0.2729, -0.0601, -0.2869, -0.2518, -0.0628,  0.0378,  0.0022,
         0.0041, -0.0884, -0.0215, -0.4874, -0.1229,  0.1001,  0.3463,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1914, -1.1169, -0.1040, -0.0569, -0.0377, -0.3134,  0.0883,  0.0354,
        -0.0126, -0.0214,  0.0085, -0.0274, -0.1165,  0.0422,  0.0148, -0.1043,
        -0.5166, -0.1577, -0.0149,  0.0309,  0.0421,  0.0598,  0.0633, -0.0209,
        -0.1740, -0.0388, -0.0199, -0.0039, -0.0544, -0.1989, -0.0925, -0.0458,
        -0.1130, -0.0284, -0.1812, -0.1738, -0.1002, -0.0068,  0.0541,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0966e-01, -2.2711e-01, -4.5847e-03, -7.7910e-02, -7.6867e-01,
        -1.3007e-03,  2.2164e-02,  4.4968e-02,  9.3544e-02, -1.9951e-02,
         2.8446e-02,  7.3261e-02,  5.4583e-02, -5.2273e-03, -6.5128e-02,
        -1.4182e-01, -1.1721e-02, -5.3000e-03,  4.4834e-04, -4.3688e-02,
        -1.0408e-01,  1.6330e-03,  3.8611e-02, -3.6053e-02, -2.5697e-01,
        -2.6461e-01, -4.8704e-02, -1.2205e-01, -5.4208e-02, -3.8415e-01,
        -2.0184e-01, -1.0555e-02,  1.5902e-02,  5.7807e-03,  4.8683e-02,
        -4.6019e-02, -9.4801e-02, -4.2190e-01,  2.6794e-02,  4.7108e-03,
        -3.8681e-02,  4.9971e-02,  1.2331e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2839, -0.7296, -0.0102,  0.0301,  0.0096,  0.0401, -0.0764,  0.0246,
        -0.2999, -0.0264,  0.1183,  0.0028, -0.1003, -0.0052,  0.0027, -0.0683,
        -0.0905, -0.2918, -0.0040, -0.0345,  0.0470, -0.0164, -0.0559,  0.0056,
         0.0430,  0.0076,  0.0526,  0.0315,  0.0407, -0.0017,  0.0645, -0.1073,
        -0.2726,  0.0405,  0.0176, -0.0329, -0.1075, -0.4311, -0.3931,  0.0352,
        -0.1374, -0.1764, -0.0644, -0.0324, -0.0399, -0.0336,  0.0026, -0.0116,
         0.0109,  0.0381,  0.0346,  0.0432, -0.0555,  0.0304,  0.0491, -0.0806,
         0.1693], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4454, -1.5958, -0.2492, -0.0187, -0.3330,  0.3822, -0.3998, -0.6995,
         0.0479, -0.2560, -0.7101, -0.1834,  0.3196, -0.0885,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5753, -0.8788,  0.2085,  0.3118,  0.0413, -0.1017, -0.2111, -0.0544,
        -0.0793,  0.0070, -0.3021, -0.3023,  0.1243, -0.0283, -0.2426,  0.0196,
        -0.0223, -0.1133,  0.3300, -0.3307, -0.7838, -0.1031, -0.1957, -0.0187,
         0.0314, -0.0159,  0.0697, -0.0056,  0.1721, -0.0744,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0471, -0.6864,  0.0253, -1.2525, -0.8900, -0.3961, -0.1634, -0.4985,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.1281, -0.9753, -0.1446, -0.1085, -0.1547, -0.3674,  0.0215,  0.0364,
         0.0251, -0.0879,  0.0819, -0.0316, -0.2668, -0.3479, -0.0789, -0.0376,
        -0.0463, -0.0065, -0.0103,  0.0121, -0.0951,  0.0211, -0.0595,  0.0860,
        -0.3074, -0.0019, -0.2540, -0.0072, -0.2852, -0.5121,  0.1275,  0.1469,
        -0.1941,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3743e-02, -1.2225e+00, -2.8647e-01, -2.6955e-01,  4.5138e-02,
        -2.4107e-01, -5.6209e-02, -1.0643e-01, -7.1258e-03, -1.0757e-01,
         2.2750e-03, -1.3248e-02, -7.4096e-02,  1.6280e-02,  9.3932e-02,
        -1.4025e-02, -6.7566e-03,  6.5321e-02, -3.8907e-02,  2.9982e-02,
        -3.3497e-02, -1.5299e-02,  4.8351e-03,  2.3481e-02,  1.7005e-02,
        -5.2462e-02, -7.5308e-02, -2.2925e-01,  1.7690e-02, -1.1770e-02,
         7.0081e-03, -1.0026e-01, -1.8186e-01, -3.4505e-01, -1.2724e-02,
        -2.0926e-02, -6.3668e-02, -2.7441e-02, -2.2093e-02,  2.6875e-02,
        -1.2979e-02, -1.0760e-02,  5.7275e-02, -2.0202e-01, -5.7897e-02,
        -7.3022e-03, -2.1398e-02, -3.0311e-02, -2.2208e-02,  1.8168e-02,
         4.5689e-02, -7.4410e-02,  1.3972e-02, -2.0731e-02, -3.9700e-02,
        -1.1681e-02, -1.7441e-02, -1.6532e-02, -3.0111e-02, -1.9922e-01,
        -4.4170e-02, -7.7206e-03, -3.6884e-01, -7.1342e-03,  2.8453e-04,
        -2.4254e-02, -1.0771e-02, -3.8602e-03,  3.9678e-03,  1.8942e-02,
         1.3365e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5372, -0.1621, -0.1140, -0.0015, -0.1645, -0.0735, -1.2435, -0.1342,
         0.0463,  0.0658, -0.0338, -0.0718, -0.0057, -0.0320,  0.0371, -0.0090,
        -0.0266,  0.0613, -0.0614, -0.6476, -0.0649, -0.0704,  0.0820, -0.2610,
         0.2214, -0.2645, -0.1317,  0.1090,  0.0059, -0.0357, -0.0692, -0.2018,
        -0.4859, -0.3464, -0.1645, -0.1798, -0.2537, -0.0189,  0.0355, -0.0201,
        -0.0924,  0.2468,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1296, -0.9707, -0.0051,  0.0347,  0.0369,  0.0017,  0.0454, -0.0856,
        -0.1328, -0.5238, -0.1881, -0.1398, -0.3940, -0.0198,  0.0106,  0.0064,
        -0.0064, -0.1999, -0.4430, -0.0508,  0.0465, -0.0499,  0.0279,  0.0081,
        -0.0099, -0.2543, -0.1072, -0.0838, -0.4363, -0.0314,  0.0062, -0.0234,
         0.0037, -0.0142, -0.0889, -0.0283, -0.1154,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3982, -2.8023, -0.0466, -0.2499,  0.0288,  0.5442,  0.1929,  0.2316,
         0.1222, -0.0414, -0.1318,  0.1008, -0.4875,  0.1064,  0.1642,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3244, -0.1525,  0.0473, -0.2390, -0.8206, -0.6395, -0.0995, -0.2460,
        -0.6898, -0.0785,  0.0216, -0.4590, -0.2258, -0.0821,  0.1396,  0.0071,
        -0.0418, -0.0299, -0.0413,  0.4402,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2768,  0.0938,  0.0195, -0.0754, -0.6672, -0.1280, -0.0616, -0.0601,
        -0.0497, -0.0636, -0.1499, -0.2754, -0.1294, -0.0747, -0.1259, -0.1135,
         0.0476, -0.1374,  0.0864, -0.1125, -0.0173,  0.0362, -0.0205,  0.0488,
         0.0225, -0.2012, -0.3662, -0.0305,  0.0207, -0.1769, -0.3290, -0.0548,
         0.0117, -0.0833, -0.1346, -0.2059,  0.0346, -0.0416, -0.0091, -0.0052,
         0.0039, -0.0487,  0.0856,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0346, -0.0632, -0.0104, -0.0079, -0.1132, -0.1181, -0.0981, -0.1421,
        -0.0103, -0.0377, -0.0184,  0.0280, -0.0309, -0.3527, -0.5899,  0.0217,
         0.0093,  0.0380, -0.0240, -0.1433, -0.4704, -0.0807,  0.1654,  0.0130,
        -0.0222, -0.0436, -0.0612, -0.0152, -0.7139, -0.0784, -0.0630, -0.2996,
        -0.0813, -0.2457, -0.4234, -0.1991,  0.0585, -0.1602,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2167, -1.4552, -0.6462, -0.8757, -0.1579, -0.3449,  0.3316, -0.2425,
        -0.0405, -0.2664, -0.1314, -0.0106, -0.3037, -0.0895,  0.1407,  0.0305,
        -0.0404, -0.2149,  0.0255,  0.1058,  0.1702,  0.0139,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0239, -0.5510, -0.0507, -0.0327, -0.0441, -0.0449,  0.1111, -0.0067,
        -0.2044, -0.7501, -0.0112,  0.0499, -0.1034, -0.0157, -0.1798, -0.2302,
        -0.2722, -0.1671, -0.0392, -0.2645, -0.0146,  0.0719, -0.1867, -0.1621,
         0.0213, -0.0289, -0.0473,  0.0624, -0.0306, -0.1940, -0.0197,  0.1215,
        -0.0413, -0.0350, -0.1345, -0.3074,  0.0187, -0.0554,  0.3110, -0.0727,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1313, -0.0302,  0.0059,  0.0429,  0.0641,  0.0132,  0.0643, -0.0045,
        -0.2025, -0.0327, -0.0237, -0.0308, -0.0063, -0.4886, -0.4273, -0.1071,
         0.1157,  0.0181, -0.3642, -0.0207, -0.0232,  0.0234,  0.0270,  0.1680,
         0.0136, -0.5416, -0.0570, -0.4444, -0.6548, -0.0290,  0.1298, -0.5843,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9081e-01,  2.3286e+00, -1.8600e-02, -2.9592e-01,  1.5297e-01,
        -7.7784e-02, -9.5252e-05,  2.0246e-01,  8.0247e-01,  5.1395e-02,
         6.0938e-02,  3.0070e-01,  6.2120e-02,  1.8300e-02,  6.0404e-02,
         6.2198e-02,  2.6199e-01,  1.1900e-01, -8.0483e-02, -8.5360e-03,
         6.4100e-02,  4.7842e-02,  1.9893e-01,  5.9922e-01,  1.3833e-01,
         5.8370e-02,  1.6046e-01,  8.7643e-02,  1.3731e-02,  1.0786e-01,
         4.0837e-02, -1.7478e-02,  3.5390e-02, -4.7545e-01,  2.7589e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.1894, -0.1834, -0.4703, -0.2751,  0.0166, -0.0411, -0.9698, -0.7632,
        -0.2893, -0.5144, -0.0066, -0.2825,  0.0947, -0.3381, -0.1048, -0.0511,
        -0.0246, -0.0910,  0.0295,  0.3119, -0.0302,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0096, -2.7201,  0.0600, -0.4266, -0.1518, -0.3479, -0.1685, -0.5142,
        -0.6954,  0.1240,  0.0291,  0.0848,  0.0605, -0.0875, -0.2292,  0.1617,
        -0.0928,  0.3778, -0.3633,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4840, -0.4232, -0.4127,  0.0348, -0.1002, -0.0998,  0.0326, -0.0212,
        -0.0793, -0.4454, -0.7332, -0.0280, -0.1745, -0.5972, -0.0438, -0.1706,
        -0.0068, -0.1490,  0.0720, -0.0158,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9557e-02, -1.5856e+00, -1.1849e-03, -4.7865e-01,  1.3259e-01,
         4.7147e-02, -2.3789e-01, -2.0590e-01, -6.3769e-02,  6.8736e-02,
        -4.5226e-02, -6.3444e-03, -4.9764e-02, -2.0517e-01, -2.0364e-02,
        -1.4397e-01, -3.0731e-01, -2.7458e-02, -1.2175e-01, -1.5747e-01,
         9.5970e-02,  1.0724e-01, -9.0539e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0991, -0.5301, -0.4230,  0.0846,  0.0096, -0.2465,  0.0420, -0.1074,
        -0.2611, -0.1988, -0.0892, -0.3189, -0.0602, -0.6640,  0.0506, -0.0097,
         0.0287, -0.0343, -0.1062, -0.0479, -0.2541, -0.0537, -0.1108,  0.0622,
        -0.0242,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1869e-02, -2.6526e+00, -1.8826e-01, -3.9939e-01,  1.0668e-01,
         1.1353e-01,  9.0310e-02, -6.1996e-02,  1.4843e-01,  3.0175e-02,
        -3.8407e-01,  9.3004e-02,  4.4154e-03, -3.1543e-01,  6.6159e-02,
         1.2832e-01, -6.2812e-02, -5.7243e-03,  1.7769e-01, -1.3916e-01,
        -5.5363e-01, -2.1863e-01, -2.6332e-01, -1.1787e-01, -1.5447e-01,
        -1.1421e-01,  1.2253e-01, -2.8027e-02,  1.2315e-01, -8.8864e-02,
        -3.1014e-02, -2.1139e-03, -1.4946e-01, -2.3405e-02,  1.8497e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0588,  0.0139, -0.0105, -0.0929, -0.0423, -0.0184, -0.2955, -0.0317,
        -0.3049, -0.2045, -0.1193,  0.0162, -0.0119, -0.2207, -0.3069, -0.7210,
         0.0678, -0.2622,  0.0597,  0.0770,  0.1953, -0.1644,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5367,  0.1666, -0.0861,  0.1346,  0.1013,  0.2686, -0.0086, -0.0010,
        -0.0522,  0.6877, -0.1932,  0.0528, -0.0793,  0.0946, -0.2568,  0.3260,
         0.1650,  0.0453,  0.0940,  0.0158,  0.0898,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7917e-01,  2.0084e+00,  3.0992e-01, -1.6365e-01, -2.5453e-02,
         8.4928e-04,  4.6951e-01,  7.4571e-01,  2.1628e-02, -6.1591e-02,
         5.2043e-02,  1.0955e-01, -1.0538e-01,  5.0620e-01, -9.7175e-02,
         1.9589e-01, -2.9000e-01, -5.7075e-03, -1.6644e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3321e-01,  1.6599e-01, -7.7317e-02,  9.1934e-02,  1.6594e-02,
        -2.5494e-01,  2.5046e-02, -2.5355e-02,  6.2393e-02, -5.3117e-02,
        -3.7675e-02, -1.2803e-01,  3.7548e-03, -7.2674e-02,  1.2402e-01,
         2.2363e-04, -1.5553e-01, -1.1021e-01, -6.6438e-03, -6.7197e-02,
        -2.2685e-02, -1.9242e-01, -2.1590e-01,  5.3230e-02, -3.0697e-01,
        -4.5419e-02, -1.5984e-02, -3.0094e-01, -1.1116e-01, -6.0093e-01,
        -4.2349e-02, -2.2761e-01, -2.1936e-01, -3.9295e-01,  1.7523e-02,
        -2.3515e-02,  4.7411e-02, -1.5566e-02,  3.1308e-02,  2.0678e-02,
         1.1621e-02,  4.7969e-02,  2.5639e-02,  8.7433e-03,  1.4997e-02,
        -1.4932e-01,  8.9066e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0886,  2.4625,  0.7768,  0.0954,  0.0460,  0.0557, -0.0229,  0.1521,
         0.2429,  0.0448, -0.1051, -0.0106, -0.0693,  0.0296, -0.1719, -0.0312,
         0.4336,  0.0994, -0.0456,  0.0314, -0.1031,  0.0710,  0.2963,  0.5128,
        -0.0069,  0.0339,  0.1375,  0.0687,  0.0340, -0.0122, -0.0097,  0.0343,
        -0.0290, -0.1532,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1892,  2.5339, -0.0496,  0.0691,  0.1910,  0.1973, -0.0979, -0.1055,
         0.0309,  0.0830,  0.0103,  0.0137,  0.2360,  0.1453,  0.0050,  0.0078,
         0.1968, -0.1379, -0.0241,  0.4561, -0.0046,  0.0955,  0.1227, -0.0616,
         0.0333,  0.0265,  0.0554,  0.0681,  0.0499,  0.0952,  0.0205,  0.0652,
         0.0221, -0.2018,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.0301, -2.1090,  0.1118,  0.1532, -0.2293, -0.5688,  0.3002,  0.0767,
        -0.1374, -0.7536, -0.0940,  0.1165, -0.2114, -0.6332, -0.0782, -0.1106,
         0.0294,  0.0806,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7813,  0.1545,  0.0749, -0.2155, -0.0304,  0.0482, -0.0659,  0.0441,
        -1.0098, -1.5324,  0.2888,  0.0675, -0.2148, -0.1665,  0.0916, -0.0562,
        -0.0911, -0.1215,  0.1153, -0.6273, -0.0155,  0.1447, -0.1013,  0.0106,
        -0.0277,  0.1282,  0.5844,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2596, -0.0105,  0.0096, -0.0459, -0.0686,  0.0522, -0.0243, -0.1452,
        -0.8611, -0.0881,  0.0311, -0.1961, -1.2401, -0.2753,  0.0125,  0.0135,
        -0.0507, -0.1521,  0.1266, -0.6311, -0.0742, -0.0247, -0.1254, -0.0963,
        -0.1666,  0.0474,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3321, -1.7235,  0.1358, -0.3763, -0.5132, -0.0181, -0.0027, -0.1499,
        -0.2717, -0.2286, -0.3532, -0.2657, -0.0715,  0.0684, -0.0942, -0.3422,
        -0.0860, -0.1108, -0.0808, -0.0923,  0.3142,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3513, -1.5303, -0.6946, -0.8891, -0.1498, -0.0405, -0.9265,  0.0324,
        -0.0910, -0.1526, -0.0740, -0.2613,  0.2948,  0.0992, -0.1957,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4163e-01,  6.7109e-02, -5.3255e-04, -3.5206e-02,  6.5133e-02,
        -1.4680e-01,  5.8222e-02, -2.4038e-01, -7.8027e-02, -2.2454e-01,
        -4.5623e-02, -1.8443e-01, -1.7146e+00, -6.8867e-01, -2.1992e-01,
        -2.6605e-01,  3.4299e-01,  7.8180e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2051, -1.3637, -0.7882, -0.7640, -0.0367, -0.1722,  0.0244, -0.1439,
        -0.0289, -0.3385, -0.4373, -0.2121, -0.0278, -0.2011, -0.0818,  0.1883,
        -0.0556,  0.0769, -0.0249,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0780, -0.5899, -0.4160, -0.1646, -0.0138, -0.0128,  0.2166,  0.0193,
        -0.0012,  0.0092, -0.0137, -0.0041, -0.0226,  0.0384,  0.0535,  0.0180,
        -0.1301,  0.0194,  0.0927,  0.1474, -0.3995, -0.0654, -0.2025, -0.0166,
        -0.2917,  0.0447, -0.1541,  0.0138,  0.0149, -0.1145,  0.0120, -0.0422,
        -0.2255, -0.0222, -0.1251, -0.0079, -0.2157,  0.0507, -0.0440,  0.0084,
         0.0095, -0.0258, -0.0169, -0.1919], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2253,  1.3703,  0.0041,  0.2986,  0.1492, -0.0168,  0.2682,  0.1245,
         0.7172,  0.0263,  0.1632, -0.1203, -0.0484,  0.0139, -0.0796, -0.0426,
         0.0411,  0.5034, -0.0777,  0.1299,  0.2442, -0.0174,  0.0038, -0.0092,
        -0.1208,  0.0030,  0.0536,  0.3610, -0.0137,  0.0689,  0.0813,  0.0753,
         0.0479, -0.1333, -0.1768,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2647e-01, -1.9434e+00, -4.6263e-02, -2.8925e-01, -1.5058e-01,
        -9.1853e-02, -5.6768e-01, -1.7489e-01, -6.9654e-02, -4.8875e-02,
        -1.1795e-03, -1.0798e-01, -3.2041e-01, -9.3164e-04, -1.4575e-02,
        -4.3390e-01, -6.3605e-02,  1.4284e-02,  1.1036e-01,  2.7456e-02,
         4.2785e-02, -3.5487e-02,  9.0773e-02,  1.7918e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3543, -1.8694,  0.0886, -0.2165, -0.1575, -0.1412,  0.1952, -0.1149,
        -0.5816, -0.0750, -0.0105, -0.0053,  0.1474,  0.0532, -0.3636, -0.0743,
         0.0999, -0.0930, -0.4040, -0.0668,  0.0156, -0.3434,  0.0028, -0.1733,
        -0.0593, -0.2264, -0.2209, -0.1141, -0.0833, -0.0866, -0.1783, -0.0185,
         0.0184,  0.2059,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0555,  0.0771,  0.0186, -0.1280,  0.1208,  0.0877, -0.4892,  0.0488,
         0.0105,  0.1286, -0.0754, -0.2636, -0.8278, -0.0214, -0.5973, -0.8351,
        -0.0584, -0.0154,  0.0339,  0.0753,  0.1163,  0.0341,  0.0992, -0.3662,
        -0.2420, -0.0282, -0.3447, -0.2230,  0.0051, -0.0350,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.0018, -0.3901,  0.2913,  1.0364,  0.1734,  0.2342,  0.1090,  0.3513,
         0.2948,  0.9959, -0.1446, -0.2097,  0.2580,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8780,  0.0559, -0.0915,  0.2341, -0.1144, -0.1559,  0.1498, -0.3370,
        -0.5343,  0.0780,  0.0611, -0.5912, -0.0647, -0.2607, -0.3766,  0.3616,
        -0.1219,  0.1706,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4807, -1.6215, -1.5638, -0.2105, -0.4047,  0.0249, -0.6798, -0.0535,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5639e-03, -6.9115e-01,  4.9367e-02, -5.2346e-02, -7.4913e-02,
        -1.1204e-01, -2.7098e-02, -4.2510e-02,  5.6904e-02,  1.7446e-02,
         4.8903e-02,  8.6026e-02, -6.8162e-02,  3.1262e-03,  6.2876e-03,
        -4.7783e-02,  1.3744e-02,  4.6875e-02,  3.7855e-02, -7.7987e-03,
         2.2981e-02,  1.0289e-01, -1.2981e-01, -2.2369e-01,  3.6207e-02,
        -2.6617e-02,  6.2959e-05, -1.0267e-01, -5.9603e-01, -2.6697e-01,
        -1.6573e-01, -1.7558e-01, -9.4135e-02, -5.4923e-01,  1.7398e-02,
        -4.1525e-01, -3.1289e-01, -4.5017e-02,  7.0870e-02, -1.3503e-02,
        -6.7383e-02, -1.0791e-01, -1.2918e-01,  1.3699e-02,  1.8557e-02,
         1.8820e-01,  2.5050e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3135,  0.3488, -0.2471,  1.2050,  2.0355, -1.8141, -0.1589, -1.6995,
        -2.8489,  1.3823, -0.3760,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4662, -1.6271, -0.0402, -0.3315, -0.0784,  0.0018, -0.0160, -1.0030,
        -0.2707, -0.5540, -0.1186, -0.0934, -0.6242,  0.0342,  0.0622,  0.0093,
        -0.0032, -0.3919,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0042, -0.4519, -1.6253, -0.3799,  0.1137,  0.0703, -0.8285,  0.2345,
        -0.4985,  0.0124,  0.0043, -0.0624, -0.1083,  0.2328,  0.3906,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4773,  2.2521,  0.3269,  0.0628, -0.0579,  0.7314,  0.7644,  0.0761,
         0.2022,  0.3014,  0.5970, -0.0878, -0.1448, -0.1103,  0.0275,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6598,  0.0224,  0.0495,  0.1889,  0.0339,  0.0251, -0.1254, -0.0486,
        -0.2245, -0.2448, -0.0856, -0.5003, -0.4782, -0.1262, -0.0504, -0.1396,
        -0.4503,  0.1704, -0.1419,  0.0130, -0.5835, -0.8923,  0.0425,  0.0257,
        -0.1120,  0.0370,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0643,  0.1839,  0.1677,  0.1092, -0.3065, -0.0667, -0.0278,  0.9356,
         1.4186,  0.1857, -0.0952, -0.0288,  0.1687, -0.2500,  0.4981,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4276, -1.5680, -0.0883, -0.0199, -0.4036, -0.1511, -0.2257,  0.0273,
        -0.0687, -0.5362,  0.2239,  0.0236, -0.0799, -0.0443,  0.1325, -0.0803,
        -0.0661, -0.0484, -0.1782,  0.1396, -0.1296,  0.0585,  0.1693,  0.0882,
        -0.2505,  0.1904,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0072,  0.1421,  0.0852,  0.0564, -0.0372, -0.1355,  0.0424,  0.0255,
        -0.1949, -0.2876, -0.6844,  0.0052, -0.0617,  0.1540, -0.3987, -0.2618,
         0.0624, -0.1411, -0.0831, -0.2112,  0.0028, -0.0534, -0.1040, -0.1831,
         0.0192, -0.1246,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.4205e-01, -1.3304e+00,  4.7790e-02, -6.0238e-02, -4.0126e-02,
         7.1725e-03, -1.7836e-01, -2.6887e-01, -1.4304e-01, -2.2849e-01,
        -1.0063e-01, -2.0070e-01, -2.8276e-02,  3.3575e-03, -2.2788e-01,
        -3.5165e-02, -2.6333e-02, -1.3329e-02, -8.1428e-02, -6.1240e-02,
         4.3349e-02, -3.0059e-02,  9.5951e-02, -3.3321e-01, -3.4645e-01,
        -9.1288e-02, -2.0618e-01, -3.9334e-02,  3.5658e-02,  4.0040e-02,
         1.8553e-04, -3.3521e-01, -5.5723e-02,  4.6942e-02, -1.7913e-01,
         9.1474e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0936, -0.3598, -0.2242, -0.1424, -0.4456, -0.1543, -0.3336, -0.4223,
         0.0069,  0.0937, -0.1155, -0.0294, -0.0656,  0.0223, -0.0120, -0.0654,
        -0.0854, -0.0026,  0.0070,  0.1288,  0.0032, -0.2190, -0.0650, -0.0194,
        -0.2167, -0.3686,  0.0579, -0.1719, -0.1690, -0.0555,  0.0742, -0.2618,
        -0.3201,  0.0941, -0.0249,  0.0595,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0835, -0.4800, -0.1383, -0.0313,  0.0075, -0.4688, -0.1454, -0.0516,
        -0.1470, -0.0297, -0.1961, -0.3246, -1.0827, -0.1942, -0.2152, -0.1568,
        -0.2932, -0.0460, -0.2345, -0.0773,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2170e-01, -4.6000e-01,  6.2131e-02, -2.0316e-01, -9.0786e-02,
        -1.8005e-01,  1.8545e-03, -1.5699e-01, -6.5711e-02, -2.1073e-01,
        -5.0989e-02,  7.6350e-03, -7.2634e-02, -5.4635e-02, -5.4039e-01,
        -1.9486e-01, -3.6439e-02, -2.9389e-01, -1.5333e-01, -3.1449e-02,
        -5.4159e-03, -3.6973e-02,  1.9670e-03,  1.1693e-02, -1.0830e-01,
         1.1962e-01,  4.2618e-02, -3.3903e-02,  1.8586e-04, -2.4692e-01,
        -3.4339e-01, -1.3493e-01, -1.6445e-01, -3.7103e-01,  2.1137e-01,
         2.0879e-01,  4.4247e-02,  1.0509e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0630, -1.5330, -0.5515, -0.5074, -0.1157,  0.0148, -0.0945,  0.1090,
        -0.0540, -0.0720, -0.0863, -0.4366, -0.0170,  0.1199, -0.0664, -0.2889,
         0.0711, -0.2534, -0.0098, -0.0332,  0.0446,  0.0805,  0.0520,  0.0549,
        -0.0093,  0.0037, -0.0443, -0.1530, -0.4827, -0.0056, -0.0920,  0.0107,
        -0.0035,  0.0091, -0.1403,  0.0451, -0.0787, -0.0373,  0.0301,  0.0964,
        -0.0033], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9972e-01, -2.3062e+00, -1.5764e-01, -1.1899e-01, -2.5194e-02,
        -1.8205e-01, -2.5145e-02, -9.8546e-02, -1.2435e-01, -2.3881e-02,
        -8.7637e-02, -3.4432e-01, -2.2702e-01, -9.9275e-02,  1.6228e-01,
        -2.9672e-01,  5.6927e-02, -3.1600e-01,  2.2176e-02, -2.7249e-02,
         1.7166e-03,  1.1101e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5293e-01, -1.4006e-02,  1.9578e-02, -2.0620e-02,  7.6822e-02,
        -8.2546e-02, -3.4744e-02,  4.5040e-02, -3.9312e-02, -2.6596e-04,
         1.3905e-02, -4.0100e-01, -2.4449e-02, -1.1123e-01, -1.0549e-01,
        -3.6371e-01,  2.8378e-02, -3.0953e-01,  7.1452e-02, -2.8913e-01,
        -1.1622e-01,  1.3584e-01,  4.3374e-02, -1.9317e-01,  6.5437e-03,
         9.1167e-03, -7.0698e-03, -1.5091e-01, -5.6360e-01,  1.0073e-01,
        -1.4242e-01, -1.8625e-01, -1.1972e-01, -5.8752e-02,  2.2406e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7274,  0.2155,  1.2870,  0.3363,  0.8288,  0.0930,  0.0066, -0.0581,
        -0.1228,  0.0769,  0.1947, -0.0779, -0.0428, -0.0356, -0.0266,  0.0194,
        -0.1906,  0.0033,  0.4111, -0.1140,  0.2406, -0.0234, -0.0503,  0.1183,
         0.0434,  0.1285,  0.3330, -0.0478,  0.1162,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0716,  2.1715,  0.4234,  0.4078, -0.0170,  0.4260,  0.0123,  0.1581,
         0.0791, -0.0684,  0.1436,  0.2318,  0.2394,  0.1748, -0.0530,  0.3990,
         0.2055,  0.0932,  0.1850, -0.0481,  0.1086, -0.0102,  0.0733, -0.2321,
         0.0485,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1941, -0.0200,  0.0862,  0.0154, -0.4641,  0.0230, -0.1763, -0.4693,
        -0.4869, -0.0571,  0.0265,  0.1283,  0.0171, -0.1539, -0.2115, -0.8135,
         0.1017, -0.1778, -0.0311, -0.1333, -0.0445, -0.0152, -0.0068,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1495,  0.0113,  0.0470,  0.1247,  0.0835,  0.0648, -0.0286,  0.0247,
         0.0144,  0.0062,  0.0080,  0.0103, -0.3311, -0.6264,  0.0109, -0.1123,
        -0.0526, -0.1213, -0.0615, -0.0169, -0.0293, -0.0329,  0.0235,  0.0099,
         0.0174, -0.0039, -0.1130,  0.0267, -0.3478, -0.0742, -0.2669, -0.1190,
        -0.2021, -0.1359, -0.1761,  0.0045, -0.1261, -0.0245, -0.1810,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1876,  0.0969,  0.2123,  0.7747,  0.8621, -0.0077,  0.0379,  0.2157,
         0.8261, -0.1864, -0.0074,  0.0933,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.0569, -0.5149, -1.3200, -0.0373, -0.0583, -0.1909, -0.1996,  0.0344,
        -0.0403, -0.2228, -0.0350,  0.0523, -0.0161,  0.0072, -0.0339,  0.0736,
         0.0323, -0.1455,  0.0050,  0.0162, -0.0212, -0.0720,  0.0542, -0.0031,
        -0.0109, -0.0105, -0.0321, -0.0331, -0.0397, -0.0949,  0.0274,  0.1862,
        -0.1346, -0.0052, -0.0426, -0.0139, -0.1645, -0.0167,  0.0375, -0.0168,
         0.0324,  0.0077,  0.0440,  0.0264, -0.0693,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3634, -2.8013, -0.3333, -0.5723,  0.2138,  0.1897, -0.0408, -0.4178,
        -0.8465, -0.2852,  0.0904,  0.5340,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1390, -2.8107, -0.4760, -0.6522, -0.3865,  0.6763, -0.4865, -0.1356,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7456e-01,  1.5274e+00, -7.8843e-02, -2.5094e-01,  3.7645e-01,
         7.3630e-02,  1.7334e-01,  5.9860e-02,  1.3377e-01,  3.1472e-01,
         1.9892e-01,  9.2141e-02,  1.6447e-03,  1.1873e-02, -5.5386e-03,
         1.2903e-02, -8.7466e-02,  2.1460e-03, -1.8885e-02,  1.2419e-02,
        -2.2569e-02, -5.3780e-02,  2.9110e-02,  1.0466e-02,  3.6241e-02,
        -1.9770e-02, -1.1615e-02,  4.5050e-02,  1.7667e-01,  1.5161e-02,
         5.5863e-02,  2.3034e-02, -1.6462e-02, -5.0097e-02, -3.1278e-02,
        -2.0921e-02, -3.7037e-02,  2.4049e-02,  1.7828e-04, -9.5350e-03,
         4.0837e-02,  4.9620e-02,  1.5013e-03,  1.2968e-01,  1.1339e-01,
         7.1820e-02,  5.9248e-01, -2.0213e-01,  4.3109e-01,  3.4072e-01,
         1.1933e-01,  5.6522e-03,  2.1027e-01,  1.0538e-02,  7.5417e-02,
         2.0144e-03,  1.6074e-01,  6.9260e-02,  1.7922e-02,  1.0668e-01,
        -1.1563e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1899, -0.1305,  0.0163, -0.1619, -0.4076, -0.0683, -0.1215, -0.0585,
         0.1910,  0.0283, -0.1600, -0.3081, -0.0500, -0.2579, -0.3273,  0.0317,
        -0.0202, -0.0919, -0.1894, -0.1388, -0.3658, -0.3083, -0.1418, -0.0675,
        -0.2087, -0.0822, -0.0314, -0.0835, -0.1274, -0.1773, -0.1730, -0.1124,
        -0.1483,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2808, -0.3549, -1.3260, -0.3717, -0.0476, -0.0281, -0.1675,  0.1326,
         0.1171,  0.0165,  0.0711,  0.0319, -0.1686,  0.0367, -0.0170,  0.2456,
        -0.1303, -0.3423, -1.3920,  0.0456,  0.0471, -0.2831,  0.2802, -0.2284,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0866, -0.3727, -0.5162, -0.2350, -0.0906, -0.4038,  0.0423, -0.1417,
         0.0731, -0.2770, -0.3479, -0.4077, -0.3915, -0.0284, -0.0320,  0.0104,
         0.0526,  0.0805,  0.0504,  0.0645, -0.1037, -0.0640,  0.0101,  0.1857,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2748, -0.0957, -0.3329, -0.3594,  0.0032,  0.0737,  0.1211, -0.1768,
         0.0718,  0.0337,  0.0344, -0.0653, -0.0303,  0.0278, -0.3687, -0.5898,
        -0.6497, -0.0763, -0.0463, -0.1588, -0.5526,  0.0312, -0.2154, -0.5793,
        -0.0121, -0.0626, -0.2442, -0.0934,  0.1054, -0.0214, -0.0945, -0.0674,
         0.1322, -0.1382,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2173,  0.0851, -0.0416,  0.0426, -0.3708,  0.0286, -0.0197,  0.0249,
         0.0072,  0.0977,  0.0289,  0.1095,  0.2082, -0.4805,  0.0126,  0.1648,
        -0.7100,  0.0878, -0.2092, -0.0700, -0.4542, -0.6467,  0.1733, -0.0664,
        -0.1720,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0021,  0.0659, -0.0951,  0.1219,  0.0414, -0.0488, -0.6276, -0.3738,
        -0.4390, -1.1855, -0.0339, -0.2491, -0.1775,  0.0459,  0.1034, -0.2093,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0627, -1.1277,  0.0596, -0.4375, -0.2426, -0.2955, -1.2635, -0.1172,
         0.1308,  0.1000, -0.0371, -0.1145, -0.2639, -0.0536,  0.0683, -0.1588,
        -0.2128, -0.3854, -0.1652, -0.0849,  0.1002,  0.1399, -0.0556,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1100,  0.1579,  0.2128,  0.6484,  1.3592, -0.1977, -0.3371, -0.0516,
         0.0595, -0.1036,  0.1722, -0.1395, -0.1169,  0.1677,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.0173, -0.4119, -0.2941, -0.5463, -0.0888,  0.0152, -0.2702,  0.1524,
        -0.2924, -0.2078, -0.5125, -0.3205, -0.1695,  0.0720, -0.0276,  0.1038,
        -0.0400, -0.6567, -0.1157, -0.6728, -0.0754, -0.0688, -0.2854, -0.2029,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2019,  0.0816, -0.5626,  0.1584, -0.9756,  0.1505, -0.8334, -0.2153,
        -0.0116,  0.2107, -0.4159,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4702,  0.1857,  0.2543,  0.0285,  0.0783, -0.1507, -0.6412,  0.0210,
        -0.4806, -0.2042, -0.1157,  0.0645, -0.0529, -0.2037, -0.4266,  0.0620,
         0.0104, -0.5437, -0.0360, -0.0668, -0.1623,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1043, -0.0399,  0.0122, -0.1272, -0.1864, -0.0488, -0.1349, -1.5286,
        -0.4823, -1.0246, -0.1358,  0.0192, -0.1603, -0.3395,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0573e-04,  9.2869e-02,  8.9662e-02,  8.2997e-02,  1.4886e-01,
         5.4574e-02,  1.0739e-01, -1.6929e-01, -1.1796e-01,  1.1173e-01,
         7.1068e-02, -5.6541e-02,  1.0071e+00, -9.2916e-03,  1.5216e-01,
         1.3535e-01,  3.9904e-02,  3.8620e-02,  7.6251e-02, -1.9436e-02,
         1.6717e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4083,  2.1840, -0.1612,  0.4221, -0.4382,  0.5878, -0.0623,  0.7707,
        -0.0189,  0.3286,  0.1651,  0.4785, -0.1522,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4316, -0.7104, -0.1344, -0.0723, -0.0550, -0.0903, -0.2685, -0.3789,
        -0.0560,  0.0066, -0.0093,  0.2299, -0.0159,  0.1084, -0.0030,  0.0449,
         0.0468,  0.0269,  0.0278,  0.3574,  0.0212,  0.1744,  0.1346, -0.1209,
        -1.1201,  0.0084, -0.7666, -0.5776, -0.3575, -0.0779, -0.1220,  0.2743,
        -0.0457, -0.0218, -0.0787, -0.0620, -0.1231, -0.0138,  0.0254, -0.0429,
        -0.1324,  0.0321, -0.0526,  0.4790,  0.2001], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6202e-01, -3.0744e-01, -4.0554e-01, -1.8183e-02, -2.3243e-02,
        -6.6811e-02, -4.0167e-03, -8.9855e-02,  4.4178e-02, -9.1327e-02,
        -6.3424e-01,  1.9537e-01, -6.9196e-02,  3.1879e-02, -3.3725e-01,
         8.9950e-02, -7.4926e-01,  1.0497e-01, -9.3648e-02,  8.8926e-02,
        -4.4441e-02, -5.9324e-01, -8.5092e-02, -5.8281e-01,  6.8589e-02,
         1.0056e-01,  3.9916e-04, -6.2442e-02, -2.8628e-01, -2.5332e-02,
         1.2843e-01,  4.2424e-02, -9.3689e-02, -7.7902e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3510e-01,  2.0333e-01, -1.6911e-01, -2.6005e-01, -2.4029e-01,
         6.6921e-05, -2.0654e-01, -2.1999e-02,  3.1041e-01, -3.1580e-02,
        -9.1065e-02, -1.2638e-02, -9.6780e-04,  1.2237e-02,  3.6264e-02,
         3.8004e-02, -8.0388e-03,  4.7745e-02,  4.9747e-02, -1.2435e+00,
        -1.5997e+00, -4.5368e-01, -2.5677e-02,  1.9283e-01,  1.8600e-01,
        -5.3858e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0217, -1.0700, -0.0723, -0.4062, -0.0151,  0.0968,  0.2083, -0.1068,
        -0.0203, -0.2065,  0.0297, -0.0209, -0.0545, -0.0633, -0.2319, -0.5147,
        -0.6283,  0.0651, -0.1755, -0.0951, -0.2236, -0.0862, -0.2197,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2615, -1.1791, -0.3191, -0.1741, -0.0038,  0.0180, -0.0080, -0.1088,
        -0.2868, -0.0410,  0.0328,  0.1056,  0.0203,  0.0594, -0.8824,  0.0110,
        -0.0749, -0.1434, -0.3083,  0.0050, -0.0100,  0.0075,  0.0673, -0.0424,
         0.0490, -0.0205, -0.2425, -0.4012, -0.0185, -0.0098, -0.0321,  0.0840,
        -0.0197,  0.0884, -0.0140, -0.0378, -0.0851, -0.0197,  0.1602, -0.0604,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9295e-01,  1.0998e-03, -4.4991e-02, -1.0632e-01, -4.7253e-01,
        -5.3043e-02, -3.5533e-02, -3.2028e-01,  5.0276e-02, -3.5797e-02,
        -1.1765e-02, -3.0731e-02, -1.1189e-01, -6.6743e-02, -7.0254e-02,
        -2.5015e-01, -2.1231e-01, -1.1916e-01, -2.3737e-01, -1.3262e-01,
         3.6224e-02, -1.8234e-01, -1.2557e-01, -1.2815e-02, -5.8209e-02,
        -1.6212e-01, -5.6805e-02, -2.3997e-01,  9.3349e-04, -3.5696e-04,
        -4.2847e-01, -6.1362e-02, -1.6281e-01, -1.1905e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.2696, -0.0940, -0.1854, -0.0555, -0.2524,  0.0521,  0.1180,  0.1352,
        -0.0312,  0.0368, -0.0410, -0.1041,  0.0688, -0.0855, -0.6098,  0.0766,
        -0.0266,  0.0133, -0.2260,  0.0205, -0.0670, -0.0119,  0.0235,  0.0763,
        -0.1587, -0.3747, -0.0292, -0.0726, -0.0438, -0.0203, -0.2147, -0.0455,
        -0.0108, -0.0665, -0.2119, -0.0356, -0.1010, -0.2171, -0.0711, -0.1115,
        -0.1008, -0.0092, -0.1041, -0.0121,  0.0249, -0.0084,  0.0077,  0.0393,
        -0.0362, -0.0145,  0.0294, -0.0372], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2156,  0.0273, -0.2376,  0.0263, -0.5438,  0.0037,  0.0702,  0.0977,
         0.0195, -0.0249,  0.0167,  0.0563, -0.0952, -0.3906,  0.0152, -0.1954,
        -0.5816, -0.2200, -0.2686,  0.0714, -0.1560, -0.1061, -0.1534, -0.3240,
         0.0681, -0.0159, -0.0133, -0.0344, -0.1060,  0.0067,  0.1553,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4678, -0.8607,  0.0585,  0.0541,  0.0763,  0.0479, -0.1919,  0.0176,
        -0.0283,  0.1443, -0.0652, -0.3926,  0.0707, -0.4187, -0.4067, -0.2107,
         0.1024, -0.3549, -0.5375, -0.4648, -0.1209, -0.0989, -0.1854,  0.2240,
        -0.0192,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1765, -0.0290,  0.1317, -0.1375,  0.0385, -0.6032, -0.8385, -0.1126,
        -0.1751, -0.0912,  0.0128,  0.1210,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0376, -2.0450, -0.0147, -0.2860, -0.4768, -0.1221, -0.0317, -0.0076,
        -0.0368, -0.0030,  0.1917, -0.1088, -0.3981, -0.5078, -0.3460, -0.1998,
        -0.0199, -0.1145,  0.0452,  0.0738, -0.1543,  0.0575,  0.1307,  0.1005,
         0.1125, -0.3071,  0.0325, -0.0670,  0.1669,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1667, -1.8690, -0.0152,  0.2451, -0.0885, -0.6028, -0.9828,  0.2072,
        -0.1559,  0.1466,  0.2626,  0.0805,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0401, -1.6039,  0.0128, -0.5641, -0.0038, -0.5302, -0.5039, -0.0882,
        -0.1714,  0.0205,  0.0323, -0.0943,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0710,  0.1306, -0.3645,  0.0396, -0.3990,  0.1350, -0.1504, -0.1317,
        -0.8234, -0.1062, -0.0662, -0.5362, -0.0723,  0.0736, -0.2123, -0.4499,
         0.0011, -0.0040, -0.0368, -0.3861,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1075, -1.5079, -0.1340, -0.2920, -0.8304, -0.0093, -0.1117,  0.1489,
        -0.1786, -0.0177, -0.0565, -0.2034, -0.1273, -0.0555, -0.0350,  0.0246,
        -0.0276, -0.1307, -0.1531, -0.0158, -0.1582,  0.0325,  0.0775,  0.0917,
        -0.0712,  0.0793, -0.0709, -0.1516, -0.1771,  0.0528, -0.0043,  0.0206,
        -0.2610, -0.2522, -0.0505, -0.0420, -0.1314,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2068,  4.0944, -0.2997, -0.3611, -0.0516,  0.1970, -0.0535,  0.1750,
        -0.0964,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4229,  0.1410,  0.2945, -0.6397, -0.1098,  0.0389, -0.4024, -0.4682,
        -0.0115,  0.0247,  0.0731, -0.2064, -0.6013, -0.5475, -0.0010,  0.1119,
         0.0341, -0.0610, -0.2456, -0.2307, -0.0299,  0.0056, -0.0366,  0.0123,
         0.0307,  0.0635, -0.1296,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3266,  1.8354, -0.0456,  0.3022,  0.0743,  0.2592, -0.0138, -0.0322,
         0.0781, -0.1600,  0.0295,  0.4093,  0.1051, -0.0466,  0.0209, -0.0108,
         0.2846, -0.0112, -0.0740, -0.0380,  0.0019,  0.2092,  0.3175, -0.0684,
         0.0126, -0.2206,  0.0043,  0.1385,  0.5473, -0.0031,  0.0557,  0.0902,
        -0.1290, -0.0090,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.0082, -0.0176,  0.0339, -0.0241, -0.2445, -0.0558,  0.0292,  0.0518,
         0.0436,  0.1546,  0.0150, -0.3353,  0.0535,  0.0015,  0.0490, -0.2561,
        -0.4303, -0.0030, -0.0506,  0.0045,  0.0348,  0.0190, -0.1790,  0.0125,
         0.0248, -0.0439, -0.0069, -0.0176, -0.0624, -0.0098,  0.0399, -0.3367,
        -0.3726, -0.0477, -0.0349, -0.1423, -0.2337, -0.1095, -0.0020, -0.0289,
        -0.0837, -0.2407, -0.0362,  0.0108, -0.1037,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5661, -0.0418, -0.2380, -0.0959, -0.3154, -0.5099, -0.9735,  0.0070,
         0.0217, -1.1061, -0.0985, -0.1893, -0.4339, -0.1369, -0.1362, -0.1388,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3026,  0.0962, -0.0077, -0.0687,  0.0494, -0.0096,  0.1342, -0.0805,
        -0.9509, -0.7510, -0.3072, -0.5076,  0.1518, -0.1795, -0.2725,  0.0279,
        -0.1349, -0.0054,  0.0263,  0.0179,  0.0049,  0.0667, -0.0442, -0.0086,
         0.0758, -0.1668, -0.1128,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0476, -0.0991, -0.2300, -0.0087, -0.0992, -0.0075, -0.0423, -0.0194,
        -0.0640,  0.0223,  0.0062, -0.0041, -0.0961,  0.0048, -0.0611, -0.0608,
        -0.0793,  0.0416,  0.0963, -0.0393,  0.0502, -0.1111, -0.0513, -0.4178,
         0.0289,  0.0283, -0.2213, -0.2414, -0.0149, -0.1156,  0.0038, -0.1172,
        -0.2143, -0.0008,  0.0051, -0.1148, -0.0315, -0.1612,  0.0212,  0.0218,
        -0.0447, -0.1669,  0.0037, -0.0130,  0.1410,  0.3070], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3038,  0.0304,  0.0232,  0.0382, -0.0694, -0.1101, -0.1296, -0.3382,
        -0.0049,  0.1240,  0.0054,  0.0768, -0.0101, -0.0901, -0.0573,  0.0355,
        -0.0145, -0.1338, -0.2402, -1.2920, -0.0820, -0.0866, -0.0520,  0.0360,
         0.0230, -0.0297, -0.2513,  0.2182,  0.0568, -0.2070,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3609,  0.4881,  0.2836, -0.0775,  0.2422,  0.1791, -0.0791,  0.4228,
        -0.7595, -0.1275, -0.2855,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1020, -2.3149, -0.0980, -0.2342, -0.2423, -0.1349, -0.2976, -0.3382,
         0.1757, -0.1180,  0.1082, -0.0064, -0.2734, -0.0977, -0.0294, -0.4576,
        -0.1243, -0.1544, -0.0107,  0.1993,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3105, -1.1151, -0.8154, -0.5688,  0.0603, -0.2742, -0.4080,  0.0400,
        -0.0038, -0.2978, -0.1655,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0991e-01, -1.4951e+00, -5.9783e-01, -7.9717e-01,  6.6245e-02,
         2.8988e-01, -2.9611e-02, -2.7335e-01, -3.6142e-01,  1.2359e-01,
         3.5066e-02, -1.4800e-01, -1.4551e-01,  1.0347e-02, -6.4700e-04,
        -1.8850e-01,  1.0128e-01, -4.4777e-02, -6.0056e-02, -6.6724e-02,
         3.9993e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0689, -0.8855, -0.6905, -0.1053,  0.0368, -1.5045,  0.3145,  0.0254,
        -0.2506, -0.0381, -0.0798, -0.0674, -0.0612,  0.0231, -0.1363, -0.0769,
        -0.0390, -0.0822,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6235e-02, -3.7433e-01, -6.9669e-01, -7.6910e-03, -4.7322e-02,
         1.0246e-01,  1.0618e-01,  2.7502e-02, -3.4022e-02, -7.4277e-02,
         6.5939e-02,  7.2947e-02, -3.7916e-02, -2.5435e-01, -4.2914e-01,
         2.9958e-04, -3.5410e-02, -1.4870e-01, -4.4290e-02, -7.3842e-02,
        -2.6709e-02, -1.3074e-01, -3.6871e-01, -3.3065e-02, -1.0397e-01,
        -2.3396e-03, -9.6526e-02,  1.5791e-02, -8.7202e-02, -1.6883e-01,
         6.2526e-02, -1.9525e-02, -1.3847e-02,  4.0944e-02, -9.8806e-02,
         1.6142e-02, -2.7899e-02, -7.1206e-02, -9.9703e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4998, -1.7829, -0.0597, -0.5087, -0.0136, -0.0023, -0.0493, -0.0585,
        -0.0595, -0.0108, -0.1953, -0.3806,  0.0065,  0.0278,  0.1166, -0.0296,
        -0.2522, -0.0324,  0.0489, -0.0859, -0.0231,  0.0455,  0.0543,  0.0038,
         0.0124, -0.0357, -0.1040, -0.1183, -0.2642, -0.5493, -0.0444, -0.1010,
        -0.2161,  0.4158,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.2765,  1.3062,  1.1786,  1.6563, -0.8502,  0.1454,  0.8313,  0.6013,
         1.1053,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0339, -1.1351, -0.1221, -0.1962, -0.0320, -0.5756,  0.0099, -0.0898,
        -0.0289,  0.0106, -0.1305, -0.0579,  0.0521, -0.1211, -0.1549, -0.3364,
        -0.0771, -0.7069, -0.0632, -0.0289,  0.0394,  0.0168, -0.0122,  0.0623,
        -0.1770, -0.0253, -0.1344, -0.0565, -0.0365,  0.0042, -0.0646, -0.1588,
         0.0468, -0.1294,  0.0986,  0.0496, -0.0017, -0.0528, -0.0136,  0.3219,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0056e-02, -2.7211e-01, -6.6504e-01, -8.6660e-02, -3.0277e-01,
        -1.1822e-01, -4.4809e-02, -8.0121e-02, -2.2957e-01, -1.7299e-01,
        -5.0256e-03,  7.1298e-03,  3.1474e-03,  4.9162e-03, -7.4962e-02,
         2.3544e-02,  4.9259e-02,  9.4569e-02, -1.3650e-01, -4.0561e-02,
        -4.6621e-02,  3.2309e-02,  3.5543e-02, -2.0608e-03,  2.5015e-02,
        -8.0425e-02, -7.3571e-02,  6.0245e-02, -2.3333e-01, -3.7999e-01,
        -8.3186e-03, -7.3560e-02, -4.2841e-03,  1.1480e-02,  1.2114e-02,
        -5.7986e-03, -1.4767e-01, -2.0693e-01, -1.4064e-02, -2.5122e-02,
         3.0456e-02, -2.8455e-02,  2.7160e-02,  4.0653e-02,  6.1699e-03,
         1.4384e-02, -4.8129e-02, -1.0314e-02,  2.1487e-04,  1.5682e-02,
        -3.8618e-02, -5.8515e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0133, -1.3658,  0.0916, -0.1763, -0.2129, -0.0245, -0.4900, -0.0723,
        -0.1403,  0.0032, -0.1429, -0.1324, -0.3068, -0.2949, -0.2029, -0.4714,
        -0.2119, -0.1032, -0.4505, -0.0726, -0.0144,  0.0369,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6625, -2.4319, -0.2869, -0.0870, -0.0873, -0.0058,  0.0559, -0.2604,
        -0.1256, -0.0101,  0.0280, -0.3133, -0.0516, -0.1460,  0.1183, -0.2273,
        -0.0824,  0.1133,  0.2260,  0.0709,  0.3818,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4243, -0.6541, -0.2612, -0.1245, -0.0071, -0.0113, -0.1185,  0.0297,
        -0.1103, -0.0183,  0.0066,  0.0293,  0.0241,  0.0563,  0.0405,  0.0126,
         0.0917,  0.0501, -0.0840, -0.0078,  0.0064, -0.0136, -0.0349,  0.0036,
        -0.0133,  0.0779,  0.0138, -0.0905, -0.0158, -0.0845, -0.2745, -0.3740,
        -0.0929, -0.2558, -0.0867,  0.0069,  0.0094,  0.0026,  0.0275, -0.0705,
        -0.6697, -0.0406,  0.0151,  0.0538, -0.1519,  0.0211, -0.0966, -0.0634,
        -0.0266,  0.0612, -0.0216, -0.0120,  0.0477,  0.0269, -0.0926],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0738,  0.0018,  0.0820, -0.0701, -0.0361, -0.0718, -0.2605, -0.0664,
        -0.0238, -0.0164, -0.0095,  0.0514,  0.0087, -0.0313, -0.1711, -0.0174,
        -0.0153, -0.0488,  0.0192, -0.2594,  0.0140, -0.0665, -0.1671, -0.0149,
        -0.0105,  0.0654, -0.0645, -0.3439,  0.0411,  0.0180,  0.0479, -0.0440,
        -0.0622, -0.3798,  0.0055, -0.1235, -0.0083, -0.1629, -0.1776, -0.1743,
        -0.0397, -0.0022, -0.1342, -0.0551, -0.0418,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8117,  1.4411,  0.5364,  0.5368, -0.1108,  0.0796,  0.1581, -0.0902,
         0.3119,  0.0689,  0.0063,  0.3616,  0.1277,  0.6585,  0.5348,  0.2704,
         0.0903,  0.3592,  0.1590,  0.1977,  0.3048, -0.0636,  0.2368,  0.0596,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1966,  0.1324, -0.0044, -0.0279, -0.0865, -0.0542, -0.3236, -0.5523,
         0.0791,  0.0592, -0.0479, -0.1049, -0.0307, -0.2323,  0.0621,  0.0735,
        -0.2881, -0.2659, -0.1371, -0.3447, -0.0713, -0.0662, -0.2434, -0.5320,
        -0.0362, -0.1726, -0.0749, -0.0876, -0.1501, -0.1960, -0.0989,  0.0064,
        -0.0538,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0939,  2.3737,  0.0714,  0.5894, -0.0085,  0.0563, -0.0610, -0.0735,
         0.7649,  0.0094, -0.0633,  0.0243,  0.0998,  0.3481, -0.1718,  0.0758,
         0.1180, -0.2100, -0.0416,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6902, -0.0339,  0.1685, -0.0993, -0.2817, -0.1400, -0.3188, -0.3191,
        -0.5426, -0.0349,  0.0636,  0.0028,  0.0403,  0.0127, -0.0584,  0.0247,
        -0.0572, -0.5447, -0.1346, -0.2082, -0.2358, -0.5931, -0.0192, -0.0990,
         0.0057,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3145, -0.8319, -0.4260, -0.0614, -0.0411, -0.5954, -0.9048,  0.0940,
        -0.1385, -0.1601,  0.1138, -0.0251, -0.2812, -0.0695, -0.0579,  0.1221,
         0.1244,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.4366,  0.0423, -0.1089, -0.0617, -0.0280,  0.0045, -0.0674, -0.0649,
        -0.0088, -0.2919, -0.1457, -0.1067,  0.0101, -0.0056, -0.0680, -0.0829,
        -0.3607, -0.8303,  0.0166, -0.5266, -0.0202, -0.8502, -0.6981,  0.0372,
        -0.1049, -0.0347,  0.0087,  0.0221,  0.0950,  0.0589,  0.2109,  0.2574,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2654, -0.6161, -0.7599, -0.0336,  0.0604, -0.0141, -0.4049, -0.6050,
         0.0086, -0.0037,  0.0293, -0.0692, -0.2370, -0.0292,  0.0142,  0.0247,
        -0.0964,  0.0102, -0.2751, -0.0208, -0.1869,  0.0317, -0.2790, -0.4388,
         0.1156, -0.0528,  0.0265, -0.0884,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3573,  0.2676, -0.0079, -0.0558,  0.1423,  0.3369,  0.5380,  0.1810,
         0.0616,  0.1689,  0.0466,  0.6230,  0.0252,  0.0548,  0.0505,  0.1997,
         0.1028,  0.6603,  0.1234, -0.0966,  0.0266,  0.0415,  0.0740,  0.0559,
         0.2174, -0.0218, -0.1669, -0.0484, -0.3712,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1706,  0.1653, -0.0479,  0.1074,  0.8849,  0.0375, -0.1040,  0.0047,
         0.0293, -0.0042, -0.2130,  0.0465,  0.2428,  0.8115, -0.1221,  0.2141,
        -0.1767,  0.5057,  0.1073,  0.1389, -0.1087,  0.0214,  0.3027,  0.0435,
         0.0565,  0.4652,  0.2675,  0.0517,  0.0751,  0.1885, -0.0404, -0.3201,
         0.2263,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1050, -1.6024, -0.0548, -0.2476, -0.0978, -0.2769,  0.0191, -0.0392,
        -0.0963, -0.3349, -0.2427, -0.0711,  0.0139,  0.0475, -0.0487,  0.0565,
        -0.1028,  0.0909, -0.4639,  0.1238, -0.2085,  0.0697,  0.0162,  0.3512,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3113, -0.7725, -0.5007, -0.0265, -0.1346, -0.1000, -0.0539, -0.1963,
         0.1309, -0.2787, -0.0391, -0.3138, -0.1466, -0.1125,  0.0421,  0.0163,
        -0.0306, -0.1640, -0.0602, -0.4937, -0.2461, -0.0325, -0.1821,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0434, -0.9906,  0.0378,  0.0701, -0.0563, -0.1843,  0.1151,  0.0910,
         0.0066, -0.0488,  0.0398, -0.0837, -0.0783,  0.0790, -0.0450, -0.1150,
        -0.4713, -0.0445, -0.0500,  0.0324, -0.0539, -0.1435, -0.0860, -0.0880,
        -0.3530, -0.0619, -0.0236, -0.0627, -0.1079, -0.5439, -0.4227, -0.1466,
        -0.0608, -0.0401, -0.0966, -0.1933,  0.0025, -0.2149,  0.2996,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3240,  0.5568, -0.0375,  0.0665,  0.5037, -0.0130, -0.0829, -0.0725,
         0.1190, -0.0416, -0.0399,  0.0172, -0.0312, -0.0178, -0.0365,  0.2921,
        -0.0743,  0.0200, -0.0030, -0.0563,  0.2289,  0.0311, -0.1146,  0.0776,
         0.4173,  0.3303,  0.1520,  0.1707,  0.0681,  0.2908, -0.0257, -0.0164,
        -0.0447, -0.0093,  0.0254, -0.0427,  0.0501,  0.2298, -0.0924,  0.0022,
         0.0077, -0.2249,  0.0360,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2258, -1.1916,  0.0236,  0.0205, -0.1060,  0.0326, -0.1678, -0.0377,
        -0.3512, -0.0795,  0.0389,  0.0158, -0.1562, -0.0038,  0.0132,  0.0321,
        -0.2380, -0.2551,  0.0163, -0.0634,  0.0283, -0.0347, -0.0733, -0.0064,
         0.0263, -0.0050,  0.0121,  0.0181,  0.0132, -0.0078,  0.0057, -0.0788,
        -0.3336,  0.0069, -0.0744,  0.0249, -0.0107, -0.1456, -0.1379, -0.0883,
        -0.1148, -0.0543, -0.0045,  0.0948, -0.0100,  0.0039, -0.0222, -0.0147,
         0.0223,  0.0098,  0.0745,  0.0054,  0.0244,  0.0326, -0.0390,  0.0855,
        -0.0030], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7576e-01, -2.8483e+00,  1.3957e-03,  6.4946e-02, -2.5168e-01,
        -1.5209e-01, -4.6265e-01, -5.8775e-01, -9.5304e-02, -9.5308e-02,
        -4.1014e-01, -1.0818e-01, -2.6585e-02, -1.6763e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6393e-01, -1.1742e+00,  2.5283e-02,  2.6637e-02,  1.5510e-01,
        -1.1281e-02,  4.9466e-02, -2.1794e-02, -1.6922e-01, -1.8786e-01,
        -3.9509e-01, -3.4863e-01,  1.6415e-01,  4.9702e-02, -2.3904e-01,
        -1.3694e-04, -7.2049e-02,  1.8259e-02, -4.2187e-02, -1.7095e-01,
        -7.7603e-01,  2.1502e-02, -2.4655e-01, -7.8603e-02,  1.4525e-01,
        -7.9397e-02, -2.2540e-02,  2.8239e-02, -4.8133e-02, -1.8740e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4732, -1.1855,  0.4878, -1.9158, -0.7144, -0.0475, -0.1301, -0.3216,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0900, -0.3790, -0.1113, -0.1569, -0.2341, -0.6186,  0.0428,  0.0734,
         0.0283, -0.0861, -0.0639, -0.0526, -0.3235, -0.4770, -0.0805, -0.0289,
        -0.0536, -0.0308,  0.0271,  0.0181,  0.0834,  0.0020,  0.0853,  0.0985,
        -0.4601,  0.0224, -0.1868,  0.0335, -0.3766, -0.5604, -0.1431,  0.1661,
        -0.1215,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2587e-01,  1.9248e+00,  8.6560e-01,  3.2588e-01,  7.8633e-03,
         2.7683e-01,  2.0156e-01,  8.4076e-02,  4.5057e-02,  1.4159e-01,
        -1.6925e-02,  3.0626e-02,  1.6944e-02, -3.2758e-02, -2.3212e-02,
        -2.9986e-02, -5.6366e-02, -8.6989e-02,  3.6912e-02, -7.3457e-02,
         2.1296e-02, -4.1545e-02,  2.1458e-02, -3.0983e-02, -2.2981e-02,
         5.0657e-02, -6.2517e-03,  3.7163e-01, -5.8256e-02, -2.0970e-02,
         5.9520e-02,  1.7926e-01,  3.2429e-01,  4.2664e-01, -1.0342e-01,
         5.0652e-02,  1.4196e-01,  5.8884e-02,  1.8600e-03, -1.6271e-02,
        -1.2606e-02,  4.2434e-02, -5.3187e-02,  1.7059e-01, -1.3460e-01,
         3.0322e-02,  2.1780e-02,  9.2943e-02,  7.2716e-02,  5.7160e-02,
        -2.2186e-02,  1.9596e-01,  7.8508e-02,  1.3504e-01,  8.0045e-02,
        -4.2272e-02,  3.0325e-02,  1.0954e-03,  6.7603e-02,  1.2772e-01,
         1.3907e-01,  1.5259e-01,  4.8074e-01,  1.1471e-01, -6.1212e-02,
         9.6856e-02,  4.3125e-02, -1.0609e-01,  1.5415e-02,  1.9909e-01,
         3.7433e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2046, -0.0442, -0.1470, -0.0831, -0.2606, -0.0722, -0.7463, -0.1277,
        -0.0220,  0.1079, -0.0499,  0.0481,  0.0768,  0.0441, -0.0098,  0.0114,
        -0.0228, -0.0066, -0.1462, -0.7144, -0.3241, -0.0110, -0.0939, -0.2866,
         0.0085, -0.2662, -0.1677,  0.0288,  0.0234,  0.0009, -0.0441, -0.1221,
        -0.2910, -0.4390, -0.1117,  0.0614, -0.6651, -0.0753, -0.0214,  0.0436,
        -0.0347, -0.2688,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0697,  1.6665, -0.2058, -0.0064, -0.0242, -0.0498, -0.1334, -0.1270,
         0.0810,  0.2777,  0.1325,  0.1901,  0.3994,  0.0471, -0.0299,  0.0068,
         0.0620,  0.3314,  0.5034,  0.0205,  0.0924,  0.0239,  0.0418,  0.0270,
        -0.0461,  0.4058,  0.1889,  0.1457,  0.5213,  0.0721,  0.0721,  0.0228,
        -0.0370, -0.0044,  0.0282, -0.0782,  0.1938,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6567, -2.2978, -0.1512, -0.4936, -0.1657,  0.1653,  0.1999, -0.1386,
        -0.2038, -0.0128, -0.1187, -0.0116, -0.2695, -0.2308,  0.7421,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1701, -0.3884,  0.1898, -0.1767, -0.4149, -0.8765, -0.0936, -0.2871,
        -0.7224,  0.0059,  0.1131, -0.1712, -0.2327,  0.0351,  0.3760, -0.0858,
        -0.1078, -0.0221,  0.1112, -0.0252,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1958,  0.0856, -0.0621,  0.0613, -0.4609, -0.0792,  0.0062,  0.0201,
         0.0167, -0.0086, -0.1657, -0.1963, -0.0129,  0.0231, -0.1077, -0.1486,
         0.0082, -0.0924,  0.0627, -0.0900, -0.0331,  0.0026, -0.0381, -0.0143,
         0.0419, -0.1761, -0.2826, -0.0188,  0.0225, -0.1316, -0.3074, -0.0370,
        -0.0530, -0.0177, -0.1435, -0.2292, -0.0264, -0.1000, -0.0262,  0.0142,
         0.0757,  0.0388,  0.0070,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5479, -0.0055,  0.0072, -0.0404, -0.1171, -0.0816, -0.2528, -0.7140,
        -0.1409,  0.0976,  0.1430,  0.0099, -0.0777, -0.2162, -0.6767, -0.0566,
         0.0725,  0.0541, -0.0574, -0.1360, -0.4283, -0.0972,  0.0889,  0.2407,
        -0.1057,  0.0328, -0.3088, -0.0655, -0.3598,  0.0780, -0.0345, -0.2637,
        -0.0946, -0.2266, -0.2819, -0.0682,  0.1414, -0.3188,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1384, -2.8232, -0.7194, -0.7963,  0.2482, -0.6558, -0.0179,  0.1398,
        -0.0659, -0.4531, -0.0071, -0.0802, -0.2021, -0.0447,  0.1251, -0.0781,
        -0.0203, -0.3528, -0.1497,  0.1517,  0.0714, -0.1009,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9172e-02, -5.4460e-01, -6.7537e-02, -3.0909e-02,  7.0367e-02,
        -2.5600e-02,  1.9728e-01,  1.1968e-01,  9.2677e-02, -4.8658e-01,
        -8.3388e-02,  4.4404e-03, -1.2989e-01, -1.7344e-03, -1.9202e-01,
        -1.4752e-01, -4.6103e-01, -1.1907e-01, -3.1759e-02, -6.2884e-01,
        -1.0320e-01, -5.3152e-05, -3.5263e-01, -1.5292e-01,  8.8998e-03,
        -8.0800e-04, -9.3030e-02, -3.2932e-02, -5.8286e-02, -2.3599e-01,
        -5.7195e-02,  6.7925e-02, -1.0941e-01,  1.3727e-02, -4.3784e-02,
        -5.9667e-01,  3.1030e-03, -1.3290e-01,  1.8656e-01, -4.0673e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2887, -0.0456, -0.0657,  0.0610, -0.0361, -0.0266,  0.0158,  0.0609,
        -0.4138,  0.0085, -0.0386, -0.0354, -0.0904, -0.7119, -0.5270, -0.1180,
        -0.0290, -0.0661, -0.2016, -0.0090,  0.1265,  0.1202,  0.0863, -0.0169,
        -0.0675, -0.2832, -0.1411, -0.3775, -0.6153, -0.1638,  0.1900, -0.1537,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0909, -2.0187, -0.3106,  0.0607, -0.1500,  0.1605, -0.0572, -0.2794,
        -0.5747, -0.0029,  0.0340, -0.1760,  0.1337, -0.0515,  0.1179, -0.1161,
        -0.3333, -0.0438,  0.0139,  0.0347, -0.0170, -0.0608,  0.0342, -0.2880,
         0.0066,  0.0121, -0.1579, -0.0609, -0.0155, -0.1290,  0.0052, -0.0381,
        -0.0655, -0.1349, -0.1399,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.6709, -0.2714, -0.6724, -0.5845,  0.0421, -0.0286, -0.6234, -0.6349,
        -0.2395, -0.6068,  0.2901, -0.1330, -0.0215, -0.3933, -0.0185, -0.0713,
        -0.1464,  0.0028, -0.0635, -0.0115,  0.0192,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2612, -2.3280, -0.1231, -0.4232, -0.5044, -0.0046, -0.1094, -0.3014,
        -0.2970,  0.0471,  0.0245,  0.1060, -0.0693, -0.0106, -0.2582, -0.0379,
        -0.4034, -0.0644,  0.0465,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0896,  0.4163,  0.5933,  0.0276,  0.4426,  0.0700, -0.0603,  0.1117,
        -0.0323,  0.5158,  0.7346,  0.0103,  0.3687,  0.8063,  0.0860,  0.3010,
        -0.0792,  0.1316, -0.0897, -0.0598,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2130e-01, -1.2587e+00, -1.0157e-01, -6.6808e-01,  2.5829e-02,
         4.5287e-03, -6.4832e-01, -3.3010e-01, -2.4345e-02,  1.4826e-02,
         2.8546e-02, -1.0352e-01,  3.0534e-02, -2.2961e-01, -1.1825e-01,
        -1.6552e-01, -5.1247e-01, -4.4025e-04, -1.1607e-01, -2.5841e-01,
         4.4696e-02,  6.5906e-02, -3.7221e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1439, -1.1901, -0.4755, -0.5013,  0.0099, -0.2940, -0.0053, -0.2561,
        -0.3607, -0.6034, -0.0814, -0.2750, -0.0155, -0.4054, -0.0117, -0.0678,
        -0.0187, -0.0722, -0.1238, -0.0125, -0.3207, -0.0091, -0.1326, -0.1353,
         0.0633,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8424e-01,  1.7830e+00,  1.1437e-02,  2.0656e-01,  1.1287e-01,
         7.2815e-02,  6.4907e-02,  1.8261e-01, -7.6992e-02,  4.6972e-02,
         2.3369e-01,  1.6687e-03,  1.4098e-02,  1.1300e-02, -7.4960e-02,
        -4.2641e-03,  4.1792e-02,  4.4494e-03, -9.8852e-02,  1.2493e-01,
         5.2490e-01,  3.9393e-01,  2.0029e-01,  1.2589e-01,  1.5536e-01,
         1.3876e-01,  1.1806e-01,  1.0290e-01,  1.9207e-01, -2.5626e-03,
         4.3050e-02, -7.2151e-02,  3.3304e-02,  1.4073e-01, -2.5189e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1701, -0.0263, -0.0381, -0.3350, -0.0553,  0.0188, -0.3706, -0.1073,
        -0.7517, -0.0444,  0.0851, -0.1171, -0.1359, -0.3433, -0.2498, -0.6673,
        -0.1330, -0.3556,  0.0545,  0.2275, -0.0728,  0.2142,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3380,  0.2747, -0.1148,  0.1397, -0.0008,  0.1646,  0.0443,  0.0036,
         0.1228,  0.4782,  0.0186,  0.0960,  0.0455, -0.0947,  0.2256,  0.4175,
         0.1282, -0.0427, -0.0176,  0.0060,  0.0576,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0702, -3.4999,  0.1090, -0.1118, -0.0862, -0.0509, -0.5283, -0.7823,
         0.1123,  0.0906, -0.2130, -0.0886,  0.1380, -0.3227, -0.0617, -0.2675,
         0.1454,  0.1406, -0.1284,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7603e-02, -6.3938e-02, -1.4205e-01, -3.6269e-02,  4.6555e-04,
        -2.7975e-01, -8.5225e-03, -2.1783e-02,  1.4510e-02, -2.5915e-02,
        -4.6542e-02, -2.2649e-02, -6.4305e-02, -1.2565e-01,  4.0991e-02,
         2.2066e-03, -1.4573e-01, -2.4905e-01, -1.4428e-02, -8.6922e-02,
        -5.1546e-03, -5.3062e-02, -2.2810e-01,  4.6642e-02, -2.2837e-01,
        -3.9863e-02, -4.6014e-02, -1.6587e-01, -1.0100e-01, -5.5643e-01,
        -1.0591e-02, -1.5215e-01, -3.3648e-01, -3.8804e-01,  2.7024e-02,
        -4.4751e-02, -5.1691e-02,  2.5754e-02, -4.0922e-02, -1.2245e-01,
         1.6742e-02,  4.5376e-02,  3.0125e-02, -2.4259e-02, -8.3521e-03,
         1.3174e-01,  1.4951e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2089, -1.2474, -1.0142, -0.0317, -0.0192, -0.1205,  0.0346, -0.0594,
        -0.1767, -0.0221,  0.0545, -0.0882,  0.0054,  0.0197,  0.0766,  0.0869,
        -0.2650, -0.0165,  0.0234, -0.0881,  0.0275, -0.0594, -0.3055, -0.4346,
        -0.0726,  0.0992, -0.0677, -0.0019, -0.0707, -0.0184,  0.0736,  0.0616,
        -0.0024, -0.0898,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4710,  1.9079,  0.0479,  0.0066,  0.0344,  0.3376, -0.0803, -0.0892,
        -0.0144,  0.0830, -0.0511,  0.0131,  0.2419,  0.5247, -0.0453,  0.0744,
         0.0841,  0.1494,  0.4658,  0.6408, -0.0233,  0.0360,  0.0708, -0.0592,
         0.0276, -0.0462, -0.0628,  0.0409,  0.0529,  0.1207, -0.0332,  0.0060,
         0.1541, -0.1312,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.2504,  2.1045, -0.1573, -0.0699,  0.2283,  0.4542,  0.0209,  0.0165,
        -0.0139,  0.4437,  0.0697,  0.0298,  0.1938,  0.5400,  0.1156,  0.1952,
        -0.0615,  0.3201,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1527e-01,  6.7591e-02,  5.0672e-02,  9.6241e-02, -2.6929e-02,
        -5.1441e-02, -3.7761e-02,  4.3552e-02,  7.1146e-01,  9.9155e-01,
        -9.6818e-02,  5.7354e-03,  1.6444e-01,  1.1744e-01, -6.3407e-04,
         2.0641e-01,  1.0117e-01, -1.0494e-02,  8.8996e-02,  1.0513e+00,
        -4.7809e-02, -6.8498e-03,  1.3539e-01, -2.1074e-03,  5.6585e-02,
        -2.9681e-02,  2.3940e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2626, -0.0205,  0.0992,  0.0512, -0.1329,  0.0160,  0.0569, -0.3140,
        -0.8721,  0.1661, -0.0051,  0.0992, -0.7511, -0.0978, -0.0315, -0.0122,
        -0.0751, -0.0673, -0.2064, -0.6872,  0.0159,  0.0434, -0.1713,  0.0914,
         0.1170, -0.2045,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0546, -1.3861, -0.1013, -0.4243, -0.7347,  0.0120, -0.1055, -0.1171,
        -0.4142, -0.3128, -0.4192, -0.2712,  0.0410, -0.1043, -0.1251, -0.3832,
        -0.1524, -0.0538, -0.1500,  0.0486,  0.3932,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2936, -0.9326, -0.8056, -0.5131, -0.3598,  0.0215, -1.2156,  0.0133,
        -0.0522, -0.0222,  0.0013, -0.2200,  0.2632,  0.0085, -0.4296,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1656, -0.0755, -0.0086,  0.0461, -0.1595, -0.0604, -0.0933, -0.1708,
         0.0791, -0.0909,  0.0344, -0.0762, -0.9986, -0.4754,  0.0529,  0.1205,
        -0.0500, -0.2086,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0691, -1.7375, -0.7739, -0.7190,  0.0652, -0.3664,  0.0823, -0.0892,
        -0.0340, -0.2447, -0.5147, -0.2102,  0.0212, -0.3609, -0.0348,  0.0669,
         0.0235, -0.1075,  0.1887,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1545, -0.4620, -0.3022,  0.0203, -0.1111, -0.1100,  0.1498,  0.0414,
        -0.0553, -0.0078,  0.0195,  0.0023, -0.1099,  0.0278, -0.0477, -0.0214,
        -0.4564, -0.0277, -0.0395,  0.0394, -0.3971,  0.0598, -0.1757,  0.0484,
        -0.3627, -0.0298, -0.1460,  0.0098, -0.0880, -0.1312, -0.0476,  0.1745,
        -0.2395, -0.0312, -0.1239, -0.0336, -0.2376,  0.1106, -0.0194, -0.0275,
        -0.0077,  0.0104,  0.2707,  0.0230], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2139, -1.6621, -0.1685, -0.3018, -0.2704, -0.0858, -0.0988, -0.0368,
        -0.2863, -0.0751, -0.1281,  0.0865, -0.0191, -0.0378,  0.0462,  0.0590,
        -0.0100, -0.5874, -0.0341, -0.1926, -0.0908,  0.0395, -0.0384,  0.0141,
         0.1254,  0.0061,  0.0156, -0.2385,  0.0836,  0.0125, -0.0760, -0.0914,
         0.0386,  0.0296, -0.0088,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3520,  2.5813,  0.0599,  0.4981,  0.1281,  0.0220,  0.5928,  0.4172,
        -0.1499, -0.0794,  0.0358,  0.2147,  0.5450, -0.1028, -0.1591,  0.2764,
         0.0337,  0.0178,  0.0061,  0.0238, -0.1177, -0.0401, -0.2701, -0.2678,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5862e-01, -1.8939e+00, -4.9406e-02, -1.5124e-01, -7.3409e-02,
         5.5509e-02, -2.9195e-02, -1.6631e-01, -6.3910e-01,  6.5045e-02,
        -1.0642e-01,  9.9238e-04,  8.6128e-03, -2.0759e-01, -2.5764e-01,
         1.2599e-02,  3.9139e-02, -5.0491e-02, -2.3485e-01,  1.0242e-01,
        -6.0847e-02, -2.7384e-01,  3.8452e-02, -1.2014e-01, -5.3383e-02,
        -2.4647e-01, -8.0843e-02, -1.0342e-01, -4.9410e-02, -1.4565e-01,
        -3.0401e-01, -2.7056e-02, -1.2182e-03, -4.0649e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4335e-01,  4.6245e-02, -5.8375e-02,  1.8741e-02,  6.0146e-02,
         5.8338e-02, -3.1762e-01,  6.6449e-02,  6.3295e-02,  2.1386e-02,
         6.0469e-02, -1.8077e-02, -6.3291e-01, -1.0428e-01, -4.1479e-01,
        -9.3461e-01,  4.8953e-02,  4.1042e-02, -2.0195e-04,  9.5086e-02,
         8.8537e-02, -1.0391e-01, -3.5352e-02, -3.9795e-01, -2.6407e-01,
        -1.0806e-01, -2.2556e-01,  7.0155e-03, -1.6971e-03, -1.4015e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.2804,  0.1886, -0.1727, -0.9083, -0.2546,  0.0775, -0.1664, -0.3181,
        -0.4876, -1.2280,  0.3446,  0.4608, -0.1606,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1084,  0.1055,  0.3318,  0.0203, -0.0334, -0.0717, -0.2286, -0.3453,
        -0.7450, -0.0285,  0.0814, -0.4004,  0.0320, -0.2202, -0.7320, -0.0091,
        -0.0514, -0.1422,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5581, -0.6042, -0.6999, -0.3112, -0.2353,  0.2893,  0.1455, -0.4381,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1816, -0.8008,  0.0967, -0.0831,  0.0271, -0.0612, -0.0896, -0.0726,
         0.0257,  0.0234,  0.0234,  0.0588, -0.1145, -0.0035, -0.0280, -0.0803,
         0.0011, -0.0297,  0.0374,  0.0037,  0.0742, -0.0540, -0.1817, -0.1032,
         0.0357, -0.0670, -0.0359, -0.0704, -0.4100, -0.1222,  0.0151, -0.3248,
        -0.0866, -0.8480, -0.0344, -0.3545, -0.1655, -0.0425,  0.0583, -0.0215,
        -0.0908, -0.0601, -0.0719,  0.0217,  0.1385, -0.0645,  0.2158],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3811,  0.9164, -0.5562,  1.0075,  0.0602,  0.2311, -0.0663,  0.0041,
        -0.1134,  0.2845, -0.0123,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3022, -1.2862, -0.1883, -0.7558, -0.1125,  0.0879, -0.2407, -0.9387,
        -0.0851, -0.4948, -0.0407,  0.0691, -0.3490,  0.0063,  0.1214, -0.0755,
        -0.1059,  0.1395,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0236, -0.5821, -1.4435,  0.0798,  0.2734,  0.2111, -0.2471,  0.0478,
        -0.5486, -0.1717, -0.1899, -0.1179, -0.1479, -0.1236,  0.0362,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0175, -2.0998, -0.1950, -0.3661,  0.0774, -0.6202, -0.7849,  0.2763,
        -0.1984, -0.2609, -0.4325,  0.1261, -0.2319, -0.1081,  0.2041,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5776,  0.0556,  0.0160,  0.0113,  0.0131,  0.0442, -0.0109,  0.0104,
        -0.0971, -0.1743,  0.0126, -0.6198, -0.5972, -0.0659,  0.0407, -0.0396,
        -0.3414,  0.3451, -0.0550, -0.0439, -0.4208, -0.5939,  0.0276,  0.0665,
        -0.0206,  0.1674,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3767,  0.1460, -0.1377, -0.0097,  0.1991,  0.3056, -0.0205, -0.8806,
        -1.5918, -0.1296,  0.0705,  0.0168, -0.0991,  0.0317, -0.0559,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0383, -1.2830, -0.1204, -0.4162, -0.2887, -0.1393, -0.3354, -0.1261,
        -0.1260, -1.0331,  0.1233,  0.0334, -0.1818, -0.0897,  0.1414, -0.0226,
        -0.0704, -0.0311, -0.2113, -0.1897, -0.2037, -0.0458,  0.0365,  0.0910,
         0.1089,  0.1774,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9133, -0.0622,  0.1153,  0.2930,  0.0463,  0.0055, -0.0500, -0.1793,
        -0.1665, -0.1148, -0.5295, -0.0779, -0.1231, -1.2233, -0.2995, -0.3144,
        -0.1395, -0.4499,  0.1445, -0.3096,  1.5688, -0.1295,  0.4967,  1.2953,
         2.3242, -1.0374,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.3059, -1.5730,  0.2105,  0.0360,  0.0157,  0.0498, -0.2766, -0.2174,
        -0.0663, -0.2986, -0.1497, -0.1861, -0.3533,  0.0502, -0.1961,  0.0249,
        -0.0177, -0.0170, -0.0589,  0.0221,  0.0822, -0.0850,  0.0643, -0.1667,
        -0.3959, -0.0174, -0.1006, -0.0059,  0.0155, -0.0067,  0.0189, -0.2108,
         0.0805,  0.0868,  0.0054,  0.0484,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1135, -0.5679, -0.2847,  0.0604, -0.2972,  0.0166, -0.2811, -0.4036,
         0.0093, -0.0085, -0.1060,  0.0139, -0.0097,  0.0470, -0.0523, -0.0806,
         0.0297, -0.0203,  0.1318,  0.0720,  0.0014, -0.5553, -0.0939,  0.0260,
        -0.2774, -0.0162,  0.0373, -0.1336, -0.3842,  0.0828, -0.0434, -0.2389,
        -0.2162,  0.1079, -0.0359,  0.0665,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2614, -0.1455, -0.2047, -0.1518, -0.1260, -0.6478, -0.1179, -0.0707,
        -0.0614, -0.0079, -0.1111, -0.3016, -0.4115, -0.0225,  0.0816,  0.1511,
        -0.2065, -0.0227, -0.0418,  0.0393,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3495, -0.2704, -0.0157, -0.2367, -0.0052, -0.0008,  0.1041, -0.0056,
         0.0180, -0.2443, -0.0256, -0.0306, -0.0307, -0.0969, -0.4300, -0.1964,
        -0.0946, -0.2263, -0.0925, -0.0443,  0.0736, -0.0517,  0.0319,  0.0909,
         0.0227, -0.0172, -0.0485, -0.0813, -0.0371, -0.1635, -0.4986,  0.0558,
         0.0172, -0.3509,  0.0747, -0.1347,  0.2516,  0.0336,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8140e-01, -8.9685e-01, -5.3185e-01, -4.4104e-01, -1.9273e-02,
         5.9862e-03,  2.0545e-03,  6.1068e-02, -1.2024e-02,  1.5863e-02,
        -2.3866e-01, -2.2040e-01, -4.6607e-03,  3.9322e-02, -5.3960e-02,
        -2.2820e-01,  4.8275e-02, -2.4409e-01,  1.4737e-02,  2.3025e-02,
         5.5390e-02,  6.4434e-02,  8.0621e-02,  6.1518e-02,  8.2689e-02,
        -3.6121e-02, -2.0012e-02, -1.2497e-01, -3.8460e-01, -2.1080e-02,
        -8.6738e-04, -2.3279e-02,  3.5009e-03, -2.2107e-02, -9.4955e-02,
        -1.2038e-02, -3.8540e-02,  2.5648e-02,  2.1561e-02,  7.5288e-02,
        -1.6196e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8652e-01, -3.1787e+00, -1.4324e-01, -8.6429e-02, -1.5359e-01,
        -4.2624e-01, -1.4634e-01, -9.0067e-02, -9.6464e-02,  3.0224e-02,
         8.2627e-04, -3.1095e-01, -3.9912e-01, -2.0013e-02,  8.2406e-02,
        -4.6833e-01, -7.7052e-02, -5.8604e-01,  3.7400e-02,  7.1284e-03,
        -1.5362e-01,  4.9324e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6623e-02,  9.4780e-03, -3.3596e-03,  2.8182e-02,  1.1741e-01,
        -1.3690e-01, -7.2502e-03,  3.6278e-02, -7.3407e-02,  2.1263e-02,
        -1.1623e-02, -2.1989e-01, -2.2665e-04, -8.6158e-02, -7.6241e-02,
        -2.5388e-01,  5.8190e-02, -1.2003e-01, -2.0558e-02, -2.2230e-01,
        -3.1991e-01,  1.0100e-01,  4.2414e-02, -2.0088e-01, -3.2303e-02,
         6.1180e-02, -2.3706e-02, -1.8318e-01, -2.7046e-01,  1.0373e-02,
        -1.2130e-01, -1.4580e-01, -6.3747e-02, -4.9027e-02, -6.3403e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3685, -0.2554, -1.0155, -0.2687, -0.3669, -0.0962,  0.0979,  0.0300,
        -0.0169, -0.0392,  0.0159,  0.1103,  0.0123,  0.0774,  0.0836, -0.0691,
         0.3980, -0.1505, -0.5436, -0.1474, -0.3668, -0.0394, -0.0582, -0.4026,
        -0.0434,  0.0853, -0.4868,  0.1212, -0.0280,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5319, -2.9769, -0.6254, -0.3088, -0.3563, -0.3763, -0.0142,  0.0769,
        -0.0387, -0.1453, -0.1410, -0.2448, -0.0948, -0.0591, -0.0156, -0.2945,
        -0.1548, -0.0141, -0.0397,  0.1079, -0.0525, -0.0193, -0.0198,  0.1024,
         0.2916,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2534,  0.0378,  0.0866,  0.1232, -0.6450, -0.0683, -0.1755, -0.5112,
        -0.6154, -0.1197,  0.0347,  0.0749,  0.0172,  0.0238, -0.4153, -0.4625,
        -0.0293, -0.1448,  0.0110,  0.0236, -0.0686,  0.1105,  0.0179,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2750e-02, -6.6349e-02, -6.8227e-03, -2.7904e-02,  6.1184e-02,
         2.1129e-01,  3.6173e-02,  2.4899e-02,  3.3300e-02, -1.0031e-02,
        -6.5000e-03, -4.3752e-02, -1.7494e-01, -3.3677e-01,  4.8209e-02,
        -8.6446e-02, -6.0467e-02, -1.9897e-01, -8.2612e-02, -5.3016e-02,
        -9.6500e-02, -4.8707e-02, -6.8763e-02, -4.9872e-04, -6.3018e-02,
         1.8570e-02, -6.1538e-01,  5.9798e-03, -3.4658e-01, -5.9638e-02,
        -1.3875e-01, -2.1197e-02, -5.7599e-02, -2.0866e-02, -9.6188e-02,
         1.5921e-02, -8.1758e-03,  3.0088e-02, -1.4396e-03,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0955, -0.4669,  0.0035, -0.6890, -1.7491, -0.1213,  0.4065, -0.5751,
        -0.7408,  0.0350, -0.0331, -0.4442,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 7.8599e-03, -5.1487e-01, -1.6747e+00, -1.8801e-02, -7.2696e-02,
        -1.7738e-01, -2.0268e-01,  1.6669e-01, -8.7022e-02, -1.7788e-01,
        -1.5362e-02,  7.1569e-02, -1.8716e-02, -1.9364e-03, -1.0880e-01,
         7.5430e-02, -1.4546e-01, -2.3544e-01, -1.1756e-02, -5.6994e-02,
        -1.4354e-01, -9.9198e-02,  2.1422e-02, -2.3568e-02,  8.4759e-03,
         1.2229e-02,  9.7895e-03, -5.5863e-03, -4.3907e-02, -9.5064e-02,
         7.3513e-02, -2.6345e-02, -1.8950e-01,  6.3219e-04,  3.4608e-02,
        -4.5630e-02, -2.8271e-01, -1.6270e-02,  3.5521e-02, -1.7452e-04,
         4.1999e-02,  2.1251e-02,  1.4172e-02, -2.8316e-03,  9.1069e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8153e-02,  3.1913e+00,  2.0947e-01,  5.6953e-01, -1.4782e-01,
        -3.4665e-03, -9.7820e-02,  1.9512e-01,  7.9838e-01, -1.7769e-01,
        -4.5283e-02,  2.6088e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4205,  3.2292,  0.0450,  0.4597, -0.0533,  0.1882,  0.1042, -0.1660,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4415, -1.4654, -0.1649, -0.1241, -0.4147,  0.0856, -0.0654, -0.0872,
        -0.1122, -0.5187, -0.0566, -0.0839, -0.0944, -0.0105, -0.0137,  0.0531,
         0.0620, -0.0554,  0.1643, -0.0129,  0.0283,  0.0807, -0.0365,  0.0463,
        -0.0828, -0.0150,  0.0343, -0.0767, -0.0857,  0.0163, -0.0309, -0.0495,
         0.0033,  0.0306, -0.0715, -0.0095,  0.0036, -0.0159, -0.0167,  0.0224,
        -0.0411, -0.0328, -0.0078, -0.1161,  0.1105,  0.3734, -0.4921, -0.1592,
        -0.2606, -0.3457, -0.1931, -0.0321, -0.1682,  0.0137,  0.0115, -0.0527,
        -0.1520, -0.0187, -0.0206,  0.0340,  0.2152], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1967, -0.0032,  0.0253, -0.1035, -0.2343,  0.1340, -0.1422, -0.0251,
        -0.0731, -0.0565, -0.0527, -0.3397,  0.0021, -0.2313, -0.5208,  0.0078,
        -0.0328,  0.0564, -0.2600, -0.1499, -0.3888, -0.3198, -0.1230, -0.0231,
        -0.2399, -0.0863, -0.0328, -0.1077, -0.1322, -0.1650,  0.0555,  0.0792,
        -0.4239,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5978, -0.3597, -1.2119, -0.5606,  0.0163,  0.0545, -0.4859,  0.0446,
         0.0763, -0.0257,  0.0932, -0.0372, -0.1608,  0.0227, -0.0938,  0.2727,
         0.1505, -0.5350, -1.2419,  0.1116,  0.1807, -0.2191, -0.0750, -0.2348,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1836, -0.6595, -1.0348, -0.3201, -0.2298, -0.6254, -0.1499, -0.2932,
        -0.0459, -0.2412, -0.5002, -0.2295, -0.5543,  0.0452, -0.0893, -0.0488,
         0.0580,  0.0184,  0.1209,  0.1627, -0.1569, -0.0187,  0.0894, -0.1438,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0679,  0.1863, -0.1735, -0.4821,  0.0593, -0.0168,  0.0607, -0.2707,
         0.0135,  0.0546,  0.0166, -0.0434, -0.0545, -0.0625, -0.4556, -0.4184,
        -0.3507, -0.0310, -0.1117, -0.0793, -0.4920, -0.0418, -0.1867, -0.1455,
         0.0290, -0.0257, -0.2894, -0.0373,  0.0493, -0.0056, -0.1116, -0.0569,
         0.3180,  0.3380,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4555,  0.1201,  0.0137, -0.0158, -0.3540,  0.0609, -0.0756, -0.0264,
        -0.1204, -0.0411,  0.0243,  0.0702,  0.0521, -0.5859, -0.2046,  0.1965,
        -0.5575, -0.0227, -0.3642,  0.0738, -0.2187, -0.4839, -0.0641, -0.0149,
        -0.1221,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3277,  0.1178, -0.1889, -0.0283, -0.0571, -0.2503, -0.6821, -0.2901,
        -0.4515, -1.5091,  0.1715, -0.1613, -0.0561, -0.0164, -0.0018, -0.4042,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2827, -1.8776, -0.1174, -0.4956, -0.1627, -0.1681, -1.1185, -0.0430,
         0.0869, -0.0058,  0.0600, -0.0532, -0.1116,  0.0508, -0.0439,  0.0297,
        -0.1610, -0.2342, -0.1170, -0.0502,  0.0442, -0.0422, -0.0988,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5866, -0.4550, -0.2260, -1.2024, -1.3048, -0.3308, -0.0881, -0.0997,
        -0.1953, -0.0191, -0.0797,  0.0845,  0.0928, -0.0070,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.0208, -0.4831, -0.2766, -0.5464, -0.0660,  0.0648, -0.3808,  0.1411,
        -0.1853, -0.1562, -0.7023, -0.6152, -0.0635, -0.0809, -0.0517,  0.1023,
        -0.0222, -0.6622,  0.0517, -0.5515, -0.0757,  0.0741, -0.0719, -0.0607,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5440,  0.2379, -0.4303, -0.1365, -0.9075, -0.1084, -1.0258, -0.0245,
         0.0237,  0.1214, -0.3598,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0970, -0.0143, -0.0516, -0.0311,  0.0031, -0.3864, -0.7897, -0.0040,
        -0.3445, -0.0602, -0.1272,  0.0448, -0.0628,  0.1250, -0.2888, -0.0832,
        -0.2113, -0.3757, -0.1950, -0.2154, -0.2330,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2915e-01, -1.2725e-02,  1.6134e-01,  1.5906e-01, -1.1819e-01,
         9.0883e-02,  1.8542e-01, -1.1671e+00, -6.9975e-02, -4.0591e-01,
        -4.5370e-02, -9.3041e-04,  3.8566e-02, -3.9832e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1739,  0.2980,  0.0449, -0.0089, -0.0933,  0.0275,  0.0040, -0.0184,
         0.0695,  0.0690, -0.2049, -0.6972, -1.7555,  0.0745, -0.0645, -0.2195,
        -0.2578, -0.0171, -0.1721, -0.0985, -0.2460,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8053e-01,  2.7177e+00, -2.2554e-01,  4.9659e-01, -5.3513e-01,
         5.2487e-01,  1.8838e-01,  1.2825e+00, -2.3673e-03,  1.8300e-01,
         4.1661e-01, -2.9497e-01,  2.2644e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0743e-02, -7.8567e-01, -9.4274e-02, -8.8721e-02, -6.5605e-02,
        -1.2067e-01, -2.9830e-01, -5.7748e-01, -2.7116e-01,  3.8081e-02,
         4.3976e-02,  3.0878e-02, -3.0622e-02,  3.4486e-02,  1.8759e-02,
        -3.6135e-02,  5.3244e-03,  2.3944e-04, -1.9630e-02,  9.4542e-02,
         9.7501e-03,  9.8728e-02,  3.6025e-02, -9.3059e-02, -6.9412e-01,
        -9.7871e-03, -3.9289e-01, -4.0065e-01, -1.7126e-01, -2.2276e-03,
        -5.1994e-02, -1.8996e-02,  1.2544e-02, -2.1942e-02,  3.3022e-02,
         2.2683e-02, -1.1797e-01, -2.9410e-02,  1.6629e-02, -4.3465e-02,
        -9.1318e-02, -3.5393e-02,  2.4239e-02,  3.9063e-02,  1.0778e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2497, -0.3975, -0.2109, -0.0272, -0.0132,  0.0240, -0.0424, -0.4209,
        -0.1139, -0.1535, -0.5941, -0.2215, -0.0407,  0.0543, -0.2899, -0.1118,
        -0.7552,  0.0247, -0.0480,  0.0723, -0.0442, -0.3186,  0.0232, -0.2975,
         0.0813,  0.0775, -0.1087, -0.0380, -0.4513, -0.1491, -0.0791, -0.1064,
         0.0140,  0.1565,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7311,  0.0475,  0.0171, -0.0363, -0.0051, -0.0706,  0.0663, -0.2320,
         0.2877, -0.0400, -0.0037, -0.0336,  0.0746, -0.1560, -0.1724, -0.1161,
        -0.0311,  0.0640, -0.0958, -1.7760, -1.5702, -0.0530,  0.2195,  0.0938,
        -0.1250, -0.4226,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1231, -2.6077, -0.0716, -0.7386,  0.3607, -0.2823,  0.0958, -0.3097,
        -0.0722,  0.0154,  0.0218,  0.0668,  0.0844, -0.0697, -0.0815, -0.2393,
        -0.2159,  0.1039, -0.2115, -0.0152, -0.1008,  0.0852,  0.3546,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1030, -1.0397, -0.0772, -0.1798, -0.0093, -0.0233, -0.1464, -0.3709,
        -0.8784, -0.1100, -0.0368, -0.1180,  0.0566, -0.0117, -0.9262,  0.2738,
         0.0044, -0.2832, -0.4714, -0.1183, -0.0309, -0.0502, -0.0370, -0.0542,
        -0.0311, -0.0038, -0.4003, -0.5968, -0.0198,  0.0264, -0.0540, -0.0630,
         0.0145,  0.0499, -0.0038,  0.0239, -0.1480,  0.0470,  0.0119, -0.0040,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5121, -0.2586, -0.0192, -0.0563, -0.4727, -0.0664, -0.2382, -0.3135,
         0.1198, -0.0340,  0.0063,  0.0155, -0.1310, -0.0350, -0.1208, -0.4363,
        -0.2287,  0.0485, -0.0192, -0.2349,  0.0196, -0.3061, -0.1908, -0.0391,
        -0.1595, -0.0067, -0.0220, -0.0570, -0.0327, -0.1586, -0.3506,  0.0389,
        -0.0966, -0.1184,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.1106, -0.0246,  0.0167, -0.0138, -0.1734,  0.0204,  0.0332,  0.0179,
         0.0088,  0.0112, -0.0399, -0.0977,  0.0542, -0.0479, -0.6172,  0.1214,
        -0.0813, -0.0562, -0.2000, -0.0118, -0.0474,  0.0080,  0.0081,  0.0586,
        -0.1084, -0.3323,  0.0752,  0.0622, -0.0181, -0.0118,  0.0309, -0.0044,
         0.0633, -0.0963, -0.1464, -0.0372, -0.1255, -0.1718,  0.0041, -0.0909,
        -0.1635,  0.0010, -0.1631, -0.0271,  0.0359,  0.0115,  0.0111,  0.0238,
        -0.0424, -0.0195, -0.0347,  0.0011], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2776, -0.0329, -0.0582, -0.1172, -0.6602, -0.0963, -0.2441,  0.0784,
        -0.0737,  0.0702,  0.0860, -0.0156, -0.1517, -0.6156, -0.1713, -0.1349,
        -0.5258,  0.0076, -0.2633,  0.0072, -0.3117, -0.2326, -0.1746, -0.7204,
        -0.0265, -0.0199,  0.0329, -0.0581, -0.1474, -0.0207,  0.0502,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1449,  1.0781, -0.1012,  0.1202, -0.1483, -0.0180,  0.1491,  0.1345,
         0.0561, -0.2739,  0.0996,  0.5503,  0.0018, -0.0438,  0.3634,  0.2584,
         0.0049,  0.3521,  0.5147,  0.4075,  0.1616,  0.0461,  0.1001, -0.1985,
        -0.2078,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3362,  0.0200, -0.2003, -0.0377, -0.1238, -0.6266, -1.0519, -0.1623,
        -0.0528,  0.0439,  0.0238,  0.0329,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0099, -2.5494, -0.1704, -0.2957, -0.6619, -0.2257,  0.0469, -0.0700,
        -0.0345, -0.0897, -0.0715, -0.1599, -0.4068, -0.2958, -0.0519, -0.4610,
        -0.0649, -0.0981, -0.0450,  0.0043, -0.1751, -0.0261, -0.1921,  0.0781,
         0.2261, -0.3868, -0.0123, -0.0442, -0.0601,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0244, -2.3135, -0.1983, -0.0578, -0.0516, -0.5764, -0.8620,  0.4176,
         0.0737,  0.0156,  0.1999,  0.1357,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4110, -2.2334, -0.2341, -0.5949,  0.0577, -0.5086, -1.2382, -0.0811,
        -0.3772, -0.1879,  0.1414, -0.2670,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4809,  0.3719, -0.4457,  0.1374, -0.5564, -0.1026,  0.0533, -0.2200,
        -0.7567,  0.0301, -0.0602, -0.7790, -0.0651, -0.1313, -0.2702, -0.5882,
         0.0219,  0.0920,  0.1964, -0.1836,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6669e-01, -1.5735e+00, -9.5525e-02, -3.7546e-01, -7.2856e-01,
         7.5536e-03, -1.5381e-01, -1.2251e-01, -1.0249e-01, -3.6371e-02,
         1.8456e-02, -1.7071e-01, -1.0747e-01, -4.8618e-02,  5.5574e-02,
        -1.2647e-03, -1.3259e-01,  6.1669e-02,  9.4365e-03, -2.4703e-02,
        -8.9878e-02,  1.0116e-01,  1.0211e-01, -6.3490e-02,  2.2708e-02,
        -4.4599e-02,  3.8498e-02, -2.0203e-01, -2.4244e-01,  2.9436e-02,
        -5.4250e-02, -1.7430e-02, -1.7190e-01, -1.6369e-01, -7.3163e-03,
         5.5340e-02, -1.5888e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0033, -3.9349, -0.3934,  0.4650,  0.1351,  0.0914, -0.3922,  0.0326,
         0.1426,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0875,  0.1429,  0.0866, -0.7936, -0.0447, -0.0699, -0.2702, -0.3512,
         0.0172, -0.0666, -0.0878, -0.0711, -0.3293, -0.5590,  0.0043,  0.0128,
         0.0205,  0.0658, -0.2383, -0.3942,  0.0312,  0.0102, -0.0354,  0.0042,
         0.0200, -0.0631, -0.1752,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6854,  2.5090, -0.0338,  0.4102,  0.2329,  0.3948, -0.0457, -0.0310,
        -0.0087, -0.0205, -0.1495,  0.3026,  0.0197, -0.0385, -0.0172, -0.0739,
         0.2365, -0.0461,  0.0122, -0.0575, -0.0477,  0.3222,  0.3340, -0.0569,
        -0.0348, -0.0793,  0.0274,  0.1339,  1.0854,  0.0213, -0.0497,  0.1965,
        -0.4259,  0.3303,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.2486,  0.0168,  0.0200,  0.0290, -0.1236,  0.0158,  0.0231,  0.0251,
         0.0391,  0.0694, -0.0387, -0.1467, -0.0474, -0.0416, -0.0020, -0.1919,
        -0.5524, -0.0922, -0.1357,  0.0061,  0.0883,  0.0517, -0.1654, -0.0991,
        -0.0074, -0.0596, -0.0471, -0.0277,  0.0198,  0.0408, -0.1606, -0.2618,
        -0.2554, -0.0543, -0.0819, -0.1556, -0.1973, -0.0132,  0.0599,  0.0390,
        -0.0799, -0.2393,  0.0476,  0.0475, -0.1066,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4904,  0.0047, -0.0883, -0.0485, -0.0700, -0.6013, -0.9376, -0.0296,
         0.2049, -0.7140, -0.1052, -0.2923, -0.6180, -0.2565, -0.0369, -0.1740,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0724, -0.0629,  0.1189,  0.0714, -0.1121, -0.0913, -0.1153,  0.1582,
         1.1145,  0.9115, -0.0286,  0.6008, -0.0733,  0.2512,  0.3108, -0.1378,
         0.0420, -0.0554, -0.0699,  0.0115, -0.0029, -0.1195,  0.0345, -0.0483,
        -0.1551,  0.0233, -0.0824,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1277,  0.0308, -0.0907,  0.0171, -0.0718, -0.0013, -0.0648,  0.0019,
        -0.0525, -0.0906,  0.0058, -0.0237, -0.1471, -0.0103, -0.1402, -0.0992,
        -0.1570,  0.0075, -0.0051, -0.0819,  0.0110, -0.0799, -0.0772, -0.2481,
         0.0089, -0.0181, -0.1062, -0.2686, -0.0476, -0.1968,  0.0108, -0.2240,
        -0.1987, -0.0282,  0.0012, -0.0308, -0.0008, -0.1472,  0.0809,  0.0755,
        -0.0739, -0.0822, -0.0069,  0.0311,  0.0723, -0.1670], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2968,  0.0300,  0.0837,  0.0463, -0.0094,  0.0301, -0.0211, -0.2638,
         0.0433, -0.0445,  0.0126,  0.2050,  0.0693, -0.0216, -0.1094, -0.0112,
         0.0124, -0.0785, -0.4793, -1.1017, -0.1230, -0.1350, -0.0369, -0.0739,
        -0.0609, -0.0419, -0.4293, -0.0914, -0.2004, -0.0422,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2582, -0.2670, -0.4363, -0.1840,  0.1207,  0.0321,  0.1670,  1.3449,
        -0.2364,  0.4211,  0.5330,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6919, -1.7347, -0.2540, -0.4954, -0.2431, -0.1039, -0.0331, -0.5126,
         0.1247, -0.0936, -0.0059, -0.0267,  0.0994,  0.0107, -0.0456, -0.2778,
        -0.0699, -0.1067, -0.1116, -0.0766,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1195, -1.6651, -0.9975, -0.3780,  0.1017, -0.3787, -0.4530, -0.0749,
        -0.1367,  0.3001,  0.3132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1125, -1.0525, -0.5164, -0.5401, -0.1366,  0.0313, -0.1580, -0.2984,
        -0.4176,  0.0658, -0.0233, -0.0154, -0.0665, -0.0495,  0.0021, -0.1448,
         0.0124, -0.0445, -0.0968, -0.0188,  0.2588,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0631, -1.7111, -0.7108, -0.1274,  0.0171, -1.1812,  0.1804,  0.0097,
         0.0180,  0.0247,  0.0878, -0.0517,  0.0125,  0.0067, -0.0274,  0.0575,
         0.2022, -0.0177,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6315e-02, -6.4658e-01, -9.0123e-01,  6.1419e-02, -8.5865e-02,
         5.1512e-02, -9.0366e-02,  2.5382e-02, -1.6849e-02, -1.4536e-04,
         6.2277e-02,  8.7839e-02, -6.6363e-02, -3.8933e-01, -2.3916e-01,
         2.1887e-02, -1.5625e-01, -4.6418e-01, -6.0077e-02, -8.0551e-02,
        -1.2569e-01, -1.8946e-01, -3.3668e-01,  5.5917e-03, -1.0877e-01,
         1.7757e-02, -1.6242e-01, -4.1058e-02, -7.0152e-02, -2.1385e-01,
        -1.6178e-02,  1.4803e-02, -7.6140e-02,  2.4424e-02, -8.8726e-02,
        -2.0366e-02,  1.6481e-02, -3.0228e-02,  1.3592e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1736e-01, -2.0151e+00, -5.4791e-02, -3.3291e-01, -5.6994e-02,
        -8.0775e-02, -1.2862e-01, -5.9972e-04, -2.1893e-01, -6.7505e-02,
        -1.2583e-01, -6.1201e-01, -1.3569e-01, -5.4195e-03, -1.0873e-01,
        -8.9615e-03, -5.5346e-02, -2.7503e-03,  2.5261e-03, -4.2090e-02,
         7.0122e-02,  4.4147e-02,  4.1026e-02, -7.5251e-03,  3.4700e-02,
        -2.3365e-02, -6.6390e-02,  5.7863e-02, -2.4805e-01, -4.4815e-01,
         7.3974e-03, -7.4575e-03,  1.3136e-01,  7.7413e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.5429, -0.1047, -1.1275, -1.5371,  0.2301, -0.3536,  0.2047, -0.2799,
        -0.5543,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6941e-02, -1.1031e+00, -2.3994e-01, -2.5423e-01, -1.1138e-01,
        -8.5625e-01,  4.3649e-02, -1.1179e-01,  2.7852e-02,  7.5155e-02,
        -9.4700e-02,  1.2268e-04, -5.6142e-02, -9.9738e-02, -8.2354e-02,
        -4.6366e-01, -1.1263e-01, -7.3336e-01, -6.3687e-03, -6.2642e-02,
        -1.7888e-02,  9.8395e-03,  1.7820e-02, -1.0278e-02, -1.6152e-01,
         4.4196e-02, -3.2591e-02, -1.3451e-01,  7.5581e-03, -4.2090e-02,
        -1.5936e-01, -3.1385e-01,  4.8109e-03, -2.4117e-01,  1.0726e-02,
        -3.5692e-02, -2.8678e-02,  1.1102e-01,  3.7299e-02,  9.8909e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0017, -0.2423, -0.6183, -0.0502, -0.2222, -0.1001, -0.0489, -0.0656,
        -0.3715, -0.3680, -0.0187, -0.0211,  0.0084, -0.0101,  0.0145,  0.0300,
         0.0427, -0.1463, -0.2281, -0.0025, -0.1310, -0.0339,  0.0110, -0.0199,
         0.0130, -0.0844,  0.0533,  0.0703, -0.1840, -0.4084,  0.0453,  0.0619,
         0.0211,  0.0185,  0.0349,  0.0890, -0.0643, -0.2497, -0.0536,  0.0387,
        -0.0219, -0.0060,  0.0246, -0.0250, -0.0219,  0.0235, -0.0054,  0.0144,
         0.0750, -0.0117, -0.0065, -0.0144,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2126,  1.0946,  0.1870,  0.1920,  0.0946,  0.1830,  0.4586,  0.0637,
         0.1933,  0.2323, -0.0524,  0.3228,  0.7226,  0.1589,  0.2410,  0.4354,
         0.3231,  0.2622,  0.5185,  0.0834,  0.0708,  0.3777,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1328,  3.0287,  0.0269, -0.3435, -0.0213, -0.1433,  0.1383,  0.4564,
         0.1178, -0.0295, -0.0074,  0.2209,  0.0629,  0.0960,  0.0690,  0.4441,
        -0.0085, -0.1141,  0.0788, -0.3656,  0.0369,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2533e-01, -1.5557e+00, -1.4943e-01, -1.2457e-01,  7.1458e-02,
         8.1885e-02, -3.0540e-02,  1.2394e-01, -1.1463e-01,  1.6797e-02,
         4.1443e-02, -8.2916e-03,  2.5619e-02,  6.3395e-02, -3.0879e-02,
         9.0650e-03, -2.6437e-02, -5.1098e-03, -1.1674e-01,  7.1622e-02,
         3.6054e-02, -3.2195e-02, -3.7588e-02,  5.5910e-02,  1.4695e-02,
         3.7764e-02, -1.4099e-01, -2.0831e-01,  2.1171e-02, -1.9512e-01,
        -6.2375e-01, -4.1422e-01, -5.1291e-02, -1.4012e-01, -1.1469e-01,
         1.9430e-02,  9.3656e-03,  1.2602e-02, -6.2543e-03, -1.2328e-02,
        -5.8435e-01,  1.7077e-02,  3.7354e-02, -1.1163e-01, -3.1105e-01,
         1.6817e-02, -1.6578e-01, -1.0367e-01,  4.5577e-02,  1.2408e-02,
        -1.0109e-01,  9.5978e-04,  3.1687e-02, -5.1692e-02,  1.3606e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0826, -0.0546,  0.1570,  0.0079,  0.0357, -0.0763, -0.3725,  0.0022,
         0.0322,  0.0236, -0.0484,  0.0423,  0.0641, -0.0669, -0.2095, -0.0690,
         0.0228, -0.1105, -0.0376, -0.2200, -0.0346, -0.0844, -0.1388,  0.0143,
        -0.0128,  0.0255,  0.0036, -0.2397,  0.0169,  0.0303,  0.1312, -0.0403,
        -0.0680, -0.3882, -0.0829, -0.2520, -0.0172, -0.1237, -0.1681, -0.0869,
        -0.0364, -0.0349, -0.0212,  0.0126,  0.0676,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3963, -1.9223, -0.5779, -0.5287,  0.3335, -0.0605, -0.1035, -0.1269,
        -0.4182,  0.1350,  0.0149, -0.1933,  0.0467, -0.5872, -0.1549,  0.2666,
        -0.0500, -0.3714, -0.0952, -0.1958, -0.3359,  0.2160, -0.1383,  0.0872,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1657,  0.0928,  0.0545,  0.0811, -0.0777,  0.0290, -0.1434, -0.1696,
         0.0761,  0.0400,  0.0633, -0.0141,  0.0449, -0.1047, -0.0173,  0.1448,
        -0.1833, -0.1530, -0.1773, -0.2741, -0.0732, -0.0040, -0.2211, -0.3617,
         0.2153, -0.2508, -0.4658, -0.1559, -0.1997, -0.4365, -0.0969, -0.0710,
        -0.1650,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2004e-01, -3.5487e+00, -2.4149e-03, -6.2260e-01,  3.6534e-02,
         6.9733e-02,  5.3215e-02,  1.0646e-01, -8.0542e-01, -1.2222e-01,
         2.6021e-02,  3.4184e-02,  2.8501e-01, -5.6069e-01,  1.4895e-01,
        -5.2202e-02, -1.3273e-01,  1.7348e-02,  1.7560e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2882, -0.0444,  0.1171, -0.2901, -0.6516,  0.0768, -0.2986, -0.5367,
        -0.6681, -0.0583,  0.0694,  0.0555, -0.0473,  0.1921, -0.0162,  0.0231,
        -0.0881, -0.2629, -0.0663,  0.0669, -0.2095, -0.7559, -0.0599,  0.0125,
         0.0216,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0870, -1.2716, -0.3349, -0.0920, -0.1670, -0.5627, -1.3506, -0.1815,
        -0.1334, -0.0706, -0.0071, -0.1157, -0.4252, -0.0564, -0.1011,  0.0311,
        -0.1904,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.5551, -0.0989,  0.1244,  0.0234, -0.0122,  0.2007,  0.0024,  0.1379,
        -0.0392,  0.8002,  0.2597,  0.1482, -0.0418,  0.0443, -0.0785, -0.0013,
         0.2223,  0.6501,  0.0276,  0.2551, -0.0615,  0.6397,  0.9183,  0.0737,
         0.0492, -0.1240,  0.0136,  0.0242, -0.0849,  0.0647, -0.1415, -0.3424,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9898e-01,  4.7384e-01,  6.2264e-01,  1.0363e-01, -1.3533e-01,
         1.2088e-02,  2.1722e-01,  5.3609e-01,  4.9941e-02, -2.0021e-02,
        -1.2004e-01, -1.5060e-02, -1.1237e-01, -6.4120e-03,  8.2026e-02,
        -7.6480e-02,  1.2242e-01,  3.1993e-02,  3.1888e-01,  2.1995e-01,
         1.6679e-01,  1.1110e-01,  1.4723e-01,  4.4750e-01,  2.1835e-01,
         1.9352e-01, -2.2806e-05,  7.9222e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1145, -0.5561,  0.0249, -0.0412,  0.0616, -0.4196, -0.4718, -0.2037,
        -0.0115,  0.0027,  0.0927, -0.3906, -0.0644, -0.0376,  0.0271, -0.4094,
         0.0476, -0.5552, -0.1091,  0.0161, -0.0579,  0.0268, -0.0798, -0.0539,
        -0.1739,  0.1244,  0.1922, -0.0023, -0.1547,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1752,  0.9099,  0.2606,  0.3141,  0.7109,  0.1276,  0.1472, -0.0793,
        -0.0499, -0.1011, -0.0774,  0.0248,  0.1567,  0.6412, -0.0618,  0.1204,
        -0.0094,  0.5904,  0.0858,  0.1659,  0.0825, -0.1221,  0.3149,  0.0951,
         0.2067,  0.2196,  0.2245,  0.0342,  0.0171, -0.0019,  0.0533, -0.1536,
         0.0920,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0603, -2.1884,  0.1492, -0.3043, -0.1486, -0.4859,  0.0166, -0.3026,
        -0.1283, -0.1333, -0.1831, -0.0628, -0.0476,  0.0903,  0.0077,  0.0415,
        -0.1842,  0.0074,  0.0657,  0.1350, -0.4694,  0.0706,  0.3173,  0.4983,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1160, -0.8147, -0.5569, -0.0739, -0.2661, -0.1436, -0.0136, -0.1523,
         0.0775, -0.6163,  0.0175, -0.2968, -0.1656, -0.1106, -0.0032,  0.0453,
         0.0647, -0.1404, -0.0435, -0.7464, -0.1001,  0.2366,  0.1423,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1829, -1.0193,  0.1277, -0.0869, -0.0058, -0.2800,  0.0784,  0.0815,
         0.0034, -0.0540,  0.0452, -0.0452, -0.0754,  0.0048, -0.0017,  0.0344,
        -0.3673, -0.2051, -0.1203, -0.0026, -0.0257, -0.1165,  0.0242, -0.0881,
        -0.3075, -0.0812,  0.0145, -0.0269, -0.0769, -0.3336, -0.4621,  0.0210,
        -0.1634, -0.2414, -0.1538, -0.2874, -0.1430, -0.0207, -0.0154,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6858e-01,  4.8355e-01,  2.5291e-02,  3.0774e-02,  4.7458e-01,
         5.8250e-03, -1.7837e-02,  5.8704e-03,  2.2162e-02, -3.9817e-02,
         1.0411e-02, -1.0726e-02, -3.0416e-02,  6.3337e-02,  3.6117e-02,
         2.6400e-01,  6.9248e-02, -6.4693e-02,  4.1745e-04,  6.6153e-02,
         2.0296e-01, -3.5333e-02, -1.1593e-02,  7.5787e-02,  2.8077e-01,
         4.0664e-01,  1.0539e-01,  1.5913e-01,  3.5216e-02,  3.5348e-01,
         6.6139e-03,  6.3596e-02,  3.7327e-02,  6.4801e-03, -4.0911e-02,
        -6.5432e-02,  1.0200e-01,  3.2193e-01, -5.3256e-02,  1.0579e-02,
         1.8436e-02,  9.3043e-03, -1.3726e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3466, -1.0473,  0.0136,  0.0420,  0.0384,  0.0890, -0.0641, -0.0555,
        -0.3728, -0.0885,  0.0498, -0.0076, -0.1182, -0.0142,  0.0209, -0.0255,
        -0.2445, -0.3916, -0.0250, -0.0651,  0.0141, -0.0024, -0.0031, -0.0626,
         0.0208, -0.0340, -0.0575, -0.0072,  0.0221,  0.0303,  0.0281, -0.1551,
        -0.3138, -0.0340, -0.0640, -0.0843, -0.1148, -0.1422, -0.1806, -0.0258,
        -0.1447, -0.1515, -0.0436,  0.0137, -0.0771, -0.1275, -0.0039, -0.0152,
        -0.0091, -0.0521,  0.0798, -0.0122, -0.0791, -0.0097, -0.0271, -0.0135,
         0.0278], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4485e-01, -3.1005e+00, -2.2783e-01,  2.6476e-02, -2.7378e-01,
        -2.0270e-02, -5.7951e-01, -5.6965e-01, -1.8382e-01, -2.1218e-01,
        -3.3527e-01,  1.2284e-01,  4.4939e-01,  2.1351e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6082, -1.5823, -0.0381,  0.1180,  0.0572, -0.0833, -0.0153,  0.0280,
        -0.1635, -0.0317, -0.2683, -0.6747,  0.1298, -0.0809, -0.2812,  0.0503,
        -0.1032, -0.2430,  0.1631, -0.1781, -0.5082,  0.0040, -0.2876, -0.0413,
         0.1058, -0.0683,  0.0163,  0.0230, -0.1526,  0.0396,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1760,  1.4165, -0.4377,  1.1304,  0.4223, -0.0712,  0.3946,  0.0943,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2913, -0.7580,  0.2229, -0.0903, -0.2763, -0.5680, -0.0934,  0.0038,
        -0.0593, -0.0607,  0.0722, -0.1675, -0.3882, -0.5663, -0.0707, -0.0210,
        -0.0549, -0.0029,  0.0520, -0.0058,  0.0871,  0.0931,  0.0305, -0.3049,
        -0.2383,  0.0330, -0.3612, -0.0965, -0.2614, -0.5213,  0.0178, -0.3622,
        -0.1063,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4595e-01, -1.1929e+00, -4.4836e-01, -2.0345e-01, -1.4378e-02,
        -1.9840e-01, -2.3189e-02, -6.7286e-02,  1.3498e-01, -1.1735e-01,
         9.1222e-03, -1.9823e-03, -5.4873e-02,  3.3360e-02,  4.3285e-02,
         1.7211e-02,  2.1478e-02,  7.5257e-02, -7.5515e-02,  2.8654e-02,
         4.0222e-04,  4.1933e-03, -1.8283e-02,  4.3021e-02, -1.8466e-02,
        -4.8264e-02, -7.2794e-02, -2.4503e-01, -1.0757e-03, -2.5406e-02,
        -4.0146e-02, -7.2596e-02, -2.7242e-01, -3.8220e-01, -1.5394e-01,
        -3.6641e-02, -1.4610e-01, -1.7465e-02, -9.9284e-03,  7.0893e-02,
         2.5734e-02,  3.0430e-02, -2.3956e-02, -3.2923e-01, -2.6353e-02,
        -3.9828e-02, -3.5264e-02, -3.3364e-02, -4.7520e-02, -5.7170e-02,
        -2.4959e-02, -1.4721e-01, -1.8799e-02, -2.8009e-02,  2.4073e-02,
         4.4449e-02,  1.6273e-02,  2.7981e-02,  4.0772e-03, -4.4934e-02,
        -1.6419e-02, -7.0147e-02, -3.2584e-01, -1.0187e-02,  8.5700e-03,
         3.2273e-03,  1.7547e-03,  5.2357e-02,  6.5664e-02, -3.1963e-02,
        -2.0033e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4825e-01,  2.7903e-02, -4.7882e-02,  7.4799e-02, -1.6480e-01,
        -8.9547e-02, -1.0547e+00, -5.6751e-02,  6.5391e-02,  8.2423e-02,
         5.8523e-03, -5.4263e-02,  4.5555e-02, -1.1975e-02, -3.2248e-02,
        -2.2235e-02, -2.1732e-02,  1.0221e-01, -3.8763e-02, -4.6582e-01,
        -3.5169e-04,  2.8204e-03,  1.0661e-01, -2.7088e-01,  1.4194e-02,
        -2.7508e-01, -1.0145e-01, -7.3124e-02,  3.5267e-02, -1.0741e-01,
        -5.8202e-02, -6.9325e-02, -2.6459e-01, -1.8104e-01, -1.1711e-01,
        -1.7252e-02, -3.4404e-01, -1.4190e-01, -4.7985e-02, -2.9979e-02,
         3.3547e-02, -1.2388e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1052,  2.2774, -0.0945,  0.0931,  0.0060, -0.0448, -0.0782, -0.0601,
         0.1599,  0.5436,  0.0944,  0.1116,  0.4446, -0.0398, -0.0802, -0.0409,
        -0.0554,  0.3969,  0.5669,  0.0315,  0.0995,  0.1131,  0.0184, -0.0099,
        -0.0186,  0.4836,  0.1866,  0.0885,  0.5443, -0.1038, -0.0633,  0.0245,
        -0.0042, -0.0425,  0.0895,  0.1187, -0.2438,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0885, -2.7999, -0.6680, -0.1308, -0.0373,  0.2733, -0.1632, -0.1332,
        -0.1567,  0.0170, -0.0379,  0.0272, -0.4227, -0.2451,  0.1511,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5427,  0.0358, -0.3145, -0.1654,  0.6141,  0.9177,  0.2873,  0.5575,
         1.1131,  0.1734,  0.0420, -0.0732,  0.4167, -0.0052, -0.3620, -0.1690,
         0.0345,  0.1021, -0.1615,  0.0511,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0707, -0.0120,  0.0053,  0.0044, -0.3278, -0.0736,  0.0209,  0.0197,
        -0.0049, -0.0119, -0.2050, -0.3821, -0.0358, -0.0278, -0.1021, -0.0953,
         0.0363, -0.0723,  0.1819, -0.1891, -0.0730, -0.0128, -0.0073, -0.0058,
         0.0181, -0.2871, -0.5865,  0.0103,  0.0172, -0.2698, -0.4138, -0.0503,
         0.0205, -0.0192, -0.1463, -0.2923,  0.0747, -0.0791,  0.0046, -0.0459,
         0.0417,  0.1097, -0.1979,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0258,  0.0091, -0.0928,  0.0270, -0.2334, -0.1250, -0.1970, -0.7307,
        -0.1192,  0.0090, -0.0227,  0.0200, -0.0919, -0.2008, -0.3592, -0.0651,
         0.0113,  0.0200,  0.0280, -0.0032, -0.2001, -0.0497,  0.0292,  0.0795,
        -0.0254, -0.0785,  0.1182,  0.0883, -0.3612, -0.0715, -0.0175, -0.2283,
        -0.0250, -0.2146, -0.3933, -0.1251,  0.1051, -0.0870,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3652, -1.2539, -0.4539, -0.7245,  0.0433, -0.5086, -0.0221,  0.0648,
        -0.1455, -0.4683, -0.0202,  0.0329, -0.1988,  0.0299,  0.0587,  0.0247,
        -0.1430, -0.3571,  0.1737,  0.1440,  0.1822, -0.0177,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0640, -0.5715,  0.1889,  0.0514,  0.0555,  0.0144, -0.0895,  0.1332,
         0.0231, -0.6249, -0.0374,  0.0602, -0.0045, -0.0072, -0.2453, -0.2547,
        -0.3303, -0.0043, -0.0621, -0.5160,  0.0430, -0.0911, -0.3779, -0.1396,
        -0.1213, -0.0254, -0.1057, -0.0101, -0.0013, -0.4339, -0.0466, -0.1836,
        -0.0535,  0.0344, -0.1758, -0.3930,  0.0179, -0.0543, -0.0207,  0.0309,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0111,  0.0054,  0.0058, -0.0654, -0.0107, -0.1959, -0.0363,  0.0072,
        -0.4273, -0.0566, -0.0038,  0.0047, -0.1813, -1.0074, -0.3756, -0.0953,
         0.0091,  0.1143, -0.3031,  0.0084, -0.0176,  0.0677,  0.0115, -0.0474,
        -0.1294, -0.5393, -0.0460, -0.2011, -0.2934, -0.0521, -0.0497, -0.2278,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3158e-01, -2.3730e+00, -1.2189e-01, -1.2254e-02, -3.7216e-02,
         6.1795e-03, -1.1064e-01, -4.1007e-01, -6.9313e-01, -3.0493e-02,
        -1.3390e-02, -2.9657e-01, -1.3303e-01, -1.2929e-01, -6.7974e-02,
        -1.5856e-01, -3.8393e-01, -1.4760e-01,  9.3291e-02, -8.2205e-02,
        -2.2632e-02,  4.4113e-02, -1.2583e-01, -5.1637e-01, -1.8766e-02,
         3.3555e-02, -1.7866e-01, -2.4514e-02,  3.9675e-02, -1.2332e-01,
         6.1776e-02,  1.0162e-03,  1.1814e-02,  1.3154e-01,  7.0778e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.5884,  0.0042, -0.3706, -0.7470, -0.2458, -0.1846, -1.0399, -0.8439,
         0.0457, -0.4739, -0.0639, -0.1991, -0.0473, -0.1642, -0.0706, -0.0737,
        -0.0872, -0.0127,  0.0569, -0.1115,  0.0805,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2105, -2.2674,  0.0567, -0.4142, -0.3644, -0.3481, -0.0692, -0.2700,
        -0.7163,  0.1863,  0.0236, -0.0382,  0.1276,  0.0300, -0.1390,  0.1032,
        -0.1200, -0.2760,  0.2843,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1283, -0.9929, -0.8283, -0.2417, -0.3028, -0.0149,  0.1506,  0.3802,
        -0.1410, -0.5975, -0.4999,  0.0292, -0.2010, -0.4780, -0.0856, -0.2672,
         0.1090, -0.0490,  0.2632, -0.3769,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2674e-01, -2.4409e+00, -8.7760e-02, -2.0305e-01,  1.3802e-01,
         2.2292e-02,  2.7275e-02, -2.0103e-01, -2.0189e-02, -6.5553e-04,
         3.6260e-02, -3.8980e-02, -2.4972e-02, -2.9427e-01,  2.2088e-02,
        -1.8159e-01, -4.2244e-01, -7.1012e-02, -5.3330e-02, -3.8645e-01,
         4.3034e-02,  3.4021e-01,  7.3622e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4332, -0.4009, -0.2705,  0.0546, -0.0657, -0.2440, -0.0528, -0.1872,
        -0.3164, -0.4338, -0.0915, -0.3067, -0.1184, -0.2509,  0.0127,  0.0500,
        -0.0394,  0.0152, -0.1354,  0.0332, -0.2103, -0.0028, -0.0409, -0.3319,
         0.0827,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3831,  2.4856,  0.4013,  0.0766, -0.1441, -0.0157, -0.0298, -0.0260,
        -0.0601, -0.0305,  0.2952,  0.0662, -0.0255,  0.0709,  0.0366,  0.0914,
         0.0364,  0.0028,  0.0583, -0.0340,  0.2139,  0.2495,  0.1287,  0.0501,
         0.1715, -0.0573,  0.0584,  0.0826,  0.0061,  0.2568,  0.2272,  0.0580,
         0.1987, -0.2553,  0.0956,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2378, -0.0348,  0.0086, -0.2899, -0.0062, -0.0798, -0.3503, -0.1712,
        -0.5055, -0.1144, -0.0960, -0.0171,  0.1124, -0.4797, -0.3399, -0.4730,
        -0.0657, -0.3480, -0.0285, -0.0151, -0.0510,  0.0380,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0378e-01,  2.1734e-01,  1.6936e-01,  1.3422e-02, -3.4786e-02,
         3.1856e-01, -1.3896e-02,  1.5800e-04, -1.8105e-01,  7.3838e-01,
        -4.6369e-01, -8.6022e-02, -1.3680e-01, -1.6616e-01, -2.8652e-02,
         8.8227e-01,  3.7462e-01, -2.2846e-01, -1.6941e-01,  6.1014e-02,
        -3.3856e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1782, -3.1750, -0.2435, -0.1591, -0.0552, -0.0988, -0.4324, -0.8592,
         0.0790,  0.0391, -0.0223,  0.0569,  0.1440, -0.3251, -0.0468, -0.7500,
         0.1546,  0.1906, -0.3606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5000e-01,  4.0064e-02, -1.8595e-01,  5.4418e-03,  5.3137e-02,
        -1.2412e-01, -1.0896e-03, -1.6984e-02,  6.3753e-02, -1.7689e-02,
        -3.0806e-02, -6.6421e-02, -1.8843e-02, -1.8531e-01, -4.5193e-02,
        -2.8838e-02, -1.9275e-01, -3.5237e-01, -5.4970e-02, -1.7291e-01,
        -5.1371e-02, -2.5648e-02, -8.6636e-02,  6.0061e-02, -1.8758e-01,
        -7.8532e-02, -4.5738e-02, -1.4049e-01, -6.4471e-02, -5.5562e-01,
        -6.7965e-02, -4.4320e-02, -2.0343e-01, -6.3561e-01,  1.7156e-02,
        -5.4648e-04, -9.5996e-03, -2.1377e-02, -1.0643e-02, -8.8230e-02,
         4.5621e-03, -1.4863e-02,  3.5679e-02, -1.1415e-02, -4.1974e-03,
         1.8391e-01, -3.3868e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3337, -1.3075, -0.8968, -0.0232,  0.0314,  0.0445, -0.0609, -0.1263,
        -0.2654,  0.0288,  0.0796,  0.0839,  0.0546, -0.2097,  0.2684,  0.0665,
        -0.4505, -0.0964,  0.1207, -0.0488,  0.0837, -0.0747, -0.8160, -0.8686,
        -0.0240, -0.0264, -0.1106,  0.0499,  0.0221, -0.0084,  0.0760,  0.0225,
        -0.3161, -0.0533,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0632e-04, -2.1200e+00, -1.1587e-01,  1.1691e-04, -1.9560e-01,
        -1.4502e-01,  8.4751e-02,  1.3108e-03, -5.4304e-02, -1.8661e-01,
         7.4336e-02,  1.2566e-02, -2.8234e-01, -5.4704e-01, -8.2541e-02,
         3.1587e-02, -3.8992e-02, -6.3353e-02, -3.5192e-02, -3.8250e-01,
        -2.9429e-02,  4.7817e-02, -1.8598e-02,  2.2587e-02, -3.6565e-02,
         8.4627e-03,  9.8515e-03, -5.7834e-02, -4.0038e-02, -1.7035e-01,
         2.5952e-02,  7.9650e-02, -1.0627e-01,  1.5479e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.4010, -1.8261,  0.0526, -0.0746, -0.1921, -0.5259,  0.2036,  0.0826,
        -0.1476, -0.5390, -0.0572, -0.0896, -0.2005, -0.8857, -0.0146,  0.0162,
        -0.0987, -0.4806,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1514,  0.1058,  0.0146,  0.0966, -0.0940,  0.0694,  0.0532, -0.1909,
        -0.7353, -1.5163,  0.0064, -0.0789, -0.1452,  0.0443,  0.0741,  0.0165,
        -0.0360,  0.0473, -0.1280, -1.2809,  0.1501, -0.1881, -0.0337,  0.0716,
         0.0121,  0.0274,  0.1836,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1807,  0.0528, -0.0053,  0.0131, -0.0019, -0.0535,  0.3672,  0.1799,
        -0.5764,  0.0884,  0.2048, -0.0963, -1.2296, -0.0738, -0.0217, -0.0038,
        -0.2472, -0.1105, -0.0250, -0.6370, -0.0822,  0.0939, -0.2093, -0.0482,
         0.1594, -0.2981,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3057, -1.0426,  0.1153, -0.3705, -0.5779, -0.0195, -0.0131, -0.0704,
        -0.2449, -0.1104, -0.4462, -0.2283,  0.0679, -0.0379, -0.0824, -0.4594,
        -0.1099, -0.0522, -0.0453, -0.0387,  0.1966,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2751e-02, -1.1904e+00, -4.7927e-01, -8.3064e-01, -3.7976e-02,
        -2.8366e-02, -7.7110e-01, -5.4612e-02, -1.3797e-01, -3.3364e-02,
         7.8615e-02, -1.7399e-01,  7.5699e-02,  4.0463e-04, -2.3412e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1557,  0.1412, -0.1059, -0.1518,  0.1237,  0.2254, -0.0763,  0.1052,
        -0.1319,  0.2236,  0.0798,  0.1592,  2.0176,  0.7034, -0.1447,  0.2954,
         0.1621,  0.1145,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4139, -1.6653, -0.5248, -1.0260,  0.1433, -0.2243, -0.0180, -0.1934,
        -0.1130, -0.2834, -0.5579, -0.2844, -0.1308, -0.4451, -0.0234, -0.0217,
        -0.1875, -0.0202,  0.1828,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2022, -0.6658, -0.5083, -0.0771, -0.1203, -0.0988,  0.1142, -0.0404,
        -0.0691, -0.0194, -0.0119,  0.0145, -0.1195,  0.0007,  0.0577,  0.0519,
        -0.3352, -0.0161,  0.0332,  0.0142, -0.5293, -0.1312, -0.3561, -0.0982,
        -0.2772, -0.0085, -0.2095, -0.0795, -0.0642, -0.1587,  0.0016,  0.0388,
        -0.3179, -0.0084, -0.2238, -0.0610, -0.0925,  0.0467, -0.0027,  0.0425,
         0.0346,  0.0229, -0.0199, -0.0363], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6456,  2.0764,  0.2120,  0.6056,  0.4134,  0.0064,  0.1084, -0.0646,
         0.4047,  0.0227,  0.1364,  0.1010,  0.1641,  0.0524,  0.0379, -0.0045,
         0.0694,  0.3677, -0.1576,  0.0549,  0.1815, -0.0528, -0.1975,  0.0402,
        -0.1365,  0.0700,  0.0243,  0.4786, -0.0585, -0.0461,  0.0215,  0.1326,
         0.0311,  0.2678,  0.1762,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3440, -2.6704, -0.2570, -0.3805, -0.2021, -0.0377, -0.9005, -0.5473,
         0.0327, -0.0343,  0.0356, -0.2044, -0.5874,  0.0473,  0.0412, -0.1697,
        -0.0345, -0.0316, -0.0388,  0.0344,  0.0092, -0.0150,  0.2225,  0.1664,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0769, -1.7448,  0.0716, -0.4354, -0.1347, -0.0113,  0.0127, -0.1894,
        -0.6285, -0.0213, -0.1902,  0.0188,  0.0200, -0.2669, -0.2993, -0.0021,
        -0.1072, -0.0573, -0.3615, -0.0820,  0.0731, -0.2447,  0.0412, -0.1420,
        -0.0965, -0.2157, -0.1549, -0.1606, -0.0272, -0.0658, -0.2207, -0.0614,
        -0.0192, -0.0222,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0661,  0.0632, -0.1383, -0.2221, -0.0040,  0.0723, -0.6966, -0.2071,
         0.1270,  0.0389, -0.0253, -0.0241, -0.5552, -0.1479, -0.5987, -0.9383,
         0.0035, -0.0086, -0.0423, -0.0390,  0.0333, -0.0375,  0.1027, -0.4020,
        -0.2110, -0.0950, -0.3023, -0.0168, -0.0297, -0.0717,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.6229, -0.3425,  0.3795,  1.0640,  0.1782, -0.1058,  0.1397,  0.1863,
         0.4489,  1.7853, -0.0834, -0.3406,  0.8269,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2498,  0.0980,  0.1295, -0.1319, -0.0080, -0.1404, -0.0014, -0.5089,
        -0.6554, -0.0076,  0.0119, -0.2907, -0.0772, -0.2683, -0.6451, -0.0498,
        -0.2125, -0.1481,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3145, -0.8422, -1.0540, -0.3767, -0.4209,  0.1495, -0.2300,  0.2447,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8404e-01, -9.8302e-01, -3.9675e-02, -1.0878e-01, -6.0971e-02,
        -9.4224e-02, -2.6596e-02, -2.4716e-02,  1.9417e-02,  3.1367e-02,
         2.8833e-02,  9.8034e-02, -6.2716e-02, -2.2232e-02, -1.6984e-02,
        -3.0171e-02,  2.5727e-02,  1.0284e-02,  2.1178e-06,  1.9084e-02,
         6.6424e-02, -9.8163e-02, -2.2178e-01, -8.3804e-02,  2.0399e-03,
        -9.2654e-02, -7.9623e-02, -9.4640e-02, -4.0813e-01, -8.1464e-02,
         4.8162e-02, -4.4072e-01, -1.0160e-01, -6.3503e-01, -1.5880e-02,
        -1.9094e-01, -3.1284e-01, -1.9849e-02,  6.5152e-02,  1.5202e-02,
        -5.3521e-02, -4.9897e-02, -1.0320e-01,  6.5293e-02,  7.5568e-02,
         1.3816e-02,  1.7912e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2455, -1.0896, -0.8384, -1.1004, -0.0398,  0.1204,  0.2418, -0.0705,
        -0.1622,  0.1593,  0.0703,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6793, -1.0334, -0.3215, -0.4438, -0.0074, -0.1068, -0.2907, -0.9366,
         0.0069, -0.7149, -0.1881, -0.1707, -0.7246, -0.0257, -0.3688,  0.0805,
         0.1090,  0.1927,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2019, -0.5891, -1.3856,  0.2079, -0.0779,  0.2795, -0.4406, -0.0418,
        -0.3012, -0.1621,  0.0595, -0.3798, -0.1057,  0.0935, -0.2576,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1293, -1.9027, -0.0988, -0.1406, -0.0468, -0.4532, -0.6072,  0.2917,
        -0.0887, -0.2427, -0.2909,  0.1418,  0.1374,  0.0062, -0.0402,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4216e-01,  3.5299e-02,  8.2841e-02,  2.1214e-01,  6.1258e-02,
        -5.2299e-04,  1.9208e-02,  9.4169e-02, -3.0410e-01, -1.4569e-01,
        -9.9222e-03, -3.8789e-01, -7.0721e-01, -6.0147e-02, -1.2114e-03,
        -8.2953e-02, -1.6396e-01,  2.3036e-01, -1.1413e-01,  7.8135e-03,
        -4.6318e-01, -6.5483e-01,  1.6006e-01, -2.0857e-01, -1.9342e-01,
        -3.7280e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0276,  0.0331, -0.1347,  0.0066, -0.2114, -0.0977,  0.1463, -0.5052,
        -1.3445, -0.1215,  0.1370, -0.1279, -0.0103,  0.3227,  0.0108,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0842, -1.1295,  0.0115, -0.2969, -0.3593, -0.0371, -0.3573, -0.0547,
        -0.0920, -0.8312,  0.1431,  0.1120, -0.2657, -0.0822, -0.0944,  0.0019,
        -0.0654,  0.0262, -0.2569,  0.0448, -0.1757,  0.0808,  0.0502,  0.0542,
         0.1052, -0.0362,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9165e-01, -1.2019e-01,  1.0623e-01,  1.3659e-01, -9.4903e-03,
        -9.2055e-02,  5.7773e-04,  1.9231e-01, -1.6757e-01, -1.7501e-01,
        -9.3540e-01, -1.3372e-01, -5.7317e-03,  2.4656e-02, -3.5370e-01,
        -3.3698e-01,  9.8508e-02, -1.9366e-01, -4.5736e-02, -2.1277e-01,
        -5.2411e-02, -6.3920e-02, -1.7753e-02, -2.9860e-01,  1.5134e-01,
        -7.2334e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.0679, -1.3439,  0.0756,  0.0521, -0.0790,  0.0374, -0.1952, -0.2848,
        -0.0513, -0.2429, -0.0323, -0.1980, -0.3733,  0.0108, -0.1519, -0.0342,
        -0.0737, -0.1726, -0.0988, -0.1911,  0.0417, -0.2139,  0.0444, -0.6275,
        -0.5204, -0.0223, -0.1140, -0.0459,  0.0999,  0.0096, -0.0054, -0.1805,
         0.0485,  0.1086,  0.0034,  0.2181,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0790, -0.5049, -0.2290,  0.0071, -0.3666,  0.0631, -0.2692, -0.3447,
         0.0089,  0.1409, -0.1484,  0.0195, -0.0810,  0.0198, -0.0051, -0.0628,
         0.0107, -0.0434,  0.0709,  0.0093, -0.0025, -0.3643, -0.0420, -0.0809,
        -0.2897, -0.3597, -0.0126, -0.2451, -0.2371,  0.0471, -0.0266, -0.1769,
        -0.2089,  0.1057, -0.0927,  0.1445,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0054, -0.2986,  0.1030, -0.0498,  0.0559, -0.8403, -0.3113, -0.0345,
        -0.0040,  0.0041, -0.1415, -0.1915, -1.1912, -0.1613,  0.0124,  0.0343,
        -0.0476, -0.0771, -0.1045,  0.7792,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0420, -0.9324, -0.1499, -0.0971,  0.0586,  0.0704, -0.0106, -0.3353,
        -0.0265, -0.1942, -0.0336, -0.0322,  0.0220, -0.0114, -0.4696, -0.1007,
        -0.0266, -0.0574,  0.0236, -0.0171, -0.0562, -0.0272,  0.0132,  0.1208,
        -0.1865,  0.0052, -0.0157,  0.0171,  0.0988, -0.0974, -0.1756, -0.1013,
         0.0441, -0.6250,  0.1039,  0.2908, -0.0932, -0.2030,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0865, -1.3327, -0.6144, -0.5627, -0.0818,  0.0487, -0.0202, -0.1339,
         0.0321, -0.0589, -0.2786, -0.3773, -0.0136,  0.0594, -0.0033, -0.2795,
         0.0312, -0.4488, -0.0105, -0.0470,  0.0562,  0.0719,  0.1065, -0.0279,
         0.0664, -0.0038, -0.0515, -0.1163, -0.3761, -0.0444, -0.0893, -0.0863,
        -0.0193, -0.0136, -0.1106,  0.0341, -0.0370,  0.0284,  0.0699,  0.1078,
        -0.0830], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6744, -3.5877,  0.0176,  0.1291,  0.1351, -0.1252, -0.0200, -0.2223,
        -0.0674, -0.0203, -0.1272, -0.4761, -0.6253, -0.1210,  0.0107, -0.8475,
         0.0203, -0.4273,  0.0292, -0.1568, -1.3044,  0.1705,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0275,  0.0360,  0.0544,  0.0424,  0.1142, -0.0964, -0.0124,  0.0330,
         0.0237,  0.0366, -0.0028, -0.2299,  0.0047, -0.1106, -0.0922, -0.2659,
         0.0085, -0.1747, -0.0852, -0.2989, -0.5706,  0.0352,  0.0757, -0.2875,
        -0.0295,  0.0437, -0.1294, -0.2172, -0.3080,  0.0972, -0.2300, -0.2377,
        -0.0392,  0.0017,  0.1869,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1045, -0.3202, -1.0805, -0.1082, -0.4820, -0.0289,  0.0608,  0.1934,
         0.3528, -0.0802, -0.2413,  0.0050, -0.2571,  0.0334,  0.0047,  0.0532,
         0.3452, -0.1000, -0.2309,  0.0448, -0.2511,  0.2407, -0.0783, -0.3477,
        -0.0202, -0.0609, -0.3386, -0.2117, -0.0539,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1791, -2.1194, -0.3831, -0.1049, -0.0502, -0.4529, -0.0069, -0.0964,
        -0.0475, -0.1300, -0.1574, -0.2338,  0.0556,  0.0774,  0.0036, -0.2221,
        -0.2210, -0.1175, -0.2007, -0.0132, -0.1239, -0.0858, -0.0664, -0.0044,
        -0.1715,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1977, -0.1405, -0.0452, -0.1508, -0.6849, -0.0299, -0.0366, -0.3736,
        -0.5701, -0.1956, -0.0175,  0.0346, -0.0374,  0.0100, -0.2905, -0.6373,
         0.0456, -0.1509, -0.0643,  0.0097, -0.1220, -0.0582,  0.0597,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2579e-01, -1.1193e-02, -1.1584e-02, -2.9751e-02,  9.2054e-02,
         8.3322e-02,  8.9021e-02, -5.2691e-02, -3.4479e-03, -2.2358e-02,
         1.8106e-02, -2.1661e-02, -2.9950e-01, -7.4385e-01,  6.2262e-03,
        -8.7051e-02, -5.7744e-04, -3.0738e-01, -3.0279e-02,  1.7170e-03,
         1.5119e-01, -9.9548e-02, -3.6092e-02, -5.2564e-02, -7.1235e-02,
        -2.7676e-02, -9.3075e-01, -2.0081e-01, -5.7971e-01, -1.4702e-01,
        -3.8415e-01,  3.6177e-03, -8.8413e-02, -1.5004e-01, -1.6067e-01,
         5.5773e-02, -3.6526e-02,  1.6754e-01, -1.2704e-01,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6205,  0.2142,  0.2159, -0.3105, -1.3105, -0.2166, -0.0641, -0.5678,
        -0.4059,  0.2039, -0.3314, -0.1700,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 5.0822e-01, -1.6220e-01, -1.7278e+00,  5.0578e-02, -9.4316e-02,
        -1.3701e-01, -2.5495e-01,  1.1335e-01, -7.7829e-03, -1.5642e-01,
        -1.9550e-02,  1.0609e-01, -2.7460e-02,  4.4865e-02, -1.7995e-01,
         3.7553e-02, -5.9706e-02, -2.0498e-01, -4.9876e-04, -4.5173e-02,
        -7.7522e-02, -9.1899e-02,  2.2529e-02,  3.1865e-02, -3.6722e-02,
         1.6447e-02,  2.7900e-03, -5.9428e-02, -3.2679e-02, -1.2549e-01,
        -6.3711e-02,  5.7477e-02, -1.8432e-01,  1.4832e-02, -1.7173e-03,
         4.5922e-03, -1.2225e-01, -1.2555e-02, -2.0970e-03,  1.5840e-03,
         6.7427e-02,  4.4783e-02,  2.2780e-02,  2.0744e-02, -4.3813e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0452,  2.1869, -0.0542,  0.5006, -0.8150, -0.0290,  0.0956,  0.1892,
         0.6795, -0.2177, -0.0089, -0.3257,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8401, -3.6181, -0.4152, -0.6995, -0.2934, -0.1921, -0.0541,  0.0988,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3209e-01,  1.3597e+00,  3.4576e-01,  1.0877e-02,  4.9453e-01,
        -1.8462e-02,  5.6638e-02,  4.5148e-02,  1.1143e-01,  4.0347e-01,
         1.6516e-02,  7.3988e-02,  2.6463e-02, -1.8051e-02, -5.3296e-02,
        -5.0314e-02, -1.7378e-01,  4.3329e-02, -2.5773e-02, -1.5676e-02,
        -1.1428e-02, -2.4198e-02,  4.0169e-02, -7.1595e-03,  5.2378e-02,
         2.6194e-02, -4.8614e-02,  3.1434e-02,  1.4107e-01, -5.0407e-02,
        -1.2219e-03,  8.2614e-02, -6.6026e-03,  7.8087e-02,  6.1735e-02,
        -1.7021e-02, -2.6068e-02,  3.6663e-02, -3.4223e-02,  1.3329e-02,
         3.6908e-02, -9.4668e-03, -5.0947e-02,  6.4319e-02,  4.6083e-02,
        -2.7516e-01,  3.4511e-01, -7.7712e-02,  4.1289e-01,  7.4465e-01,
         1.9819e-01, -4.2977e-02,  1.5291e-01,  2.4175e-02,  1.6081e-02,
        -5.6240e-03,  1.3529e-01, -9.8957e-04,  4.6435e-02,  2.6006e-02,
        -1.6749e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1586, -0.0873, -0.0216, -0.1720, -0.6747,  0.1302, -0.2077, -0.0193,
         0.0019,  0.0073, -0.0217, -0.1546,  0.0299, -0.1460, -0.2408, -0.0249,
        -0.0041, -0.0243, -0.1885, -0.1411, -0.3405, -0.4022, -0.0565,  0.0470,
        -0.2173, -0.2503, -0.0399, -0.0873, -0.1506, -0.0641, -0.0084,  0.0831,
         0.0259,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.9642e-02, -3.1457e-01, -8.3007e-01, -6.6171e-01, -3.2528e-01,
        -1.3341e-01, -7.0627e-01,  1.9578e-02,  3.0008e-02,  1.5271e-03,
         1.1107e-01,  4.3748e-02, -2.4715e-01, -5.9412e-02, -9.1608e-02,
         2.4909e-01, -9.1564e-02, -4.2409e-01, -1.5377e+00,  5.5864e-02,
        -1.5732e-01,  1.3216e-01,  1.0780e-01, -3.3037e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0156, -0.9269, -0.6830, -0.1489, -0.2073, -0.7038, -0.0686, -0.2601,
        -0.0308, -0.3807, -0.4125, -0.2959, -0.5178, -0.1385,  0.0427,  0.0167,
         0.0565,  0.0803,  0.0448,  0.0293, -0.1233,  0.0196,  0.0898,  0.2148,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1897,  0.1954, -0.2539, -0.2687,  0.0779,  0.0856, -0.1377, -0.6839,
        -0.0243, -0.0853,  0.0837, -0.0059, -0.0032,  0.0025, -0.2219, -0.4048,
        -0.4067,  0.1003, -0.1063, -0.0452, -0.4624, -0.0033, -0.2770, -0.5057,
        -0.0032,  0.0311, -0.6112, -0.0297,  0.0057, -0.1110, -0.1710, -0.1161,
        -0.0091,  0.1321,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5877, -0.0155,  0.0555,  0.0858, -0.5010, -0.1330,  0.0081,  0.0121,
        -0.1353,  0.0838,  0.0104,  0.1930, -0.1284, -1.0412, -0.1074, -0.0587,
        -0.4006,  0.1899, -0.2583, -0.2342, -0.1555, -0.3422, -0.0703,  0.1256,
        -0.2888,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9118,  0.0182, -0.1972, -0.1104, -0.2205, -0.0914, -1.0509, -0.3850,
        -0.4680, -1.4583,  0.1944, -0.0342, -0.1208, -0.2063,  0.3214, -0.4375,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6169e-01, -1.4091e+00, -1.0617e-01, -4.8661e-01, -2.2858e-01,
        -1.9010e-01, -6.3708e-01, -1.3584e-01,  3.9007e-02, -4.3661e-02,
         1.5360e-02, -5.8226e-02, -2.2057e-01, -4.2337e-02,  5.3755e-02,
         4.6923e-02, -2.3572e-01, -2.6683e-01, -1.8137e-01,  8.5520e-04,
         1.1795e-01, -4.6258e-01,  5.7487e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3051,  0.1239,  0.2952,  1.3391,  1.6302,  0.2115, -0.2481,  0.0762,
         0.1764, -0.2372, -0.0318, -0.2819, -0.2731,  0.2284,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.3988, -0.7327, -0.4100, -0.8563, -0.1341,  0.1218, -0.2373,  0.1218,
        -0.2805, -0.1369, -0.6608, -0.6842, -0.2492, -0.0648, -0.1936,  0.1598,
        -0.0156, -0.7488,  0.1470, -0.6033, -0.0215, -0.0724, -0.2083,  0.5973,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3454,  0.0786, -0.8885, -0.1446, -1.0187,  0.0722, -0.9958,  0.1448,
        -0.1637, -0.0040,  0.1619,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2946, -0.0361,  0.2636, -0.0034,  0.0642, -0.2415, -0.8218, -0.0335,
        -0.4216, -0.0462, -0.2029,  0.1010, -0.1761, -0.0111, -0.3060, -0.0492,
        -0.0509, -0.3395,  0.1307, -0.1805,  0.0065,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0501,  0.0762,  0.0930, -0.0631,  0.0766,  0.0520, -0.1249, -1.4356,
        -0.5746, -1.0667, -0.4331,  0.1739,  0.1081, -0.3151,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0719,  0.0714,  0.1297, -0.1094, -0.0337, -0.0769, -0.0736, -0.1637,
        -0.0393, -0.0742,  0.4546,  0.3191,  1.6478,  0.1189,  0.0306,  0.0878,
         0.0983,  0.1590,  0.0049, -0.1896,  0.3929,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8101,  2.4632,  0.1370,  0.7852,  0.1785,  0.2679,  0.0041,  0.5528,
         0.4690,  0.4842,  1.3807, -0.5533,  0.3303,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1930, -0.5265, -0.0533, -0.0728, -0.0197, -0.1035, -0.2779, -0.5441,
        -0.1031, -0.0292, -0.0656,  0.0833, -0.0357,  0.0403,  0.0028, -0.0724,
        -0.0360, -0.0617,  0.0242, -0.0610,  0.0257,  0.1492,  0.0486, -0.0633,
        -0.6231,  0.1583, -0.6907, -0.6890, -0.2997,  0.0613, -0.0337, -0.0357,
         0.0078, -0.0792,  0.0933,  0.0166, -0.2122,  0.0064,  0.0125, -0.0640,
        -0.0567, -0.0505,  0.0134, -0.1049,  0.1349], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0428, -0.1841, -0.1071, -0.0615,  0.0060,  0.1240,  0.1183, -0.0164,
        -0.0206, -0.0187, -0.4237, -0.0819, -0.1334, -0.0401, -0.2283,  0.0294,
        -0.5077,  0.1205, -0.0494,  0.0524,  0.2121, -0.4084, -0.0546, -0.2766,
        -0.0618,  0.0632, -0.2742, -0.2785, -0.4143, -0.0020, -0.0646, -0.1727,
        -0.6934, -0.0646,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6840,  0.0886,  0.0834, -0.1850, -0.1055, -0.0622, -0.0068,  0.0132,
         0.0421, -0.0325, -0.1273, -0.0109,  0.0252,  0.0228,  0.0105,  0.4027,
         0.1585,  0.1578, -0.0482, -1.3189, -2.0872, -0.0695,  0.0219,  0.0236,
         0.1486, -0.4910,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1567, -1.6068,  0.1597, -0.6319,  0.1643, -0.1749,  0.0201, -0.2002,
         0.1895, -0.0332,  0.0076,  0.0237, -0.0178,  0.0394, -0.0262, -0.4512,
        -0.5709,  0.1530, -0.1121, -0.0308, -0.2829, -0.0936, -0.0110,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1037,  0.5613,  0.0317,  0.1303, -0.0419, -0.1038,  0.0407,  0.3592,
         0.5222,  0.1401, -0.0580,  0.0111,  0.0365,  0.0405,  1.0716, -0.1282,
         0.0314,  0.2681,  0.4434,  0.1391,  0.0763,  0.0362,  0.0014, -0.0770,
        -0.0345,  0.0967,  0.3418,  0.5966,  0.1348,  0.0850,  0.0374,  0.0595,
        -0.0067, -0.0841,  0.0181,  0.0435,  0.1037,  0.0011,  0.0585,  0.1297,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1836, -0.0945, -0.0903, -0.1632, -0.4180, -0.1257, -0.4178, -0.3957,
         0.0055, -0.1316, -0.0521, -0.0479, -0.2064, -0.0427, -0.1254, -0.2731,
        -0.2284, -0.0250, -0.0277, -0.1758, -0.0528, -0.3363, -0.1444, -0.0284,
        -0.0693, -0.0554, -0.0345, -0.0156, -0.0811, -0.1272, -0.2696,  0.0096,
        -0.4468, -0.1637,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0460, -0.2444, -0.2057, -0.0470, -0.2303,  0.1109,  0.0333, -0.0313,
         0.0181,  0.0180, -0.0145, -0.0533,  0.0464,  0.0365, -0.5912, -0.0123,
        -0.0790,  0.0220, -0.3458, -0.0489,  0.0697,  0.0775, -0.0314,  0.0919,
        -0.0763, -0.3465,  0.1179,  0.0154, -0.0181, -0.0316, -0.0239,  0.0593,
         0.0068, -0.0426, -0.1281,  0.0541, -0.0580, -0.1241, -0.0845, -0.1418,
        -0.1664, -0.0037, -0.2021, -0.0393, -0.0370,  0.0185,  0.0420, -0.0009,
        -0.0587, -0.0638, -0.0053, -0.0745], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0874,  0.0621,  0.1292, -0.0209, -0.9968, -0.2965,  0.0854,  0.1074,
         0.0596, -0.0593, -0.0124,  0.0994, -0.1370, -0.5554, -0.1488, -0.3759,
        -0.5119, -0.4218, -0.3319, -0.0248, -0.1401, -0.2000, -0.1456, -0.3552,
         0.0379,  0.0081, -0.1232, -0.0254,  0.1942,  0.0610, -0.2097,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2657, -0.6597,  0.1219, -0.1940,  0.0460,  0.0495, -0.0314, -0.0173,
        -0.0898,  0.0269,  0.0212, -0.3567, -0.1549, -0.0112, -0.2156, -0.2757,
         0.0507, -0.2559, -0.2936, -0.4210, -0.0364, -0.1197, -0.0949,  0.0861,
        -0.2522,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1247, -0.0174,  0.0587,  0.0677, -0.3022, -0.8544, -1.8589, -0.2397,
        -0.0231, -0.1240, -0.0674,  0.4581,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1489, -1.8044, -0.0696, -0.1419, -0.3338, -0.0675,  0.0496, -0.0084,
         0.0213, -0.1856,  0.0852, -0.0503, -0.3467, -0.4902, -0.1517, -0.2105,
         0.0375, -0.0240,  0.0470,  0.0034, -0.1751,  0.1662, -0.0116, -0.1592,
        -0.2479, -0.3434, -0.0579,  0.0045,  0.0738,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4090, -1.5737, -0.2389, -0.0094, -0.1915, -0.3074, -1.7444,  0.0314,
         0.1463,  0.0927,  0.0499,  0.1253,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0395, -1.0544,  0.0471, -0.4179,  0.4229, -0.8217, -1.4182,  0.1371,
        -0.3482,  0.0906, -0.0241,  0.0573,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4131,  0.3307, -0.8851, -0.0181, -0.5599, -0.5133, -0.0051,  0.0159,
        -0.6739, -0.2659, -0.0808, -0.3621, -0.0248,  0.0631, -0.2428, -0.7147,
         0.0842,  0.1765, -0.4087, -0.3244,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3345e-01, -1.1235e+00, -4.5619e-02, -2.8376e-01, -6.6269e-01,
        -2.0564e-05, -8.4523e-02, -9.7384e-02, -1.6428e-01, -2.1863e-03,
        -5.3684e-02, -1.9698e-01, -8.4821e-02, -6.1020e-02, -6.4189e-02,
         8.5648e-03, -6.7090e-02,  1.4964e-02, -1.4300e-01, -6.5260e-02,
        -6.5466e-02,  2.6197e-02,  7.6444e-03, -5.2943e-03,  3.1243e-02,
         3.1700e-02,  4.0130e-02, -2.1437e-01, -2.0056e-01,  2.8303e-02,
         1.5167e-02, -6.3744e-03, -2.5289e-01, -3.8937e-01, -1.6735e-02,
         1.1478e-01,  2.4614e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3198, -3.0384,  0.0971,  0.0108, -0.2271,  0.0113, -0.0403, -0.1321,
        -0.0466,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0007, -0.0038,  0.2744, -0.5171, -0.1731, -0.1874, -0.2353, -0.3067,
         0.0444, -0.0659, -0.1308, -0.1681, -0.3492, -0.3887,  0.1005, -0.0139,
         0.0071,  0.2068, -0.3031, -0.3749, -0.0203,  0.0105, -0.0276, -0.0297,
         0.0782,  0.0229,  0.0925,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5645e-01, -1.9967e+00,  1.4024e-01, -4.7878e-01, -4.9899e-02,
        -1.4715e-01, -4.0087e-03, -6.3718e-02,  5.9547e-03, -1.0252e-01,
        -6.3253e-04, -5.1039e-01, -1.8084e-01, -2.7237e-02, -4.3997e-02,
        -2.9592e-02, -3.4145e-01, -4.6601e-02,  9.1334e-02, -5.5190e-02,
        -1.1229e-01, -2.1556e-01, -3.6988e-01, -1.1209e-01, -6.5779e-02,
        -2.7176e-02, -4.8187e-03, -1.7975e-01, -8.8115e-01,  6.8206e-03,
         1.2008e-02,  3.1661e-02, -7.1690e-02,  4.6262e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.7100e-01,  2.1633e-03,  1.4461e-02, -1.1021e-01, -9.8211e-02,
        -7.6709e-02,  2.4000e-02, -1.0625e-02,  4.8466e-02,  2.1031e-01,
        -3.5037e-02, -4.7337e-01,  3.9725e-02,  1.3476e-02, -4.4096e-02,
        -4.1474e-01, -6.2617e-01, -8.3482e-02, -1.6131e-01,  9.6444e-02,
         7.5170e-02, -9.9838e-03, -2.1574e-01, -1.8565e-02,  3.4764e-02,
        -4.9764e-02,  1.6102e-02, -3.8644e-02,  3.2971e-04, -1.0210e-02,
        -5.7933e-02, -2.6684e-01, -3.5029e-01, -1.0769e-02, -9.7367e-02,
        -1.4300e-01, -4.0961e-01,  3.8685e-02,  6.3619e-02, -4.0724e-02,
        -1.0449e-01,  6.6723e-03,  6.6895e-02,  1.0623e-01,  1.6708e-01,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3397e-01,  1.9145e-02, -2.6682e-02, -3.9589e-02, -2.3313e-01,
        -6.6069e-01, -1.0840e+00, -4.3854e-02,  4.7576e-02, -5.4698e-01,
        -2.2597e-01, -5.3666e-04, -3.6392e-01, -1.2674e-02,  3.2146e-03,
        -1.8602e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2725, -0.1623,  0.1334,  0.0788,  0.0481, -0.0460,  0.2189,  0.1783,
         1.1295,  0.8071,  0.4238,  0.6975, -0.0964,  0.3547,  0.1251, -0.0359,
         0.0635, -0.0294, -0.1403,  0.0218,  0.0416, -0.1700, -0.0225, -0.0338,
        -0.0642,  0.0852, -0.1565,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0653, -0.0974, -0.3045, -0.0097, -0.0880,  0.0314, -0.0871, -0.0540,
        -0.0805,  0.0345,  0.0312,  0.0178, -0.0941, -0.0052, -0.0567,  0.0226,
        -0.0877,  0.0568,  0.0955, -0.0838,  0.0601, -0.1193, -0.0470, -0.3645,
        -0.0184,  0.0121, -0.2107, -0.2483,  0.1433, -0.1744,  0.0635, -0.1625,
        -0.2269, -0.0542,  0.0074, -0.0282, -0.0849, -0.1842, -0.0148,  0.0457,
        -0.0640, -0.1930, -0.0060, -0.0395,  0.0681,  0.1263], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3660,  0.0779,  0.0565,  0.0308, -0.0403, -0.1276, -0.0339, -0.6013,
         0.1342, -0.1481,  0.0134,  0.0343,  0.0482, -0.0388,  0.0043, -0.0663,
         0.0075, -0.0688, -0.6408, -1.0344, -0.1118, -0.0429, -0.0333, -0.1217,
         0.1130, -0.1225, -0.4839,  0.0112,  0.0227,  0.0870,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5523, -0.9217, -0.4403, -0.1806, -0.2027,  0.1061,  0.0514,  0.8404,
        -0.1640, -0.2537,  1.3372,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5889e-02, -2.0800e+00,  1.7228e-03, -3.1150e-02, -2.7539e-01,
        -1.3286e-01, -2.7220e-01, -5.1917e-01,  3.6309e-02,  8.3778e-02,
         5.7070e-02,  6.3489e-02, -6.9266e-02, -9.2588e-02, -7.4844e-02,
        -3.0220e-01, -8.3507e-02, -1.8795e-01, -2.4443e-03,  1.2931e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0397,  1.4224,  0.6550,  0.4196, -0.2695,  0.1883,  0.4157, -0.0872,
         0.1783,  0.1366, -0.4363,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0268,  1.7272,  0.8400,  0.8261,  0.1788,  0.1407,  0.0393,  0.2586,
         0.4533,  0.0927, -0.0181,  0.1042, -0.0372,  0.1568, -0.0632,  0.3639,
         0.0125,  0.0478, -0.0149,  0.0150, -0.3126,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2661e-01, -1.1067e+00, -3.3487e-01,  8.1729e-02,  2.2110e-02,
        -1.2593e+00,  2.7122e-01,  5.4773e-02, -1.2791e-01, -4.7335e-02,
         2.0989e-02, -3.8881e-02, -3.9719e-02, -3.9026e-02, -2.0742e-01,
         1.1630e-03,  1.2666e-01,  3.7850e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6471, -0.1279, -0.5763, -0.0562, -0.0120,  0.0605,  0.0493,  0.0229,
        -0.0044, -0.0457, -0.0080,  0.0283, -0.0747, -0.4075, -0.3914,  0.0345,
        -0.0912, -0.3413, -0.1284, -0.1549, -0.0931, -0.2490, -0.4399, -0.0206,
        -0.1164, -0.0468, -0.2376, -0.0423, -0.0784, -0.3010,  0.0626, -0.0017,
        -0.0089,  0.0343, -0.0530,  0.0266, -0.0100,  0.1383, -0.1597,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5479e-01, -3.2410e+00, -1.4704e-01, -6.8056e-01, -4.1240e-02,
        -5.0543e-03,  1.1420e-01,  1.4716e-02, -2.4921e-01, -1.2524e-01,
        -4.9771e-02, -3.9557e-01,  1.6317e-03, -5.8012e-04,  6.0628e-02,
         1.1427e-02, -3.3439e-01, -2.9745e-02,  7.3625e-02, -7.5428e-02,
        -2.0047e-02, -8.3765e-03,  6.3272e-02,  1.6085e-02,  1.0180e-02,
        -3.7367e-02, -9.9871e-02, -5.5501e-02, -2.9055e-01, -3.4844e-01,
        -2.2064e-03, -9.3193e-02,  1.2159e-02, -4.1586e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.1819, -0.5112, -1.0381, -1.3587,  0.1319, -0.4355,  0.1235,  0.0484,
        -0.3824,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3157e-02, -1.4700e+00, -7.7592e-02, -3.8371e-01, -5.9507e-02,
        -5.6434e-01, -8.0219e-02, -2.8282e-01, -7.3583e-02,  1.6791e-02,
        -1.5418e-01, -2.9898e-02, -5.1261e-02, -1.5476e-01, -8.9053e-02,
        -4.7517e-01,  2.2467e-02, -5.4627e-01, -4.7074e-03, -4.4482e-04,
        -1.0313e-01,  1.2264e-02,  1.0415e-02, -3.3637e-02, -1.6390e-01,
         2.8862e-02, -1.2958e-01, -7.3213e-02,  2.5392e-02, -1.6436e-02,
        -1.4925e-01, -1.7809e-01,  5.9971e-02, -3.5406e-01,  4.3878e-02,
        -4.2146e-02, -1.1419e-01,  5.3322e-02, -6.8456e-02,  2.1153e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0558, -0.4192, -0.8432, -0.0412, -0.1731,  0.0229,  0.0163, -0.0185,
        -0.0969, -0.2525,  0.0016,  0.0027,  0.0021, -0.0058, -0.0462,  0.0341,
        -0.0504, -0.1160, -0.2716, -0.0198, -0.0597,  0.0323,  0.1193,  0.0262,
         0.0683, -0.0797, -0.3023, -0.0210, -0.3043, -0.5165, -0.0453,  0.0556,
        -0.0322,  0.0532,  0.0366, -0.0470, -0.1599, -0.3343, -0.0229,  0.0197,
        -0.0494,  0.0441, -0.0058,  0.0110,  0.0211,  0.0444, -0.0086, -0.0034,
        -0.0158, -0.0185,  0.0626,  0.0199,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1010,  1.8005,  0.1660,  0.1727,  0.1013,  0.0403,  0.3271,  0.0244,
         0.0362, -0.0051, -0.0799,  0.0617,  0.6195,  0.1101,  0.3416,  0.3547,
         0.4690,  0.2748,  0.4478,  0.0168, -0.1976, -0.1421,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0146e-01,  3.8014e+00, -1.3102e-01, -7.9918e-02,  2.7706e-03,
        -5.1469e-02,  3.5854e-01,  3.1546e-01,  1.8527e-01, -1.1502e-02,
         6.2971e-02,  2.6990e-01,  7.2342e-03,  9.3154e-03, -1.0458e-01,
         3.3853e-01,  1.6191e-01,  1.0164e-02, -8.5829e-02, -3.1656e-01,
         1.6161e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6062e-01, -1.2362e+00, -2.1961e-01,  4.0108e-03, -2.1769e-03,
         8.9783e-02, -1.4606e-02,  6.5686e-02, -1.1198e-01, -2.0213e-02,
         1.9615e-02, -5.0071e-02,  3.7641e-03,  1.0286e-02,  5.8923e-02,
         1.0897e-02,  3.1830e-02,  4.8463e-02, -1.6304e-01,  1.5003e-02,
         7.0314e-03,  1.1501e-03, -9.7120e-03, -1.2887e-03, -3.3864e-02,
        -2.3257e-02, -4.6225e-02, -1.8627e-01,  2.6470e-02, -1.6804e-01,
        -2.7826e-01, -2.5306e-01,  7.5293e-03, -1.2004e-01, -1.9093e-01,
        -3.9745e-02,  7.8950e-03,  3.8679e-02,  7.7053e-02, -4.5732e-02,
        -5.0624e-01,  1.3882e-02, -2.0892e-02,  5.4359e-02, -2.2183e-01,
         6.7885e-02, -1.6651e-01, -6.3836e-02,  6.2819e-02,  4.9582e-02,
        -7.1161e-02, -7.7860e-03,  7.1708e-02,  5.9445e-02,  2.0954e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1302, -0.0334,  0.1903, -0.0940, -0.0253, -0.1449, -0.1836,  0.1583,
        -0.0340,  0.0476,  0.0059, -0.0506, -0.0799, -0.1166, -0.1973, -0.1123,
        -0.1075, -0.1720, -0.0975, -0.2760, -0.0153, -0.0859, -0.2123, -0.0139,
         0.0887,  0.0713, -0.0393, -0.3756,  0.0390, -0.0091,  0.0763, -0.0286,
        -0.0933, -0.3938, -0.0704, -0.1129,  0.0766, -0.2062, -0.2038, -0.1404,
        -0.0117, -0.0071, -0.0411, -0.0112, -0.3126,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3896, -1.1518, -0.6501, -0.4373,  0.1893, -0.0335, -0.1119,  0.0232,
        -0.3053,  0.0371,  0.2448, -0.1747,  0.0639, -0.3147, -0.4429,  0.2319,
        -0.0798, -0.2953,  0.2474, -0.1543, -0.4306,  0.0145, -0.0852,  0.1229,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0314,  0.1931, -0.0061,  0.0422,  0.0129, -0.0680, -0.2273, -0.2772,
        -0.0011,  0.0601, -0.0141, -0.0271,  0.0728, -0.1210, -0.0469,  0.1171,
        -0.1941, -0.1781, -0.0958, -0.1609, -0.0596, -0.0512, -0.1978, -0.1397,
        -0.0182, -0.2752, -0.4167, -0.0995, -0.1811, -0.2828, -0.0656,  0.0296,
        -0.1004,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2352, -3.1233, -0.0400, -0.6104,  0.0195, -0.1742,  0.0730, -0.1975,
        -0.9021, -0.0170, -0.0118, -0.0528,  0.2117, -0.3864,  0.1232, -0.0632,
        -0.1331, -0.0192,  0.0694,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1674,  0.0226,  0.0974, -0.1050, -0.4405, -0.0960, -0.4285, -0.3528,
        -0.5809, -0.0605,  0.0759,  0.0014, -0.0692,  0.0177,  0.0079, -0.0742,
        -0.0452, -0.0813,  0.0090,  0.1217, -0.2035, -0.4442, -0.0841,  0.0671,
        -0.1014,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1043, -0.9665, -1.0322, -0.0521, -0.1512, -0.5927, -1.4425,  0.1065,
        -0.0588, -0.2012, -0.0561,  0.0855, -0.1381, -0.0547, -0.0176,  0.4517,
         0.3132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.4210,  0.0115, -0.0864, -0.0867,  0.1239, -0.0649,  0.0588, -0.0977,
        -0.1159, -0.5130, -0.3671, -0.1415,  0.0323, -0.1049,  0.0461, -0.0338,
        -0.3477, -0.8260,  0.1058, -0.2189, -0.0673, -0.5885, -1.1991, -0.0793,
        -0.1266,  0.0046, -0.0383,  0.1510,  0.2050, -0.0255,  0.1209,  0.2937,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1021, -0.5187, -1.2311,  0.0558, -0.1348, -0.0151, -0.2920, -0.6568,
        -0.0557, -0.0300,  0.0281,  0.0249,  0.5415, -0.0135, -0.2366,  0.0651,
        -0.1075,  0.0554, -0.2278, -0.0816, -0.0885,  0.0033, -0.3098, -0.8486,
        -0.1209, -0.0125,  0.0917, -0.0970,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3258, -0.3141, -0.0574,  0.1221, -0.0989, -0.3098, -1.0162, -0.1354,
         0.0632,  0.0765,  0.0155, -0.4494, -0.2147, -0.2232, -0.0336, -0.3679,
        -0.0463, -0.2538,  0.0400, -0.0626, -0.1449,  0.0508,  0.0413, -0.0481,
        -0.3010,  0.0810, -0.0139, -0.2789,  0.2013,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1766, -0.2300, -0.1531, -0.3482, -0.5909, -0.0406, -0.1465, -0.0417,
         0.0011,  0.1483,  0.0288, -0.0290, -0.2630, -0.8036,  0.2209, -0.2341,
        -0.0236, -0.3215, -0.0297, -0.1335,  0.0370, -0.0263, -0.1368, -0.0472,
        -0.1427, -0.1750, -0.2020, -0.0210, -0.0642,  0.0186, -0.0235,  0.0036,
        -0.0966,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7763e-01, -2.0739e+00, -4.1686e-02, -3.3462e-01, -2.1451e-01,
        -4.8870e-01, -3.0235e-02, -1.2805e-01,  3.5007e-02, -4.1423e-01,
        -2.5975e-01, -4.6023e-03,  7.8011e-02, -3.3348e-02,  1.3321e-02,
        -1.2905e-03, -3.6265e-01,  2.5662e-02, -6.8882e-01,  1.3609e-02,
        -5.9966e-01,  4.1214e-03,  1.5142e-01,  1.7382e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0822, -0.7636, -0.7810,  0.0231, -0.4399, -0.1205, -0.1535, -0.4377,
         0.0683, -0.1901, -0.0171, -0.1737, -0.2048, -0.0354, -0.0307, -0.0695,
         0.0898, -0.2295,  0.0278, -0.3731, -0.3732,  0.2180,  0.1661,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0401e-01, -1.0981e+00,  4.2677e-02,  1.7696e-02, -2.0277e-01,
        -3.3564e-01,  1.2322e-02,  1.5799e-01, -2.1520e-02, -4.1512e-02,
         7.8205e-02, -4.7523e-02, -2.7646e-02, -6.2404e-02,  7.1026e-02,
        -2.1565e-01, -4.4884e-01, -2.1766e-01, -1.0252e-01,  7.7303e-02,
         1.5932e-02, -1.9948e-01, -1.4115e-01,  6.7898e-02, -3.3637e-01,
        -2.1541e-01,  1.7150e-02,  1.3049e-02, -1.5147e-01, -6.6861e-01,
        -5.1088e-01,  6.7266e-04, -3.6916e-01, -1.4692e-01, -2.5184e-01,
        -3.1237e-01, -4.2170e-03, -3.4279e-01, -1.0107e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0356, -0.2109,  0.0386, -0.1557, -0.6416, -0.0693,  0.0295, -0.0276,
         0.0496,  0.0624,  0.0222, -0.0205, -0.0340, -0.0235, -0.0907, -0.3696,
        -0.0820, -0.0198,  0.0239, -0.0392, -0.2065, -0.0953, -0.0624, -0.1214,
        -0.4636, -0.3587, -0.1354, -0.0382, -0.0395, -0.1723, -0.0548, -0.0474,
         0.0596, -0.0628, -0.0256, -0.0019, -0.0172, -0.0387,  0.1293,  0.0287,
        -0.0499, -0.0471,  0.0525,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2923e-02, -1.2545e+00, -5.1102e-02, -2.1449e-02,  1.5089e-01,
         4.4741e-03, -4.4840e-02,  4.6313e-02, -2.9343e-01,  8.1907e-03,
         9.7645e-02,  5.7572e-02, -1.0210e-02,  7.3243e-03, -6.4955e-03,
         4.4557e-02, -7.9327e-02, -2.8751e-01, -1.1805e-02, -6.3317e-02,
         4.7381e-02,  1.1219e-01, -1.3629e-01,  2.5543e-02, -9.5535e-02,
        -5.7664e-02, -1.1147e-02,  1.5862e-02,  2.6585e-02,  1.0895e-02,
         1.9512e-02, -1.7615e-01, -3.5372e-01,  1.6243e-02, -1.0953e-01,
         3.8405e-02,  1.7399e-01, -1.2155e-01, -1.7719e-01, -3.1048e-02,
        -1.2703e-01, -2.1340e-01, -2.9424e-02, -3.9290e-03, -2.2403e-03,
         2.9358e-02, -3.8569e-03,  1.3977e-02,  3.6134e-03, -5.0641e-03,
         4.6921e-02, -1.3455e-02,  3.7352e-02,  3.0431e-02, -2.5164e-02,
        -7.2689e-02,  4.6375e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6197, -3.0984, -0.0955, -0.1411, -0.4186,  0.1010, -0.4231, -0.7716,
        -0.0348,  0.0324, -0.4503,  0.1474,  0.0828,  0.1152,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0666, -1.3612, -0.1901, -0.1594,  0.0075, -0.0728, -0.0148, -0.0296,
        -0.0952, -0.0249, -0.2973, -0.3956, -0.0763, -0.0416, -0.3031, -0.1626,
        -0.0872, -0.1831,  0.0711, -0.3095, -0.4737, -0.1658, -0.2829, -0.0864,
         0.0592,  0.0253,  0.0091,  0.0535, -0.1649,  0.1491,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8022, -0.7967,  0.2774, -1.0798, -1.1293,  0.4865,  0.2450, -0.3943,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2105, -0.9826, -0.2147, -0.1272, -0.3576, -0.5667,  0.0229,  0.0907,
        -0.0372, -0.0953,  0.0371, -0.0889, -0.2910, -0.5380, -0.0788,  0.0075,
        -0.0630, -0.0376, -0.0605,  0.0044,  0.1169, -0.0035, -0.0423,  0.0379,
        -0.4058,  0.0223, -0.2045, -0.0344, -0.3858, -0.4929, -0.0324,  0.0077,
        -0.3115,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5091e-01, -1.0379e+00, -7.0627e-01, -1.2943e-01, -3.9316e-02,
        -7.5170e-02, -3.4402e-02, -8.3326e-02,  9.3821e-02, -1.0489e-01,
         4.8049e-02, -2.7077e-02, -9.4745e-02,  1.9327e-02, -3.5783e-02,
         2.0156e-02,  6.2064e-03,  5.2702e-02, -1.0518e-01,  4.5436e-02,
         3.5947e-02,  2.7506e-02, -2.5898e-02, -5.9492e-03,  6.5265e-02,
        -7.9171e-02, -1.0155e-01, -3.7255e-01, -7.5038e-03, -4.1731e-02,
        -8.5411e-02, -8.2481e-02, -3.6090e-01, -5.8123e-01,  3.4653e-02,
         2.6857e-03, -1.5680e-01, -1.0283e-01, -4.2763e-02,  2.6854e-02,
         3.6139e-02, -3.0139e-02, -1.0433e-02, -2.4583e-01,  2.2726e-02,
         4.6139e-03, -1.9407e-02,  1.1346e-02, -4.1469e-03,  5.2417e-02,
         2.8631e-04, -1.2044e-01, -3.2388e-02, -4.1718e-02, -1.2858e-02,
         3.7155e-02,  7.2394e-03,  2.1292e-03,  6.8579e-04, -4.4542e-02,
        -4.9596e-03, -4.4044e-02, -3.0436e-01, -8.9597e-04,  1.2285e-03,
         1.9826e-02, -1.2241e-02,  4.7627e-02,  2.8937e-02, -9.6600e-02,
        -2.6275e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7612e-01,  2.6514e-02,  1.0550e-02,  1.1978e-02, -1.6379e-01,
        -1.8450e-03, -3.6104e-01, -8.1410e-02,  3.2702e-02,  1.0504e-01,
         1.3697e-01,  4.7813e-02,  8.1721e-02,  1.9343e-02,  1.0717e-01,
         5.5406e-02, -3.6947e-02,  5.1307e-02, -8.1100e-02, -4.3461e-01,
         3.5886e-04,  7.3198e-03,  1.2562e-01, -3.7732e-01,  6.9381e-02,
        -2.1296e-01, -1.2501e-01,  1.1318e-02,  3.4567e-02,  6.0349e-03,
        -1.3960e-01, -1.5530e-01, -7.6707e-01, -2.9961e-01, -9.2971e-02,
        -5.4968e-02, -3.1535e-01, -2.2363e-01, -1.2479e-02,  3.9851e-02,
         1.4360e-01, -1.0003e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3153, -1.5131,  0.0066, -0.0144, -0.0072, -0.0709,  0.0709, -0.0288,
        -0.0488, -0.3161, -0.1990, -0.2029, -0.3227, -0.0464,  0.0507,  0.0153,
        -0.0597, -0.3164, -0.5027,  0.0114, -0.0177, -0.0761, -0.0268,  0.0070,
        -0.1178, -0.5736, -0.0513, -0.0793, -0.4246, -0.0385, -0.0135, -0.0481,
         0.0842,  0.0405, -0.0837, -0.2277, -0.2601,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6994e-01,  2.3178e+00,  1.6049e-01,  1.2976e-01, -1.4079e-01,
        -1.3918e-01, -1.6404e-02, -8.2675e-02,  1.8545e-02, -5.5878e-02,
         1.1949e-01, -1.8442e-01,  2.5926e-01,  1.4599e-01,  2.0836e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4802,  0.2700,  0.0186,  0.0615,  0.9362,  0.8600, -0.1115,  0.3981,
         1.1303,  0.1020,  0.0065,  0.3499,  0.3513,  0.0798,  0.0916, -0.0208,
        -0.3397,  0.1813, -0.1441, -0.6011,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0977e-01,  3.0565e-03, -4.0566e-02, -6.2486e-02, -5.9111e-01,
        -5.0496e-03, -1.3038e-02, -6.5561e-02, -2.0982e-02,  1.0323e-02,
        -2.0147e-01, -2.4524e-01, -4.3881e-02,  5.9087e-03, -1.4665e-01,
        -1.3478e-01,  3.5395e-02, -5.1224e-02,  3.1392e-02, -1.3696e-01,
        -5.8026e-02, -6.8793e-03, -3.1528e-02, -3.3023e-02,  1.3662e-02,
        -1.5997e-01, -3.7171e-01,  3.4763e-02,  3.5854e-02, -2.2403e-01,
        -3.3406e-01, -1.2216e-01, -4.2444e-04, -6.0888e-02, -1.5925e-01,
        -2.3063e-01,  8.6746e-02, -1.1317e-01, -2.1716e-02,  5.0048e-02,
        -5.4782e-02,  9.5131e-03, -4.5147e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4054e-01,  5.7995e-02,  1.1504e-01, -7.0960e-03, -1.8159e-01,
        -8.0479e-02, -1.6941e-01, -4.4484e-01, -1.5464e-01, -6.0176e-03,
         1.6012e-02,  9.4080e-03, -3.0099e-02, -3.8937e-01, -4.9526e-01,
        -5.4543e-02,  2.7159e-04,  1.1946e-03, -7.7926e-02, -1.0344e-01,
        -5.2520e-01, -8.2766e-02,  8.5658e-03,  1.1553e-01, -1.6824e-01,
         5.3586e-02, -1.0026e-01, -6.9812e-02, -4.1276e-01,  2.6798e-02,
        -8.4436e-03, -3.5185e-01, -1.1971e-01, -4.2087e-01, -6.4100e-01,
        -1.2787e-02,  3.4062e-02, -5.6910e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1393e-01, -1.0907e+00, -4.4386e-01, -7.5805e-01, -2.3392e-01,
        -3.9835e-01,  2.0798e-01, -1.9174e-02, -9.6684e-02, -3.4522e-01,
        -4.3632e-03, -4.2210e-03, -2.5923e-01,  9.4911e-04,  6.2185e-02,
        -9.4574e-02, -7.4293e-02, -2.5966e-01, -4.8421e-03,  4.5938e-02,
         1.8948e-01, -5.9156e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2670, -0.3983,  0.0354, -0.0469,  0.1171, -0.0648,  0.0174,  0.6423,
         0.4663, -0.4780, -0.0778,  0.0198, -0.0908,  0.0147, -0.0851, -0.2689,
        -0.5699, -0.1092, -0.2004, -0.3295, -0.0504,  0.0149, -0.4352, -0.2261,
        -0.1064, -0.0035, -0.0964,  0.0304,  0.0213, -0.1984, -0.1316,  0.0585,
         0.0411, -0.1887, -0.2181, -0.6314,  0.0263, -0.0606,  0.0393, -0.0448,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8591e-04,  4.9871e-03, -7.3331e-02, -1.2885e-01,  4.2561e-02,
        -2.6769e-01, -6.5314e-02, -4.9129e-03, -3.0506e-01, -2.3293e-01,
        -6.4193e-02, -1.7537e-01, -2.0180e-02, -8.6714e-01, -6.2496e-01,
        -6.0692e-02,  4.0004e-02,  7.9092e-02, -1.4335e-01,  1.3276e-01,
        -3.3415e-02, -3.4720e-02,  3.8777e-03, -5.4134e-02, -3.7664e-02,
        -6.7860e-01,  1.3502e-02, -5.2846e-01, -5.8061e-01, -1.9012e-01,
         1.0782e-01, -3.5276e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0709e-02, -2.5776e+00, -1.0190e-01, -7.0389e-02, -8.7333e-02,
        -4.6160e-02, -1.0654e-01, -4.1132e-01, -5.8017e-01,  3.5330e-02,
        -7.8836e-03, -1.6004e-01,  1.6169e-02, -3.8830e-02,  5.1349e-02,
         2.8132e-02, -2.7161e-01, -1.5976e-05, -2.0128e-02,  9.4793e-02,
        -2.0009e-02, -4.9570e-02,  2.6134e-02, -3.1917e-01, -4.5235e-02,
         3.2934e-02, -2.5647e-01, -1.9182e-03, -1.6680e-02, -1.3856e-01,
        -7.8805e-03,  1.6757e-02,  2.8677e-02,  6.1826e-03,  2.5315e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.0478, -0.2475, -0.7975, -0.2972, -0.0461, -0.0251, -0.8082, -0.7692,
        -0.2683, -0.6442,  0.0611, -0.3685,  0.0466, -0.2828, -0.0808, -0.1173,
        -0.2607, -0.1128,  0.0510, -0.1614,  0.2640,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6028, -1.9531,  0.1281, -0.3042, -0.5588, -0.2696, -0.0412, -0.4117,
        -0.3287,  0.1585, -0.0770,  0.0965, -0.0183,  0.0198, -0.3096,  0.0422,
        -0.4520, -0.1898,  0.2679,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0776, -0.1648, -0.2625, -0.0549, -0.2901, -0.0051,  0.0439,  0.0440,
        -0.0117, -0.6083, -0.7649, -0.0138, -0.1647, -0.8575,  0.0253, -0.1952,
        -0.0203, -0.1939, -0.0570, -0.1682,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2724, -1.7302,  0.0322, -0.5178,  0.0498, -0.1045, -0.3622, -0.3158,
        -0.0631, -0.0373, -0.1489, -0.1288, -0.0448, -0.5386, -0.0940, -0.1937,
        -0.3619, -0.1134, -0.1342, -0.1932, -0.0020, -0.3184, -0.2500,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2376, -0.9991, -0.3744, -0.2181,  0.0482, -0.2239, -0.1345, -0.0868,
        -0.4316, -0.5910, -0.1184, -0.2394, -0.0726, -0.4491,  0.1731, -0.0186,
         0.1061, -0.1084, -0.0457,  0.0854, -0.3801, -0.0809, -0.0607, -0.0458,
         0.0847,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3423,  2.3359,  0.2218, -0.1094,  0.0763,  0.0883, -0.0245,  0.0118,
        -0.1547, -0.0144,  0.4080,  0.0307,  0.0593, -0.0659, -0.0916,  0.0218,
         0.0798,  0.0459, -0.0690,  0.0686,  0.6677,  0.4472,  0.3323,  0.2648,
         0.1827,  0.2306,  0.0658,  0.0708,  0.0522,  0.0529,  0.1397, -0.0925,
         0.2294,  0.2713, -0.2610,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1661,  0.0703, -0.0287, -0.3810, -0.0766,  0.0421, -0.4817, -0.0736,
        -0.6428,  0.0028,  0.0695, -0.1110,  0.1560, -0.1683, -0.2469, -0.6571,
        -0.1115, -0.3770,  0.1337,  0.2750,  0.0365,  0.0740,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1473,  0.1625, -0.0941, -0.0187, -0.0745,  0.1725, -0.0468,  0.0753,
        -0.0288,  1.2955,  0.4071,  0.1249,  0.0740,  0.1184,  0.0649,  0.4526,
         0.0875, -0.1349, -0.0779,  0.1243, -0.2007,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0708, -1.7438, -0.1426,  0.0600, -0.1143,  0.0364, -0.5350, -0.9750,
        -0.0056, -0.0274,  0.1210,  0.0820, -0.1972, -0.4845, -0.0366, -0.3141,
         0.1485,  0.6941, -0.0240,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5168e-02,  3.8386e-02, -9.8327e-02,  7.2474e-02,  6.3843e-02,
        -2.4434e-01,  2.0432e-02, -5.2476e-02,  2.8632e-02, -3.0563e-02,
         1.6381e-02, -9.5378e-02, -6.4395e-02, -9.3381e-02,  1.9940e-01,
         6.9977e-02, -2.0462e-01, -1.9755e-01,  8.5340e-02, -2.4685e-01,
        -6.7055e-02, -7.1078e-02, -1.8196e-01,  3.5141e-03, -1.9084e-01,
        -6.4966e-02, -4.7635e-02, -1.8609e-01,  7.9299e-02, -6.1148e-01,
        -2.3856e-02, -1.1652e-01, -1.2376e-01, -1.7649e-01, -3.0665e-02,
        -3.1941e-02, -6.2841e-02,  1.0618e-02,  3.9091e-02, -1.4296e-01,
         5.2679e-03,  5.1405e-02, -3.2224e-04, -3.3398e-02, -4.6251e-02,
        -4.2203e-02,  2.9216e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9624, -2.4241, -0.9166, -0.1766,  0.0218,  0.1271, -0.1423, -0.1710,
        -0.2452,  0.1549,  0.0796,  0.1080, -0.0435,  0.2488,  0.0716, -0.0274,
        -0.9764,  0.0356,  0.0170, -0.0877,  0.0422,  0.0400, -0.3785, -1.0602,
         0.0313,  0.0828, -0.2970,  0.0203,  0.1202,  0.0659,  0.0025,  0.0708,
         0.3241, -1.1723,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1197,  2.3416,  0.0587, -0.1021, -0.0245,  0.1993, -0.0704, -0.0424,
         0.0245,  0.1301,  0.0074, -0.0239,  0.2532,  0.3672, -0.0084,  0.0784,
        -0.0342, -0.0597,  0.0556,  0.4674, -0.1131, -0.0122,  0.0889, -0.0316,
        -0.0910, -0.1079, -0.0382,  0.0520,  0.0138,  0.2224,  0.0386, -0.0712,
         0.0545, -0.1314,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.5146,  2.7627, -0.4826,  0.1711,  0.2199,  0.6654, -0.1344,  0.0920,
         0.1196,  0.3067,  0.1825,  0.0980,  0.1659,  0.3787,  0.0914, -0.0387,
        -0.0604,  0.1530,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0802, -0.0831, -0.0267, -0.0631, -0.0622, -0.0756, -0.0966,  0.1296,
        -0.8221, -0.9868, -0.0280, -0.3526, -0.3065, -0.0445, -0.0035, -0.0641,
         0.0201,  0.0320, -0.0808, -1.2436,  0.1395, -0.1560, -0.0061, -0.0266,
        -0.1316,  0.0433, -0.2350,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1726, -0.0641,  0.0217,  0.0348, -0.0600, -0.0611, -0.0328, -0.0487,
        -0.2317,  0.0095,  0.0105, -0.0448, -1.0156, -0.0318, -0.1247,  0.0533,
        -0.0672, -0.1077, -0.2461, -0.9792, -0.0258,  0.0398, -0.3746, -0.0157,
         0.0977, -0.1668,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1273, -2.1759,  0.1318, -0.3826, -0.5904,  0.0769, -0.1135,  0.1196,
        -0.3929, -0.3176, -0.2907, -0.1390,  0.1698, -0.0090, -0.0845, -0.2905,
        -0.0781, -0.1440,  0.0197, -0.1259,  0.3110,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3117, -1.7407, -0.8154, -0.8315, -0.1463, -0.1402, -0.3478, -0.0841,
         0.0073,  0.2228, -0.0696, -0.2922, -0.0093,  0.0962,  0.0172,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3150,  0.0399, -0.0385,  0.0511,  0.0102, -0.0907, -0.0900,  0.1092,
         0.0696,  0.1451,  0.0690,  0.0126,  1.5796,  0.1496,  0.3089,  0.0197,
        -0.0616,  0.0068,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2657, -1.4933, -0.7046, -0.9483, -0.0939, -0.0616, -0.2142, -0.2616,
        -0.1093, -0.3500, -0.4878, -0.2939,  0.0757, -0.3575, -0.0572,  0.0387,
         0.1012, -0.1171,  0.4099,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8880e-04, -3.2816e-01, -2.9147e-01, -7.7969e-02, -1.3795e-04,
        -1.0263e-01,  1.1914e-01,  3.6093e-02, -5.8080e-02,  5.5728e-03,
         1.0791e-02,  9.2628e-03, -1.1166e-01,  2.6910e-02,  2.4135e-02,
         3.5533e-03, -1.9230e-01,  4.2545e-02, -2.2080e-02,  1.6425e-01,
        -4.1453e-01,  1.3452e-02, -2.5519e-01, -1.2168e-02, -3.0664e-01,
         1.5412e-02, -3.3409e-01,  2.1430e-02,  5.3008e-02, -9.4228e-02,
        -2.8135e-02, -4.0673e-02, -2.1999e-01, -8.3714e-03, -1.3063e-01,
        -2.5264e-02, -1.9348e-01,  8.8777e-02, -8.8313e-02, -4.1704e-02,
        -2.2284e-02, -2.2236e-02,  9.1042e-02, -1.1705e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1254, -1.6364, -0.2614, -0.6682, -0.3777, -0.0426, -0.1210, -0.0560,
        -0.1624, -0.0970, -0.2055,  0.0611,  0.0479,  0.0352,  0.1584, -0.0045,
         0.1168, -0.5781,  0.0367,  0.0298, -0.2053,  0.0050,  0.0567,  0.0148,
         0.0924,  0.0416, -0.0847, -0.4605,  0.0962,  0.0595,  0.0608, -0.1034,
         0.0571, -0.0258,  0.2100,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8041e-01, -2.9867e+00,  1.8318e-02, -9.2142e-01, -2.5200e-02,
        -1.7281e-03, -5.8923e-01, -3.4908e-01, -1.9247e-01,  5.5158e-02,
         5.7518e-02, -1.9361e-01, -2.7548e-01, -2.7420e-02,  1.2422e-01,
        -2.7845e-01, -1.0634e-01,  3.9988e-02,  1.9212e-01, -4.3927e-02,
        -1.6310e-02, -1.7713e-01,  1.6155e-01, -2.1311e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1404, -0.9757, -0.0860, -0.2149, -0.0978,  0.0157,  0.0422, -0.1344,
        -0.5801, -0.0729, -0.1947,  0.0210,  0.0895, -0.2312, -0.4719, -0.0698,
        -0.0046, -0.0168, -0.2682, -0.0434, -0.0180, -0.2518,  0.0179, -0.1310,
        -0.0728, -0.4421, -0.1909, -0.1820, -0.1017, -0.1449, -0.3801,  0.0428,
         0.1471, -0.0568,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2504,  0.1218, -0.0733, -0.1204,  0.0329,  0.0898, -0.8376, -0.0770,
         0.0020, -0.0628,  0.0813, -0.0852, -0.4263, -0.0293, -0.5153, -1.1658,
        -0.1486, -0.0038, -0.0632,  0.0979,  0.0472, -0.0206, -0.0164, -0.4536,
        -0.1837, -0.1046, -0.3193, -0.0572,  0.0405,  0.2981,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.5824,  0.6630, -0.2270, -1.7368,  0.0760, -0.0282, -0.2795, -0.1553,
        -0.2632, -0.9671,  0.0615, -0.1096,  0.0576,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2684, -0.0787, -0.2413, -0.1023,  0.0369,  0.0556,  0.2891,  0.6570,
         0.6928, -0.0307, -0.1144,  0.5512, -0.3240,  0.3521,  0.2100, -0.1582,
        -0.0830,  0.3611,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4776, -1.2392, -1.1451, -0.1093, -0.3923, -0.2238,  0.1029, -0.0434,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0034, -1.9197, -0.0750, -0.2448, -0.2745, -0.0637, -0.0401, -0.0135,
         0.1423, -0.0427,  0.0412,  0.1173, -0.0429, -0.0100,  0.0061, -0.0412,
         0.0312,  0.0114,  0.0145, -0.0286,  0.1164,  0.0384, -0.2473, -0.1599,
         0.0924, -0.1179, -0.0511, -0.0942, -0.9216, -0.2747,  0.1041, -0.3449,
        -0.0747, -0.6923,  0.0084, -0.3318, -0.1583, -0.0243,  0.0411,  0.0305,
        -0.1160, -0.0237, -0.0243,  0.0368,  0.1212,  0.0367,  0.0953],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4923,  0.6963, -0.0805,  1.8101,  0.1627, -0.3107,  0.0070,  0.0174,
         0.1183,  0.0938, -0.1768,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4152,  3.3665,  0.0428,  0.7116,  0.1300,  0.0253,  0.2151,  0.7510,
         0.2272,  0.4913,  0.2141, -0.0776,  0.6547, -0.0407, -0.2796, -0.0041,
         0.1394,  0.0317,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5473, -0.4860, -1.4233,  0.1764,  0.2098,  0.2144, -0.9818, -0.2606,
        -0.5726,  0.0378,  0.1446,  0.3483,  0.1543,  0.1254, -0.3570,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2326, -1.8552,  0.1145, -0.1952,  0.2692, -0.3412, -0.7744,  0.0791,
         0.0081, -0.5107, -0.7903,  0.0094,  0.1466, -0.0209, -0.0337,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7060e+00,  3.5060e-02, -7.4309e-02, -2.3298e-01, -6.5402e-02,
         6.8684e-02, -1.0556e-01,  2.1454e-01,  3.0307e-01,  4.7668e-02,
        -3.4562e-04,  3.4477e-01,  1.0992e+00,  4.4103e-02, -1.0170e-01,
         9.0608e-02,  3.2232e-01, -2.1401e-01, -1.8199e-01, -5.5033e-02,
         6.4617e-01,  1.2184e+00, -2.2991e-01, -6.8238e-01,  4.0432e-01,
         2.8671e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2905,  0.0676,  0.0640,  0.2352, -0.0600,  0.0278, -0.0530,  0.5136,
         1.3121,  0.0482,  0.0805,  0.1491, -0.1764, -0.1613,  0.2514,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2379,  2.3332,  0.1051,  0.4874,  0.5676,  0.1783,  0.4197, -0.0512,
         0.0044,  0.8052, -0.0673, -0.1483,  0.2233, -0.0056, -0.2588, -0.0109,
         0.1215, -0.0312,  0.3406,  0.0109,  0.2928, -0.1253, -0.1174, -0.0767,
         0.0142, -0.1426,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0460, -0.1049,  0.0423,  0.0326, -0.1083, -0.2354,  0.0915,  0.0498,
        -0.2070, -0.4149, -0.9559, -0.0891, -0.0944,  0.1815, -0.4406, -0.3114,
        -0.0403, -0.1395, -0.0029, -0.3187, -0.0673, -0.2163, -0.1058, -0.2856,
         0.0850, -0.0927,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.2758, -1.7568,  0.3144, -0.0802, -0.0494,  0.0319, -0.3224, -0.2198,
         0.1414, -0.1804, -0.1594, -0.1509, -0.2306,  0.0627, -0.1436, -0.1036,
         0.0266, -0.0406, -0.0378, -0.0362,  0.0679, -0.0771,  0.0784, -0.3642,
        -0.4624, -0.0772, -0.2716, -0.0759,  0.1443,  0.0134,  0.0482, -0.1459,
         0.0074,  0.0213,  0.0393, -0.2455,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2232, -0.4828, -0.1650, -0.0897, -0.2391, -0.1124, -0.2554, -0.4589,
         0.0458,  0.0722, -0.0783,  0.0220, -0.0068,  0.0728, -0.0065,  0.0054,
         0.0531, -0.0201,  0.0253,  0.0830, -0.0388, -0.3883, -0.0643, -0.0421,
        -0.2209, -0.4704, -0.0093, -0.3929, -0.2803, -0.1332, -0.0586, -0.3289,
        -0.3664,  0.0114,  0.0336,  0.1303,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0077, -0.3451, -0.1567, -0.0623,  0.0724, -0.8941,  0.1002, -0.0626,
        -0.0530, -0.0254, -0.2086, -0.3895, -1.0938, -0.0097,  0.1686, -0.0224,
        -0.1922,  0.1051, -0.0633, -0.0506,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4664, -0.8589, -0.1616, -0.0894, -0.1326,  0.0171, -0.0758, -0.2791,
        -0.0598, -0.1862, -0.0683, -0.0270, -0.0319, -0.0602, -0.4218, -0.3458,
        -0.1433, -0.1252, -0.0457, -0.0730, -0.1037, -0.0365,  0.0214, -0.0253,
        -0.0865,  0.0284,  0.0095, -0.0554,  0.0148, -0.2103, -0.9112, -0.0115,
        -0.2535, -0.3872,  0.0516,  0.0978, -0.0691, -0.0789,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1036, -0.8273, -0.4569, -0.4458, -0.0569,  0.0463, -0.0734,  0.1924,
         0.0842, -0.0688, -0.1367, -0.3399, -0.0023,  0.0430, -0.0193, -0.2332,
         0.1346, -0.2370,  0.0025, -0.0100, -0.0027,  0.0518,  0.1431,  0.1103,
         0.0117, -0.0067, -0.0525, -0.2619, -0.3924, -0.0488, -0.0848, -0.0578,
         0.0373, -0.0465, -0.1556, -0.0153,  0.0163,  0.0659, -0.0212, -0.0019,
         0.0067], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5150, -3.1096, -0.0167,  0.1829,  0.0890, -0.3365, -0.0885, -0.0958,
        -0.0091,  0.0426, -0.0631, -0.3250, -0.2834, -0.1384,  0.1746, -0.4118,
        -0.0484, -0.3402, -0.1540,  0.1529, -0.0049, -0.2442,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1567,  0.0421, -0.0071,  0.0095, -0.0285, -0.0290, -0.0269,  0.1488,
        -0.0459,  0.0533, -0.0340, -0.1419,  0.0771, -0.3398, -0.0395, -0.4873,
         0.0158, -0.2753, -0.0942, -0.4551, -0.6048,  0.0097,  0.1636, -0.2537,
         0.0258,  0.0046,  0.0174, -0.1319, -0.5024,  0.0484, -0.3364, -0.2875,
        -0.1342,  0.0420, -0.0785,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2281, -0.6941, -1.9616, -0.1774, -0.5648, -0.1849,  0.1334,  0.1722,
         0.2637, -0.0910, -0.1976, -0.0939, -0.0354,  0.1090, -0.0655, -0.0182,
         0.2664, -0.0237, -0.3626,  0.0651, -0.2391,  0.0532, -0.0176, -0.1884,
        -0.0429,  0.0336, -0.2422,  0.2027, -0.1314,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8066e-02, -2.0301e+00, -3.1577e-01, -3.3575e-01,  2.1669e-02,
        -3.0087e-01, -8.9228e-02, -1.3306e-01, -2.5001e-02,  1.3809e-02,
        -2.9032e-01, -2.6089e-01, -9.3792e-02,  7.7551e-03, -7.7141e-04,
        -2.7520e-01, -1.3599e-01, -2.0998e-02, -1.1835e-01,  6.3832e-02,
        -6.1199e-02, -2.2778e-03, -8.2118e-02,  6.2082e-02,  4.0897e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5892,  0.1287, -0.0528,  0.1493, -1.3825,  0.1709,  0.0568, -0.8086,
        -0.8097, -0.1019,  0.0700,  0.0700,  0.0242,  0.1162, -0.5651, -0.1962,
         0.2176, -0.2001, -0.1261, -0.1087, -0.0605,  0.1530,  0.1507,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0198, -0.0027,  0.0318,  0.0138,  0.0302,  0.0053, -0.0192,  0.0066,
         0.0379,  0.0928, -0.0070, -0.0483, -0.1590, -0.5321, -0.0720,  0.0234,
        -0.0104, -0.1902, -0.0105, -0.0029,  0.0119, -0.0519,  0.0756,  0.0292,
        -0.0299,  0.0837, -0.7198,  0.0023, -0.5822, -0.1859, -0.2615, -0.0528,
        -0.0386, -0.0597, -0.1337,  0.0896, -0.0568,  0.0785, -0.1288,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1007, -0.4749, -0.1152, -0.7581, -0.7499, -0.2013, -0.0458, -0.6471,
        -0.4353, -0.2154, -0.1836,  0.1430,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 6.2492e-02, -4.6968e-01, -1.6459e+00,  8.6585e-02, -1.1901e-02,
        -3.0659e-01, -1.1813e-01,  3.7104e-02,  4.2415e-02, -8.9624e-02,
         1.9725e-02,  1.5851e-02, -2.8458e-02, -3.3414e-02, -1.8537e-01,
         1.1921e-01, -3.8926e-02, -3.5865e-01,  3.2372e-03,  5.0990e-02,
        -1.0668e-01, -1.0075e-01, -5.3048e-02,  3.9300e-02,  3.4826e-02,
         2.5488e-02,  3.0854e-04, -7.6898e-02, -3.2733e-02,  3.8336e-02,
         4.9470e-02,  1.9628e-01, -1.1467e-01, -1.1789e-02,  1.4430e-02,
         6.4589e-02, -1.5792e-01, -8.2115e-03, -3.1188e-02, -1.4144e-03,
         3.9613e-02,  1.4475e-02,  9.1581e-02,  2.3813e-03,  6.6027e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0532,  2.8727,  0.5943,  0.7525, -0.0304,  0.1779, -0.3072,  0.2478,
         0.6184, -0.2464,  0.2675,  0.0371,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2109, -4.7064, -0.4824, -0.8242, -0.2061, -0.3374,  0.0809,  0.0714,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2425, -1.5475, -0.2249,  0.1963, -0.4063,  0.0717, -0.1742, -0.1121,
        -0.0845, -0.3008, -0.0251, -0.0553, -0.0093, -0.0161,  0.0126, -0.0141,
        -0.0265, -0.0177,  0.0073, -0.0196,  0.0277,  0.0308, -0.0307, -0.0258,
        -0.0282,  0.0275,  0.0202, -0.0126, -0.1650, -0.0691, -0.0048, -0.0196,
         0.0059, -0.0198, -0.0335,  0.0341, -0.0246, -0.0236,  0.0035,  0.0025,
        -0.0020, -0.0176, -0.0347, -0.0266, -0.0028,  0.1843, -0.3895,  0.0416,
        -0.2333, -0.2362, -0.0721, -0.0719, -0.0986,  0.0897, -0.0854, -0.0067,
        -0.0761,  0.0129, -0.0102, -0.0142,  0.0453], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0369,  0.0022,  0.0814, -0.1033, -0.4597,  0.1183, -0.2255,  0.0465,
         0.0047,  0.0250, -0.0389, -0.3602,  0.0453, -0.3248, -0.6280,  0.0696,
         0.0659, -0.0804, -0.1486, -0.0364, -0.3310, -0.2946, -0.0568, -0.0403,
        -0.2457, -0.0874,  0.0016, -0.1635, -0.2207, -0.1681, -0.0310, -0.1089,
        -0.0302,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4090,  0.4333,  1.3174,  0.4584,  0.2389, -0.0233,  0.3568,  0.0473,
        -0.0235,  0.1956, -0.0158, -0.1040,  0.0318,  0.1051, -0.0034, -0.2053,
         0.0495,  0.0960,  1.1124,  0.0624,  0.0615, -0.0525, -0.2361, -0.1099,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1368, -0.3874, -0.2667, -0.1554, -0.0957, -0.4479,  0.0838, -0.3164,
        -0.0920, -0.4132, -0.4166, -0.3277, -0.6558, -0.1770,  0.0890, -0.0124,
         0.0820,  0.0032,  0.0665, -0.0459, -0.0746, -0.1026, -0.0912, -0.1571,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5352,  0.0665,  0.4353,  0.6519,  0.0092,  0.0747,  0.1002,  0.4161,
         0.1195,  0.0118,  0.0727,  0.0805, -0.1582, -0.0269,  0.5816,  0.8192,
         0.6195,  0.0052, -0.0550, -0.0409,  0.5131,  0.0812,  0.2038,  0.4944,
        -0.0419, -0.1005,  0.4598,  0.0616,  0.0670,  0.0853,  0.1277,  0.1431,
        -0.0439, -0.1523,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0674, -0.0016, -0.1739, -0.0267, -0.9006,  0.3098, -0.0090, -0.1389,
        -0.2327, -0.0351,  0.0192,  0.0413, -0.1477, -1.1174, -0.0301, -0.2298,
        -0.8675,  0.0414, -0.4774, -0.0640, -0.3921, -0.4517,  0.1727, -0.0326,
         0.0404,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3161, -0.1000, -0.0223,  0.0429,  0.2126, -0.1689,  0.9158,  0.3735,
         0.4891,  1.9199, -0.1687,  0.2854,  0.0973,  0.0142, -0.2963,  0.3914,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2176, -2.4712, -0.1516, -0.7517, -0.3387, -0.4894, -1.2254, -0.2057,
         0.3769, -0.0664,  0.0143, -0.0974, -0.1847,  0.2220,  0.3664, -0.0236,
        -0.1543, -0.3490, -0.1086, -0.0884,  0.0876, -0.3995,  0.0292,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2805, -0.0072,  0.0113,  0.8236,  1.9257,  0.0850, -0.3865,  0.0425,
         0.1238, -0.1248,  0.1125, -0.0639, -0.3147, -0.0041,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.3621, -0.5868, -0.4494, -0.7783, -0.1130,  0.4315, -0.2798,  0.1414,
        -0.1276, -0.2858, -0.5471, -0.3679, -0.0831, -0.0989, -0.0397,  0.0750,
         0.0764, -0.4232, -0.0794, -0.2118, -0.1620, -0.0613,  0.1016,  0.0863,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3392e-02,  1.4885e-02, -6.4091e-01, -3.8233e-04, -4.9590e-01,
        -3.7539e-02, -1.5291e+00, -1.0644e-02,  1.6852e-01,  6.6373e-02,
         6.4919e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2025,  0.0544,  0.0617,  0.0491,  0.1595, -0.2834, -0.8011, -0.0119,
        -0.4792, -0.0589, -0.1482,  0.0532, -0.1228,  0.0027, -0.3056, -0.0292,
        -0.0587,  0.0502,  0.0053, -0.0763,  0.0391,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0507, -0.2226,  0.0867,  0.0570,  0.5073,  0.1510, -0.2904, -1.0742,
        -0.7707, -1.3842,  0.1350,  0.3851,  0.0700, -0.4622,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1482,  0.0600,  0.0510, -0.0382,  0.0528,  0.0969, -0.0436, -0.0094,
        -0.0421, -0.0116, -0.0343,  0.1592,  1.2807,  0.0664,  0.0053,  0.1613,
         0.0643, -0.0245,  0.1111, -0.0195,  0.4965,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2505,  2.4077, -0.1406,  0.7059, -0.4665,  0.6592,  0.1241,  1.5628,
         0.2341,  0.3444,  0.1529, -0.3689, -0.4603,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1848, -0.5992, -0.0282, -0.0131, -0.0357, -0.0856, -0.1897, -0.5941,
        -0.2663, -0.0677, -0.0116,  0.0512,  0.0527,  0.0621,  0.0270, -0.0759,
         0.0084, -0.0084, -0.0056,  0.0864, -0.0483,  0.1249,  0.0825, -0.1289,
        -1.1092, -0.1271, -0.6594, -0.4195, -0.3168, -0.0927, -0.0574,  0.0477,
        -0.0147,  0.0098,  0.0400,  0.0743, -0.1765,  0.0022,  0.0261, -0.0403,
        -0.1788, -0.0129,  0.0298,  0.0462,  0.3190], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4755, -0.5594, -0.5217, -0.1580, -0.0242,  0.1085,  0.0348, -0.0836,
         0.0238,  0.0373, -0.1659,  0.0734, -0.0242,  0.0239, -0.1011, -0.0995,
        -0.7942, -0.0168, -0.0102,  0.0556, -0.0556, -0.3374, -0.0659, -0.4761,
        -0.0065,  0.0620, -0.1356, -0.1368, -0.6107, -0.0603, -0.0865, -0.1040,
         0.0237, -0.0521,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2115, -0.0550,  0.1391,  0.3477,  0.1050,  0.0368,  0.0158,  0.0558,
        -0.2390,  0.0183,  0.0402,  0.0649, -0.0880,  0.0232, -0.0203, -0.0701,
        -0.0673, -0.1111,  0.1080,  1.5369,  1.8933,  0.0565, -0.0581,  0.1145,
        -0.1417,  0.1960,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0105, -2.1394, -0.2497, -0.5190,  0.0816, -0.1532,  0.2297, -0.4652,
        -0.2420,  0.0046, -0.0111,  0.0468,  0.0912,  0.0663, -0.2330, -0.8231,
        -0.9635, -0.1133, -0.2889, -0.0652, -0.3945,  0.0240,  0.1700,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1560, -0.9974, -0.2354, -0.0373,  0.0322,  0.0187, -0.0973, -0.1447,
        -0.3748, -0.1243,  0.0647,  0.0448,  0.0640,  0.0040, -0.8330,  0.2128,
        -0.0668, -0.0850, -0.2593, -0.1710, -0.0208, -0.0114,  0.0067,  0.0816,
         0.0336,  0.0240, -0.3383, -0.6327,  0.0838, -0.0557, -0.1468,  0.0296,
        -0.0090,  0.0554, -0.0397, -0.0309, -0.2581,  0.0374, -0.1660,  0.5230,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0544, -0.0367,  0.0444, -0.0543, -0.2712,  0.0182, -0.1040, -0.3291,
         0.0298, -0.0961, -0.0093, -0.0714, -0.3117, -0.0876, -0.2268, -0.4743,
        -0.1883, -0.0554,  0.0055, -0.2250, -0.0873, -0.3146, -0.1420, -0.0162,
        -0.1590, -0.0059, -0.0747, -0.2091, -0.0092, -0.1202, -0.2222, -0.0393,
        -0.1281,  0.1860,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.1743, -0.4073, -0.1438, -0.0689, -0.5271,  0.0130,  0.0463, -0.0225,
        -0.0112,  0.0487, -0.0151, -0.0193, -0.0800, -0.0709, -0.5282,  0.0159,
        -0.0182, -0.0247, -0.2176,  0.0205, -0.0520,  0.0314, -0.0666,  0.0845,
        -0.1933, -0.3802,  0.1180,  0.0206, -0.0368, -0.0160, -0.0197,  0.0052,
         0.0173, -0.0526, -0.1694,  0.0045, -0.0909, -0.1766, -0.0571, -0.0819,
        -0.1021,  0.0074, -0.1496, -0.0470, -0.0021, -0.0180, -0.0164,  0.0216,
        -0.0388, -0.0090, -0.0370, -0.2857], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0406, -0.0224,  0.0188,  0.0957,  0.7602,  0.0747, -0.2347, -0.3025,
        -0.0856,  0.1091,  0.0099, -0.0212,  0.0987,  1.1621,  0.1637,  0.3126,
         0.8371,  0.4799,  0.3480, -0.1903,  0.1667,  0.1083,  0.0573,  0.9049,
         0.0276, -0.0834, -0.0082,  0.0460, -0.0259,  0.0570, -0.0447,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2379, -0.5810,  0.0085, -0.0383,  0.0522,  0.0707, -0.0908, -0.0128,
        -0.0789,  0.1612,  0.0217, -0.2567, -0.0115, -0.1548, -0.5824, -0.4524,
        -0.1401, -0.4582, -0.4460, -0.6086, -0.1638, -0.0339, -0.2042,  0.0299,
         0.4126,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2803, -0.0299, -0.0901,  0.1325,  0.0273, -0.2605, -1.8762,  0.0056,
        -0.3101, -0.0670,  0.1097, -0.0473,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2330, -0.8593, -0.0432, -0.3782, -0.4647, -0.0515,  0.0913,  0.0118,
        -0.0324, -0.0997,  0.0552, -0.0886, -0.2582, -0.5515, -0.0677, -0.3325,
        -0.1089, -0.0560, -0.0638,  0.0498, -0.2054, -0.0422, -0.0445,  0.0105,
        -0.1072, -0.4756, -0.0729,  0.0440, -0.1077,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0819,  3.1770,  0.2941, -0.2014,  0.0046,  0.4674,  0.7847, -0.6184,
        -0.2277, -0.0828,  0.0773,  0.2093,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7709e-02, -2.6677e+00, -1.0241e-01, -8.5261e-01, -1.4210e-03,
        -4.2271e-01, -1.0830e+00,  1.9678e-02, -3.8764e-01, -7.0804e-03,
        -1.1464e-01,  2.9830e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1585,  0.1142, -0.6067,  0.0746, -0.6578, -0.1213, -0.2221, -0.3191,
        -1.0371, -0.2373,  0.0739, -0.7920, -0.1016, -0.2725, -0.0980, -0.5831,
        -0.0809, -0.0814,  0.1740, -0.0365,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0215e-02,  1.7056e+00,  8.1133e-02,  3.5986e-01,  9.2299e-01,
         7.1414e-02,  2.1109e-01, -1.1396e-01,  1.8930e-01,  8.2849e-02,
         8.7093e-02,  1.2991e-01,  1.0233e-01,  8.4075e-02, -3.9565e-02,
         1.4058e-02,  1.0109e-01, -7.6664e-02,  2.4109e-02,  2.2271e-01,
         3.0917e-01, -2.2618e-01, -4.5628e-02,  1.1748e-01,  6.1589e-02,
        -5.9688e-02,  2.9990e-02,  3.0730e-01,  1.9834e-01, -2.1026e-02,
         1.7582e-01,  2.1686e-04,  2.0743e-01,  5.2828e-01,  5.0843e-02,
         1.7401e-01, -2.9573e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5342,  2.9505,  0.1051,  0.1010,  0.0877, -0.0597, -0.2753, -0.1433,
        -0.2015,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0059, -0.0635, -0.1144, -0.6376, -0.0855, -0.0143, -0.2521, -0.4635,
         0.1200, -0.0635, -0.1220, -0.0271, -0.4417, -0.8101, -0.0151, -0.1168,
         0.0417,  0.0078, -0.3870, -0.2420,  0.2158,  0.0613, -0.0249,  0.0115,
        -0.0491,  0.1909, -0.1749,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4776,  2.0684, -0.0874,  0.4046, -0.0238,  0.0337,  0.0212,  0.0129,
         0.0671, -0.1240, -0.0153,  0.5910,  0.2294, -0.1110,  0.0134,  0.0629,
         0.6357,  0.0960, -0.1346, -0.0675, -0.0675,  0.6242,  0.5653, -0.1759,
         0.0665, -0.1423,  0.0609,  0.3024,  0.7232, -0.0210, -0.0209,  0.2011,
         0.2727, -0.0698,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.0724, -0.0118, -0.0205,  0.0109, -0.2274, -0.0475,  0.0282,  0.0447,
         0.0759,  0.2338, -0.1074, -0.5003, -0.0809, -0.0720,  0.0328, -0.1879,
        -0.4609,  0.0331, -0.1424, -0.0618,  0.0313, -0.0647, -0.1285, -0.0326,
         0.1315,  0.0601, -0.0092, -0.0228, -0.0135, -0.0351,  0.1091, -0.3691,
        -0.5287,  0.0307, -0.1050, -0.2405, -0.2716,  0.0095,  0.0313,  0.0212,
        -0.1227, -0.0931, -0.0514,  0.0624, -0.0426,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2205, -0.0883, -0.1797, -0.1037, -0.1476, -0.6145, -1.1498, -0.0218,
         0.0758, -0.6682,  0.0045, -0.1571, -0.5586, -0.0681, -0.1364,  0.1360,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5160, -0.1370,  0.1494,  0.0266, -0.0806, -0.0583, -0.0771,  0.1460,
         1.1843,  0.8578,  0.4287,  0.8386,  0.0944,  0.2421,  0.1749, -0.0366,
        -0.0262,  0.0289, -0.1277,  0.0412, -0.0171, -0.1403, -0.0124, -0.0371,
         0.1269, -0.1333, -0.0547,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1137, -0.1122, -0.1696, -0.0028, -0.1442, -0.0025, -0.0563, -0.0360,
        -0.0806,  0.0052, -0.0208, -0.0725, -0.2161, -0.0216, -0.1191, -0.1009,
        -0.0996,  0.0278,  0.0249, -0.0778,  0.0365, -0.0473, -0.0639, -0.2134,
        -0.0523, -0.0322, -0.0871, -0.1846,  0.0298, -0.1703,  0.0370, -0.1586,
        -0.1464, -0.0478,  0.0189, -0.1217, -0.0748, -0.0984,  0.0296,  0.0321,
        -0.0683, -0.1505, -0.0102, -0.0060, -0.1021, -0.0324], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1517, -0.0479, -0.1533, -0.2983,  0.2824,  0.1621,  0.0759,  1.0480,
         0.0380, -0.1478,  0.2563, -0.0328,  0.0155, -0.0301,  0.0577,  0.0585,
         0.0735,  0.1213,  0.4812,  0.5894,  0.2434,  0.0687,  0.0429,  0.0451,
         0.0334, -0.0330,  0.4387, -0.0422,  0.0769, -0.0721,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8368,  0.1849,  0.1407, -0.1409, -0.3413,  0.4468,  0.3162,  1.9411,
         0.2046, -0.3376,  0.2338,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3163, -2.2327,  0.3132, -0.1479, -0.3076, -0.1308, -0.1905, -0.6738,
         0.0799, -0.0800, -0.0440, -0.0630, -0.2323, -0.1230,  0.0482, -0.3747,
        -0.0479, -0.1302, -0.0470,  0.0340,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3536, -1.8719, -1.0985, -0.5703,  0.1426, -0.2482, -0.4594,  0.2874,
        -0.0724,  0.0205,  0.0340,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0844,  1.8056,  1.3523,  1.0967, -0.0541,  0.0048,  0.1085,  0.3475,
         0.4587, -0.0435, -0.1059,  0.1636,  0.0231,  0.0128, -0.0244,  0.1562,
         0.0363,  0.0936,  0.1715, -0.0613, -0.5016,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1617, -2.2652, -0.6418,  0.1703, -0.0764, -1.0834,  0.2166,  0.1527,
         0.1011,  0.1237,  0.0322, -0.0435, -0.0379,  0.0504, -0.0059,  0.1123,
        -0.0112,  0.2518,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1121, -0.4923, -0.9343, -0.1665, -0.0356,  0.0319,  0.1510,  0.0077,
        -0.0099, -0.0286, -0.0044,  0.0663, -0.1332, -0.2538, -0.3513, -0.0254,
        -0.1450, -0.2327, -0.1693, -0.0187, -0.0636, -0.2093, -0.5077, -0.0563,
        -0.1794, -0.0677, -0.2029, -0.0847, -0.0393, -0.3108,  0.0525, -0.0193,
         0.0051,  0.0077, -0.1213,  0.0021,  0.0781,  0.1076,  0.1137,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2483e-02, -2.0713e+00,  8.5854e-02, -3.0784e-01,  3.0218e-02,
         1.1878e-02,  8.4660e-02,  7.4175e-02, -1.6151e-01, -1.0445e-01,
        -1.7257e-01, -1.2464e-01,  3.7567e-02,  3.0918e-02, -1.0151e-01,
        -4.1791e-02, -4.3667e-01, -4.5738e-02, -3.0662e-02, -8.7985e-02,
        -3.0055e-03,  1.5717e-02,  1.5511e-02,  1.7186e-02, -1.1338e-02,
         1.7294e-03, -1.3261e-01, -1.0153e-02, -2.6449e-01, -4.4119e-01,
        -1.8484e-02, -5.5123e-02, -1.3641e-01,  1.4786e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.0792,  0.2194,  1.0974,  1.0251,  0.4235,  0.3972,  0.1184,  0.1683,
         0.2572,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2092,  1.2302,  0.2371,  0.5448,  0.2374,  0.5525,  0.0193,  0.1548,
         0.0252,  0.0457,  0.1977,  0.0724, -0.0087,  0.1117,  0.0684,  0.3426,
         0.0939,  0.6317, -0.0265, -0.0411,  0.0789, -0.0169,  0.0312,  0.0388,
         0.1382,  0.0189,  0.0599, -0.0367,  0.0230,  0.0207,  0.0778,  0.3465,
        -0.0164,  0.2265,  0.0294,  0.0602,  0.0061,  0.0292,  0.0323, -0.0490,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0829, -0.3238, -1.4269,  0.0187, -0.3559, -0.1046, -0.0216, -0.0789,
        -0.4188, -0.3528, -0.0195,  0.0063,  0.0212,  0.0290, -0.0097,  0.0212,
         0.0641, -0.0448, -0.3214,  0.0341, -0.1663, -0.0722, -0.0084, -0.0619,
         0.0705, -0.1407, -0.1040,  0.0014, -0.4237, -0.3756,  0.0375, -0.1747,
        -0.0292,  0.0312,  0.0457,  0.0659, -0.2860, -0.3966, -0.0127, -0.0386,
         0.0805,  0.0068,  0.0041, -0.0057,  0.0322,  0.0087, -0.0384,  0.0090,
         0.0291,  0.0288, -0.0929,  0.2580,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0148,  1.6157, -0.0752,  0.3089,  0.0295,  0.1538,  0.3200,  0.1540,
         0.1684, -0.0354, -0.0707,  0.1770,  0.5211,  0.0984, -0.0121,  0.3522,
         0.2297, -0.0725,  0.2479, -0.1312,  0.0150, -0.1872,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0683,  2.8663,  0.0852,  0.0350, -0.1139,  0.0125, -0.0330,  0.1560,
         0.0628, -0.0916, -0.0733,  0.0933, -0.0249,  0.0597, -0.0965,  0.2130,
         0.0254, -0.0068, -0.0160, -0.0695, -0.0310,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0907, -1.2151, -0.2360, -0.1372, -0.0488,  0.0355, -0.1190,  0.1227,
        -0.0865,  0.0138,  0.0342,  0.0209,  0.0172, -0.0745,  0.0427,  0.0271,
         0.0126,  0.0064, -0.1941,  0.0224,  0.0167,  0.0344, -0.0115,  0.0307,
        -0.0124, -0.0353, -0.0451, -0.3090,  0.0595, -0.4584, -0.8189, -0.7608,
        -0.0547, -0.3198, -0.2700, -0.0472,  0.0928,  0.1540,  0.0591,  0.0390,
        -0.5959, -0.0187,  0.0321, -0.0101, -0.1877,  0.0800, -0.3120, -0.1518,
        -0.0240,  0.1008, -0.0636, -0.0273,  0.0052,  0.0084,  0.0267],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1021, -0.0798,  0.0579, -0.0908, -0.0396, -0.2256, -0.5825,  0.0276,
        -0.0239,  0.0811, -0.1059,  0.0792, -0.0215, -0.1136, -0.3049, -0.1116,
         0.0107, -0.1255, -0.0562, -0.2677, -0.0498, -0.0439, -0.1401,  0.0293,
         0.0091,  0.0229, -0.0141, -0.5898,  0.0688,  0.0058, -0.0403, -0.0210,
        -0.0663, -0.3239,  0.0098, -0.2808, -0.0221, -0.1017, -0.2523, -0.1866,
         0.0150, -0.0779,  0.0103, -0.0538,  0.0288,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2833,  1.3238,  0.4641,  0.4487,  0.0662,  0.1408,  0.1719,  0.1546,
         0.2378, -0.0231, -0.1773,  0.2577,  0.0135,  0.3127,  0.2903, -0.0665,
         0.1432,  0.2842, -0.0062,  0.2310,  0.4979, -0.1638,  0.0042, -0.0433,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2329,  0.1488,  0.0334,  0.0409,  0.0062, -0.0121, -0.0186, -0.1581,
         0.1487, -0.0760, -0.0245,  0.0219, -0.3445, -0.2872,  0.0049,  0.1071,
        -0.4181, -0.1517, -0.1899, -0.2929,  0.0299, -0.1198, -0.2288, -0.2213,
         0.0421, -0.1391, -0.3122, -0.1498, -0.1375, -0.2917, -0.0097,  0.0089,
        -0.0255,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5413,  2.5074,  0.0959,  0.4061,  0.1416,  0.1158,  0.0350, -0.2094,
         1.0466, -0.1781,  0.0381,  0.0726, -0.2581,  0.3174, -0.1847,  0.1224,
         0.0610, -0.1370,  0.1141,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3862,  0.1667, -0.2655,  0.3242,  0.7045, -0.1245,  0.4625,  0.4379,
         0.5062, -0.0441,  0.0417,  0.0308, -0.0563, -0.1285, -0.0433, -0.0243,
         0.0380,  0.4809,  0.1862,  0.0989,  0.3319,  0.9718, -0.0872, -0.1072,
        -0.2412,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6537e-01, -1.7909e+00, -5.4346e-01,  9.4521e-03,  1.4361e-03,
        -7.3267e-01, -9.9388e-01,  1.8353e-01, -7.4515e-02, -1.1808e-01,
         6.4323e-02,  4.7392e-02, -2.7293e-01, -1.3269e-01, -2.3349e-01,
        -1.9924e-01,  5.9319e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.3978e-01, -4.5873e-02,  1.0813e-01,  7.9653e-04,  3.5399e-02,
         3.1853e-02,  2.8381e-02, -1.5983e-01,  1.9516e-01,  2.6961e-01,
         1.3411e-01,  8.0226e-02, -1.6953e-01, -1.1111e-02, -1.1166e-02,
         2.1004e-02,  4.2887e-01,  8.6203e-01,  6.0624e-04,  2.4005e-01,
         7.1262e-02,  5.0134e-01,  7.0367e-01, -1.3275e-02, -6.5665e-02,
         9.3646e-02, -1.0903e-01, -4.7059e-02,  3.0433e-04, -1.7324e-02,
         7.2203e-02, -1.3973e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2363,  0.2803,  0.6588,  0.0405, -0.0131,  0.0053,  0.3103,  0.4435,
         0.0596, -0.0470, -0.0272, -0.0290,  0.0330, -0.0179,  0.0537, -0.0928,
         0.0408,  0.0992,  0.2042,  0.0259,  0.2013,  0.0442,  0.3470,  0.3381,
        -0.0791,  0.1426, -0.0501,  0.0360,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2161,  0.2354, -0.0582,  0.0852,  0.0064,  0.2345,  0.6080,  0.0368,
         0.0271,  0.0700,  0.0411,  0.3258,  0.0501,  0.0682, -0.0032,  0.2466,
         0.0545,  0.2889,  0.0360, -0.0773,  0.1268, -0.0034,  0.1069,  0.0473,
         0.2030, -0.0213,  0.0442, -0.0570,  0.2617,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0321,  0.4384,  0.1370,  0.3625,  0.8962,  0.1206,  0.1137, -0.0116,
         0.0803, -0.0257, -0.0454,  0.0526,  0.1783,  0.8387, -0.0223,  0.3304,
        -0.0601,  0.6066,  0.1360,  0.2789,  0.1259,  0.0520,  0.2539,  0.0654,
         0.1815,  0.2162,  0.2285,  0.1186,  0.0393, -0.0241,  0.0480, -0.0537,
         0.4168,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2938e-02,  1.3274e+00, -1.0451e-01,  3.7260e-01, -1.1082e-01,
         9.8632e-02, -5.7769e-02,  1.4069e-01, -8.8912e-05,  2.5642e-01,
         1.8313e-01,  2.1650e-02,  6.5626e-02, -6.7474e-02,  3.4282e-02,
        -1.2180e-02,  1.6921e-01, -7.2869e-03,  6.7866e-01,  8.5355e-02,
         2.4128e-01, -1.1721e-01, -1.2629e-01,  2.6575e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1483e-01, -1.4006e+00, -7.8137e-01, -3.9878e-02, -2.5290e-01,
        -3.8814e-02, -7.3036e-02, -3.4672e-01,  1.1948e-01, -3.8474e-01,
        -1.5393e-01, -1.5341e-01, -2.1880e-01, -2.7734e-02,  8.9414e-02,
        -9.5411e-04,  1.1327e-01, -2.8684e-01, -2.4773e-02, -9.0163e-01,
        -1.9563e-01, -5.4053e-02, -2.0638e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1953, -1.4481, -0.1412, -0.0911, -0.1340, -0.2771,  0.1416,  0.0610,
        -0.0502, -0.0627,  0.0381, -0.1546, -0.0615,  0.0857, -0.0955, -0.1712,
        -0.4333, -0.1256, -0.1175, -0.0159,  0.0056, -0.0447, -0.0953,  0.0749,
        -0.2914, -0.1048, -0.0477, -0.0023, -0.0591, -0.3418, -0.2807,  0.0153,
        -0.2100, -0.1041, -0.2138, -0.3250, -0.1613, -0.1696, -0.0103,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2822,  0.4503,  0.1738,  0.1631,  0.7590,  0.0754, -0.0306, -0.0708,
        -0.0480, -0.0255,  0.0263, -0.0092,  0.0356,  0.0265,  0.0597,  0.5740,
         0.3500,  0.0342,  0.0615, -0.0917,  0.3333,  0.0337, -0.0174, -0.0117,
         0.4959,  0.8564,  0.0571,  0.0866,  0.0848,  0.3526,  0.0311,  0.1742,
        -0.0116,  0.0660,  0.1153,  0.0210,  0.0161,  0.3329, -0.1158, -0.0037,
         0.0709,  0.0461,  0.0875,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0486e-01,  2.3998e+00,  8.6276e-02, -3.8914e-04, -1.3021e-01,
        -1.5945e-02,  1.5919e-01,  9.1600e-04,  7.1782e-01, -3.4987e-02,
         2.2680e-02, -1.0537e-02,  1.0025e-01,  1.0732e-02, -5.1570e-02,
        -2.2159e-02,  2.0363e-01,  4.6402e-01,  7.3022e-02,  1.8651e-02,
        -5.1026e-02, -1.5137e-02,  1.2446e-01,  4.7364e-02, -3.3385e-02,
         1.8642e-02, -5.7844e-02,  2.0254e-02, -6.3186e-02, -6.9831e-02,
        -4.5373e-02,  1.7997e-01,  5.6732e-01, -1.6620e-01, -4.9904e-02,
         1.6283e-02, -5.6001e-02,  1.6076e-01,  2.5070e-01,  9.4320e-02,
         2.3319e-01,  3.6415e-01,  4.8579e-02, -3.6392e-02,  2.0147e-02,
        -1.0698e-02,  2.8071e-02, -3.2716e-02, -2.9445e-02, -2.5393e-02,
         3.2437e-03, -1.0324e-03, -3.4227e-02,  2.1067e-02,  1.8033e-02,
         3.6620e-02,  8.5818e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0793,  3.0334,  0.2595,  0.0605,  0.2884,  0.0606,  0.3911,  0.6246,
         0.2197,  0.2710,  0.6220, -0.2243, -0.2948,  0.1147,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3698, -0.9928, -0.3395,  0.0676,  0.0265, -0.2117, -0.0259, -0.0166,
        -0.2962, -0.0207, -0.2633, -0.6904,  0.0746, -0.0628, -0.2384, -0.0136,
         0.0131, -0.1704, -0.1002, -0.3491, -0.8800, -0.0647, -0.2452, -0.0332,
         0.0986,  0.1664, -0.0272,  0.1036, -0.2838,  0.1539,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5718, -1.0575,  0.1662, -1.4118, -0.5502,  0.4988, -0.3592,  0.3809,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.4729, -0.5368, -0.1079,  0.0166, -0.1025, -0.5225, -0.0394, -0.0119,
         0.0263, -0.0835,  0.0314,  0.0159, -0.1335, -0.2256, -0.0489, -0.0192,
         0.0576,  0.0361, -0.0112, -0.0187,  0.1110, -0.0367, -0.0639, -0.1691,
        -0.3229, -0.0359, -0.2505, -0.0501, -0.3281, -0.6551,  0.1163,  0.0032,
        -0.2777,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0891, -1.4265, -0.5237, -0.0526, -0.0636, -0.0504, -0.0442, -0.0897,
         0.0061, -0.1060,  0.0285, -0.0313, -0.1170, -0.0304,  0.0132, -0.0369,
         0.0051, -0.0022, -0.1295,  0.0373,  0.0083,  0.0053, -0.0522,  0.0215,
         0.0082, -0.0335, -0.0574, -0.2049,  0.0230, -0.0019, -0.0465, -0.0650,
        -0.2376, -0.3152,  0.1497, -0.1041, -0.1250,  0.0109,  0.0366, -0.0101,
        -0.0089, -0.1044, -0.0390, -0.2379, -0.0123,  0.0362, -0.0040, -0.0267,
        -0.0525, -0.0234,  0.0021, -0.1219, -0.0926, -0.1784, -0.0411,  0.0900,
         0.0089, -0.0401,  0.0225, -0.1966, -0.0692, -0.0709, -0.5033, -0.0021,
        -0.0016, -0.0653, -0.0148,  0.0499, -0.0643,  0.1389, -0.1188],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2703, -0.0460, -0.1590, -0.0287, -0.5052, -0.1267, -1.1336, -0.0707,
         0.0177, -0.0201, -0.0103,  0.0443,  0.0469,  0.0592,  0.1054,  0.0137,
         0.0112,  0.0142, -0.0154, -0.5214, -0.0143, -0.0038,  0.1248, -0.2978,
        -0.0068, -0.1920, -0.1672,  0.0668, -0.1413, -0.0850, -0.0409, -0.2684,
        -0.5182, -0.4838, -0.0888, -0.0545, -0.3377, -0.1114, -0.0922,  0.0462,
        -0.0788,  0.0604,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2024,  1.1002,  0.0127, -0.0223,  0.0265, -0.0441, -0.0692, -0.0100,
         0.0684,  0.3935,  0.1035,  0.3410,  0.5680, -0.0740,  0.0425, -0.0078,
         0.0798,  0.3210,  0.6115,  0.0624,  0.0761,  0.2000, -0.0779, -0.0083,
        -0.1171,  0.4675,  0.2221,  0.0291,  0.6318,  0.1228,  0.0424, -0.0041,
        -0.0283, -0.0013,  0.0903, -0.1238,  0.1045,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3553, -2.7751, -0.2827, -0.6274, -0.0991, -0.2126, -0.0191,  0.0572,
        -0.0815, -0.1752, -0.0432,  0.0984, -0.2652, -0.0399,  0.1862,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0758,  0.3642, -0.0861,  0.2679,  0.9765,  0.5953, -0.0106,  0.5074,
         1.2381,  0.2006, -0.0362, -0.0575,  0.0298, -0.0346, -0.2972, -0.0284,
        -0.0584, -0.5174, -0.2379, -0.1725,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9123,  0.1279, -0.0956, -0.2608, -0.7565,  0.1421, -0.1619,  0.3904,
         0.4571,  0.3305, -0.2198, -0.3052,  0.2309,  0.3110, -0.1342, -0.1921,
        -0.2810, -0.1993,  0.1629, -0.1071, -0.0011,  0.1540,  0.0746, -0.0272,
        -0.0121, -0.2851, -0.4873,  0.0377,  0.1354, -0.3221, -0.2816,  0.3878,
         0.1210,  0.2099, -0.0452, -0.2412,  0.1082, -0.2824,  0.3775, -0.1714,
        -0.2016, -0.9666, -0.8457,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9459e-01, -1.5265e-02, -5.1785e-02, -5.8695e-02, -2.8848e-01,
        -1.8561e-01, -2.7315e-01, -9.6374e-01, -3.3045e-02,  1.1438e-02,
         2.6058e-02, -1.1749e-01, -7.7742e-02, -3.8588e-01, -6.0090e-01,
         7.6276e-03,  2.3397e-02, -1.9690e-03, -5.8278e-02,  1.4305e-01,
        -2.9053e-01, -1.8158e-02,  2.5038e-01,  3.7659e-02,  1.2206e-01,
         7.5771e-02,  5.0636e-02, -1.5623e-02, -5.0224e-01, -1.6936e-02,
        -4.4991e-02, -2.2979e-01, -6.4565e-02, -2.9258e-01, -4.7453e-01,
        -1.8465e-02, -4.5743e-04, -4.0028e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2021, -2.3733, -0.8049, -0.9221,  0.1637, -0.3015,  0.2152,  0.0161,
        -0.0882, -0.4896, -0.0545, -0.0686, -0.1687,  0.0403,  0.0651,  0.0693,
         0.1169, -0.2807, -0.0293,  0.1894,  0.1017,  0.1604,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4030, -0.9369, -0.0476,  0.0504,  0.1699, -0.0714, -0.2798,  0.2031,
        -0.2174, -0.7354,  0.1114,  0.0807, -0.0428,  0.0517, -0.0998, -0.3573,
        -0.4644, -0.0391, -0.2125, -0.3497, -0.0318, -0.1697, -0.5562, -0.2149,
        -0.2753,  0.0428, -0.0242,  0.0775, -0.0423, -0.6031, -0.0508, -0.4557,
        -0.1013, -0.0169, -0.3319, -0.3438,  0.1400,  0.0079, -0.1481, -0.6486,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2264, -0.1112,  0.0082, -0.1375,  0.0149, -0.2303, -0.0131,  0.0194,
        -0.1901, -0.0698,  0.0109, -0.0551, -0.0638, -0.4736, -0.3477,  0.0132,
         0.0873, -0.0378, -0.2243, -0.0586,  0.0568,  0.0104,  0.0286,  0.0450,
        -0.0366, -0.5946, -0.1074, -0.4206, -0.4730, -0.0707,  0.0137, -0.1692,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2275e-01, -2.0991e+00, -1.4065e-01,  1.3374e-01, -1.0686e-01,
         1.2297e-01, -7.9995e-03, -2.6167e-01, -7.1857e-01, -8.2708e-02,
        -1.6306e-02, -2.9084e-01,  3.5795e-02, -7.8555e-02, -9.1203e-02,
         1.0999e-02, -2.8643e-01, -3.4637e-02, -3.1069e-04,  1.2751e-01,
         2.8044e-02, -9.9885e-02, -2.1829e-02, -5.0421e-01, -4.5213e-02,
        -8.0009e-02, -3.0507e-01,  2.6657e-02,  1.2813e-02, -1.3388e-01,
        -3.8749e-02,  9.7004e-03, -1.6882e-02, -1.8641e-01,  2.8433e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.3224,  0.2355,  0.7274,  0.6475,  0.2435, -0.2843,  0.9633,  0.7223,
         0.0660,  0.6414, -0.2508,  0.2761, -0.1290,  0.2959,  0.0862,  0.1602,
         0.0820, -0.0346,  0.0057, -0.2517,  0.2388,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6256, -2.7699,  0.0601, -0.5094, -0.8757, -0.4530, -0.1134, -0.4249,
        -0.5180,  0.0750, -0.0773, -0.0155,  0.0341,  0.2099, -0.2990,  0.1298,
        -0.1546, -0.0242,  0.2606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0869, -0.3330, -0.7880, -0.0894, -0.1206, -0.0207,  0.0391, -0.0249,
        -0.0218, -0.3409, -0.7641, -0.0856, -0.2015, -0.7637, -0.1978, -0.4712,
         0.0732, -0.3996,  0.0697, -0.2107,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0332, -1.9216, -0.0705, -0.5215,  0.1436, -0.0786, -0.1530, -0.2920,
        -0.0428,  0.0065,  0.0057, -0.0922,  0.0314, -0.2138, -0.0064, -0.2902,
        -0.4434, -0.0417, -0.2430, -0.1132,  0.1116, -0.0728,  0.0240,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3663, -1.2424, -0.4610, -0.1659,  0.1179, -0.3586, -0.0554, -0.3646,
        -0.2067, -0.8814, -0.2480, -0.4422, -0.1597, -0.4249,  0.0858, -0.0015,
         0.0694, -0.0651, -0.0515, -0.1185, -0.3136, -0.0805, -0.0859,  0.1959,
         0.0886,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1383e-01,  2.0136e+00,  2.4259e-01,  2.5887e-01,  4.3543e-02,
        -2.5416e-02, -9.4107e-02, -1.4326e-03, -3.4909e-01, -2.2242e-01,
         2.9114e-01, -5.6646e-04, -2.7877e-03, -3.9567e-02,  3.1862e-02,
         2.0780e-02, -2.5068e-02,  1.2426e-02, -1.0795e-01,  7.4633e-02,
         3.3664e-01,  5.5484e-01,  1.3379e-01,  3.5390e-01,  1.4084e-01,
         1.1491e-01,  1.4176e-02, -2.2365e-02,  5.0811e-02,  1.1858e-01,
         3.0892e-02,  7.5328e-03,  1.4469e-01,  1.0187e-01,  7.4927e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0109,  0.0946, -0.1379, -0.4603, -0.0704, -0.1609, -0.5729, -0.0857,
        -0.5144, -0.0410, -0.1380, -0.1737,  0.1975, -0.4457, -0.4409, -0.8197,
         0.0227, -0.6504, -0.1142,  0.0670,  0.0942, -0.1537,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2060,  0.1059,  0.6023, -0.3930, -0.2337, -0.2623, -0.0738, -0.2508,
        -0.5230,  0.5760,  2.4930,  2.0105, -2.5071, -0.7272, -0.5315, -0.4334,
         0.4997,  0.4062,  0.4183,  1.0909, -0.3560,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0028, -2.5454, -0.2413, -0.0296,  0.2086, -0.0284, -0.6493, -0.9978,
        -0.0263, -0.0391,  0.0109,  0.0360,  0.0269, -0.4048,  0.0385, -0.5054,
         0.1685,  0.2256, -0.1338,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2478, -0.0887, -0.0720,  0.0814,  0.0678, -0.3892,  0.0287, -0.0160,
         0.0821, -0.0020, -0.0514, -0.0658, -0.0341, -0.1552, -0.0129, -0.2010,
        -0.3668, -0.3043,  0.0058, -0.2366, -0.1017,  0.0478,  0.0407,  0.1077,
        -0.2302, -0.0576, -0.0383, -0.0877, -0.1540, -0.4182,  0.0019,  0.0672,
        -0.2822, -0.6991, -0.0182, -0.0396, -0.0202, -0.0110, -0.0299, -0.2048,
        -0.0125, -0.0050,  0.0078, -0.0128, -0.0089,  0.1475, -0.0722],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1212, -2.3170, -0.8490, -0.0148,  0.1310, -0.1039,  0.0728, -0.0262,
        -0.3315, -0.0496,  0.0089,  0.0444,  0.0839, -0.0172,  0.0554, -0.0202,
        -0.4046,  0.0036,  0.0688, -0.0322, -0.0446, -0.1712, -0.9294, -0.6376,
         0.0775, -0.0521,  0.0255,  0.0232,  0.0125,  0.0129,  0.0124,  0.0554,
        -0.1306, -0.1631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0124e-01, -2.4201e+00, -1.4457e-01,  2.7794e-01, -5.3546e-02,
        -3.6171e-01,  8.0365e-02,  1.0838e-02,  1.1880e-02, -7.1135e-02,
         1.6876e-01, -2.1205e-02, -2.7061e-01, -4.7010e-01,  6.4712e-02,
        -7.2481e-02,  4.8372e-02, -3.5387e-02,  1.4205e-01, -4.2762e-01,
        -4.2779e-02, -4.5256e-02,  2.2043e-03, -8.3749e-03,  1.1468e-02,
         3.6550e-03, -2.5249e-03, -1.2901e-01, -7.2561e-02, -2.8464e-01,
        -2.0876e-02,  3.0619e-02, -2.7382e-01,  6.0527e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.3526, -2.4145,  0.2865,  0.1304, -0.2814, -0.7421,  0.0623, -0.0939,
         0.0218, -0.9496, -0.0204, -0.0607, -0.2805, -0.6480, -0.0876, -0.0422,
        -0.0241, -0.0496,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2399, -0.1156,  0.1119, -0.1137, -0.0119, -0.0578, -0.1489, -0.0831,
         0.6848,  1.2874, -0.0292, -0.1651,  0.1174,  0.1449,  0.0547,  0.1282,
         0.2563, -0.0024,  0.1744,  0.9655,  0.0808,  0.1231,  0.1589,  0.0082,
        -0.0417, -0.1700,  0.1495,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0858, -0.1221,  0.0376, -0.0111, -0.0172, -0.0397, -0.0872, -0.2119,
        -1.1633, -0.0089,  0.0555, -0.1795, -0.8418,  0.0600,  0.0513, -0.0287,
        -0.1069, -0.3504, -0.0015, -1.0896,  0.0590,  0.1448, -0.3797,  0.1424,
        -0.1863,  0.1863,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0175, -1.8714,  0.0351, -0.6717, -0.7372, -0.0649, -0.1020, -0.1386,
        -0.3547, -0.2417, -0.2906, -0.2577,  0.0937, -0.1487, -0.2101, -0.4667,
        -0.4090, -0.3498, -0.0614, -0.0095,  0.2757,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2083, -1.9558, -1.0901, -0.6596, -0.0340, -0.2941, -0.7489, -0.0115,
        -0.0390, -0.0095,  0.0040, -0.4223,  0.2303, -0.1818, -0.1426,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3838,  0.1699,  0.0320,  0.1362, -0.0640, -0.1652, -0.0620, -0.3868,
        -0.0357, -0.0939,  0.1396, -0.3328,  1.4688,  0.4571,  0.1396,  0.0872,
        -0.4191,  0.3472,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1837, -2.3669, -0.7973, -0.9365,  0.1254, -0.0718,  0.2431, -0.0936,
         0.1354, -0.3182, -0.5817, -0.2432,  0.0630, -0.5926,  0.0081,  0.1505,
        -0.0334,  0.1594,  0.3334,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0960e-01, -6.6002e-01, -5.3787e-01, -2.1081e-01, -2.3488e-01,
        -1.1706e-01,  1.1531e-01, -5.9701e-02, -4.6351e-02, -2.5657e-04,
         2.2591e-02, -4.1035e-02, -1.1976e-01, -2.3458e-02, -1.4694e-02,
        -1.7170e-02, -2.0370e-01,  4.0447e-02,  6.2921e-02,  3.4714e-02,
        -3.9475e-01, -4.7176e-02, -3.2602e-01, -6.5012e-02, -2.6547e-01,
        -6.5912e-02, -2.9583e-01,  6.8990e-03,  2.7997e-02, -1.8882e-01,
         2.7621e-02, -5.7842e-02, -2.6353e-01, -1.6839e-02, -1.4610e-01,
        -3.6841e-02, -3.2574e-01,  6.9862e-02, -5.2828e-02, -1.9757e-02,
         1.5450e-02,  2.0688e-02, -9.5229e-03, -3.8373e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2551e-02, -1.6886e+00, -1.8154e-01, -4.8638e-01, -2.9472e-01,
        -6.7146e-02, -1.5352e-01, -6.5766e-02, -6.6354e-01,  2.9492e-02,
        -1.9448e-01,  1.2389e-03,  1.3710e-01,  4.1519e-03,  8.1867e-02,
         7.3333e-02, -8.6733e-02, -4.5013e-01,  1.0071e-02, -2.1084e-01,
        -2.5370e-01, -1.1068e-02, -5.7972e-02,  3.6057e-02,  7.0789e-02,
        -1.3036e-02, -6.1533e-02, -8.2398e-01,  4.1960e-02, -1.3885e-01,
         9.0731e-02, -2.1113e-01,  9.8490e-02,  2.3240e-02, -5.5321e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4699e-01, -3.8655e+00, -1.5807e-01, -5.6771e-01, -1.8215e-01,
         1.2769e-01, -8.3778e-01, -7.4580e-01, -2.0153e-01, -6.5061e-02,
        -1.0793e-01, -2.9013e-01, -4.5383e-01, -9.4560e-02, -4.6426e-02,
        -2.0842e-01, -1.0965e-01,  5.7735e-02,  2.1690e-01, -1.9205e-02,
         1.4752e-02,  3.7027e-02,  5.7282e-01, -1.9134e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1921, -2.8223, -0.0610, -0.4997, -0.2129, -0.1181, -0.0758, -0.0692,
        -0.6242,  0.0288, -0.2089,  0.0515,  0.1102, -0.1748, -0.3190,  0.0299,
         0.1553, -0.0229, -0.3549, -0.0127,  0.0589, -0.3770,  0.1897, -0.3448,
         0.0510, -0.3559, -0.1719, -0.1163, -0.0387, -0.1557, -0.1590, -0.0508,
        -0.0719, -0.0189,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0757,  0.0526,  0.0185, -0.2630,  0.0718,  0.0567, -1.1877, -0.3416,
         0.0057, -0.0788,  0.0258, -0.1428, -0.5499, -0.0354, -0.5563, -0.9483,
        -0.1396, -0.0309,  0.0083, -0.0511,  0.0136, -0.0259, -0.0402, -0.3626,
        -0.2095, -0.1312, -0.2379, -0.0434, -0.0715,  0.2184,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.1171, -0.8296,  0.0194, -0.7546, -0.0960, -0.0194, -0.0078, -0.2661,
        -0.6597, -1.5847,  0.1518, -0.0914, -0.3617,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3143, -0.0308,  0.3712,  0.0947, -0.1015, -0.2345, -0.1533, -0.3597,
        -1.1882, -0.1242,  0.0460, -0.4164, -0.2957, -0.3473, -0.5500, -0.0453,
        -0.1259,  0.1575,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4949,  1.8805,  3.4520,  0.8377,  0.6256, -0.3087,  0.1418, -0.0088,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0628e-03, -1.3776e+00, -9.0892e-02, -1.6633e-01, -1.8890e-01,
        -2.4282e-02, -1.0129e-01, -4.0864e-02,  1.0217e-01, -3.8079e-02,
         4.3260e-02,  1.5188e-01, -9.1869e-02, -4.2255e-02, -1.1076e-02,
        -7.8311e-02,  2.0467e-02, -1.4236e-02,  5.1402e-04, -7.3285e-03,
         5.0247e-02, -5.9111e-02, -2.6325e-01, -2.3811e-01, -1.1278e-02,
        -1.9880e-01, -6.4744e-02, -3.5604e-02, -5.1219e-01, -3.4308e-01,
        -1.0646e-01, -2.7741e-01, -6.4098e-02, -8.4119e-01, -1.9639e-01,
        -4.4547e-01, -4.7014e-01, -1.0061e-01,  1.3022e-01,  3.4800e-02,
        -7.8342e-02, -2.8541e-02, -2.2820e-02,  8.3283e-02,  8.2559e-02,
        -3.5787e-02,  1.3797e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2473,  1.4301, -0.6339,  2.2834,  0.3655, -0.0756,  0.1106,  0.2433,
        -0.0246,  0.2818,  0.0740,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6093, -1.5352, -0.1681, -0.5092, -0.1203, -0.1111, -0.2267, -1.4069,
         0.1022, -0.6179, -0.3424,  0.0055, -0.4055,  0.0085, -0.1220, -0.0468,
         0.0507, -0.0259,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2847, -0.6661, -1.6474,  0.2702, -0.0074,  0.0197, -1.2668, -0.1862,
        -0.6669, -0.3199,  0.0088, -0.1590, -0.0885,  0.2220, -0.3316,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2609, -2.6741, -0.2265, -0.3806, -0.0475, -0.4207, -0.6946,  0.1519,
        -0.1019, -0.4361, -0.6427, -0.0251, -0.2582, -0.0484, -0.1072,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6567,  0.0325, -0.0181,  0.0209, -0.0230,  0.1707,  0.0478,  0.1924,
        -0.6370, -0.1173, -0.2132, -0.3448, -0.7937, -0.1745, -0.0458,  0.1635,
        -0.2857,  0.0851,  0.1317, -0.1015, -0.4030, -0.7334,  0.0859, -0.1425,
        -0.1026,  0.0728,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8289, -0.3047, -0.0177,  0.0506, -0.0885, -0.0461,  0.1895, -0.9289,
        -1.6541,  0.0421, -0.0575, -0.1264,  0.0155, -0.0474, -0.1552,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1622, -1.9672,  0.0553, -0.3703, -0.3330, -0.1594, -0.2139,  0.0360,
         0.0109, -1.0309,  0.1063,  0.1031, -0.4035, -0.0732,  0.0139, -0.0124,
        -0.1211, -0.0274, -0.3930,  0.1234, -0.0899,  0.0076,  0.0908,  0.0941,
         0.2677, -0.0384,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1230, -0.0540,  0.0252,  0.0224, -0.0283, -0.1610,  0.0148,  0.1132,
        -0.1814, -0.4151, -0.9534, -0.1614, -0.0894, -0.0218, -0.4572, -0.3699,
         0.1371, -0.1730,  0.0094, -0.2253, -0.1512, -0.1242, -0.0830, -0.1705,
        -0.0244, -0.1050,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.3827, -2.2723,  0.2342,  0.0940, -0.0575,  0.0538, -0.2004, -0.5042,
        -0.0957, -0.2769, -0.0632, -0.1083, -0.1693,  0.0911, -0.1597, -0.1467,
        -0.0365,  0.0335, -0.0791, -0.0409,  0.1971, -0.0748,  0.2320, -0.6031,
        -0.7032,  0.0289, -0.1296, -0.0775,  0.0097,  0.0104, -0.0358, -0.2374,
        -0.0064, -0.0437,  0.0984,  0.2028,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0859e-02, -2.2041e-01, -2.2685e-01,  1.2019e-01, -2.2575e-01,
        -5.3337e-03, -2.7549e-01, -6.0193e-01, -1.1184e-02,  4.0371e-02,
        -1.3787e-01, -4.6385e-03,  3.0928e-03,  9.1511e-02,  4.2635e-04,
        -4.3107e-02,  4.1547e-02, -1.2067e-02,  1.1699e-02,  9.0774e-02,
        -1.0867e-01, -3.9992e-01, -8.8854e-02, -1.2703e-02, -2.2621e-01,
        -6.4680e-01,  1.3372e-02, -3.7090e-01, -1.7958e-01, -2.9274e-02,
        -1.5089e-01, -2.8802e-01, -5.9472e-01, -5.8003e-02, -2.3174e-01,
         7.3814e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1341, -0.2447, -0.0548, -0.1670,  0.2128, -0.5865, -0.2797, -0.1654,
        -0.0069, -0.0259, -0.0545, -0.2705, -0.9843,  0.1384,  0.0470,  0.0187,
        -0.4514, -0.0447, -0.1168,  0.2657,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3773, -0.5664, -0.1867, -0.1321,  0.0236, -0.0652, -0.0827, -0.2117,
        -0.1462, -0.1298,  0.0053,  0.0252, -0.0170, -0.0657, -0.8712, -0.2654,
        -0.1738, -0.3751, -0.0694,  0.0324, -0.0880,  0.0016,  0.0055,  0.0186,
        -0.0333,  0.0745,  0.0056,  0.0747, -0.0894, -0.1084, -0.5195, -0.0951,
        -0.1398, -0.2293,  0.0336, -0.1789,  0.0408,  0.0382,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9598e-01, -4.7595e-01, -4.8020e-01, -4.7128e-01, -3.2404e-02,
        -3.7026e-03, -1.0135e-01,  1.9753e-02,  7.6103e-02, -4.7770e-02,
        -2.7182e-01, -4.3329e-01, -2.3931e-02,  1.2369e-02, -6.0213e-02,
        -2.7168e-01, -2.2703e-02, -3.2932e-01, -7.3699e-03,  3.8316e-02,
         7.1149e-03,  2.0022e-02,  9.8282e-02,  3.8602e-02,  3.4782e-02,
        -2.4769e-02,  4.8944e-03, -1.3971e-01, -3.2301e-01, -4.1730e-02,
        -1.4518e-02, -4.1022e-02,  2.9395e-04, -1.8109e-02, -1.3765e-01,
        -1.7096e-02, -7.2447e-02,  8.0930e-02, -4.7563e-03,  4.8595e-02,
        -3.6889e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1315e-01, -3.3491e+00, -4.5946e-03,  1.7072e-01, -1.5861e-01,
        -5.5004e-01, -3.2904e-02, -1.7592e-03, -1.2550e-01, -3.4064e-02,
         1.7754e-02, -1.3414e-01, -4.1082e-01, -9.1129e-02,  7.3254e-02,
        -3.8778e-01, -1.3955e-02, -2.8884e-01, -4.2822e-02,  8.2976e-02,
        -1.5427e-02, -2.0008e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1382,  0.0385,  0.0559, -0.0068,  0.0040, -0.0948, -0.0781,  0.1039,
        -0.0147,  0.0497,  0.0383, -0.3651, -0.0116, -0.1759,  0.0506, -0.5311,
         0.0440, -0.2204, -0.0223, -0.2576, -0.3704,  0.0483,  0.1126, -0.3074,
        -0.0443, -0.0468, -0.0717, -0.1771, -0.4256, -0.0870, -0.2310, -0.1737,
        -0.0398,  0.1307, -0.1912,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3839, -0.4901, -1.7349, -0.2215, -0.5526, -0.0624,  0.0336,  0.0526,
         0.0449, -0.0862, -0.2103,  0.0390, -0.0563,  0.0337,  0.0172, -0.0588,
         0.4548, -0.1010, -0.3018, -0.0053, -0.1356,  0.1725, -0.1583, -0.5513,
        -0.0272, -0.0043, -0.2337, -0.1745,  0.1336,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0254, -1.8065, -0.4180, -0.2823,  0.0205, -0.3064,  0.1239, -0.1212,
        -0.0147, -0.0755, -0.0864, -0.3142,  0.0096, -0.0931, -0.1905, -0.4165,
        -0.1898, -0.0743, -0.3272, -0.0805, -0.0695, -0.0466, -0.0857,  0.1306,
         0.2078,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1919,  0.0360,  0.0165,  0.1390, -0.8615,  0.0395, -0.0471, -0.7480,
        -0.8248,  0.0652,  0.0333,  0.2306,  0.0053, -0.0192, -0.4985, -0.5567,
         0.1708, -0.1713,  0.1380,  0.0178, -0.1298, -0.0864, -0.0341,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0292,  0.0027,  0.0551, -0.0281,  0.0598,  0.0840,  0.0448,  0.0223,
        -0.0242,  0.0289,  0.0046, -0.0108, -0.3285, -0.6599, -0.0956, -0.1602,
        -0.0187, -0.1534,  0.0445,  0.0326, -0.0088,  0.1052,  0.0164,  0.0149,
         0.0266, -0.1023, -0.3024,  0.0821, -0.3675, -0.0357, -0.4162, -0.0766,
        -0.0346, -0.1295, -0.1923, -0.0611, -0.0440, -0.0184, -0.0389,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2865, -0.3778, -0.3908,  0.8676,  1.3841,  0.1574,  0.0119,  0.5119,
         1.3231, -0.1445, -0.1483,  0.3357,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.4800, -0.6402, -1.2112,  0.0168, -0.0700, -0.0403, -0.3656,  0.0807,
         0.0096, -0.1483, -0.0236,  0.1215,  0.0268, -0.0502, -0.2348,  0.0151,
        -0.0588, -0.3977,  0.0031, -0.0232, -0.0830, -0.2171,  0.0335, -0.0243,
        -0.0293,  0.0273, -0.0942, -0.0752,  0.0907, -0.0914,  0.0464,  0.0270,
        -0.0600,  0.0533, -0.0194,  0.0548, -0.0566, -0.0213,  0.0098, -0.0051,
         0.0473,  0.0369, -0.0347, -0.0726, -0.0275,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4290,  1.9375,  0.4686,  0.7668, -0.3124,  0.0439,  0.1551,  0.2817,
         0.5439,  0.4672,  0.2638, -0.4750,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5490,  2.8022, -0.3423,  1.2947, -1.3676,  0.2148,  0.4656, -1.2714,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5376e-01,  1.6463e+00,  2.5393e-01, -5.1890e-02,  5.2857e-01,
        -2.0837e-02,  3.3893e-02,  4.6154e-02,  1.2845e-01,  4.7358e-01,
         1.3129e-01, -3.0800e-02,  2.0465e-02, -2.1194e-02, -2.4268e-02,
        -3.3381e-02, -6.4207e-02, -3.2557e-02, -1.0804e-02,  1.6025e-02,
        -8.8798e-03, -4.4469e-02,  2.3575e-02, -1.0265e-02, -8.9867e-04,
         5.5618e-03,  2.6741e-02, -1.2037e-02,  1.5215e-01,  1.5872e-02,
         4.3206e-02,  5.6442e-02,  4.2149e-03,  1.8973e-02,  1.4392e-03,
        -4.9472e-02, -2.7853e-02, -2.5418e-02,  2.5013e-02, -3.7983e-03,
         1.0671e-03, -5.2864e-02,  2.1083e-02,  8.6210e-03, -5.5405e-02,
        -3.7211e-01,  8.2688e-01,  3.3836e-02,  5.1666e-01,  8.6323e-01,
         3.4926e-01,  1.9395e-03,  2.6192e-01,  2.2175e-02,  1.5985e-01,
         1.6408e-02,  2.6306e-02, -6.2803e-02,  4.3888e-02, -1.2933e-01,
        -4.3803e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1528,  0.0088,  0.0739, -0.0841, -0.4027,  0.0277, -0.1120, -0.0170,
        -0.0066,  0.0386, -0.0083, -0.5764, -0.1034, -0.2870, -0.2408, -0.0174,
        -0.0019,  0.1118, -0.1979, -0.1834, -0.2739, -0.4363, -0.0276, -0.0920,
        -0.3160, -0.0809, -0.0524, -0.1160, -0.1643, -0.1019,  0.0736, -0.0091,
        -0.0734,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2939, -0.2515, -0.8685, -0.3686, -0.0361, -0.0632, -0.3122, -0.0688,
         0.0336, -0.0514,  0.0924,  0.0614, -0.1291,  0.0398, -0.0668,  0.6683,
        -0.0229, -0.5353, -1.1664, -0.0256,  0.1026, -0.0949, -0.2199,  0.1316,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2144, -0.4418, -0.5478, -0.1327, -0.0479, -0.5255,  0.1818, -0.3728,
         0.0874, -0.7600, -0.4806, -0.3474, -0.5741, -0.0189, -0.1014, -0.0062,
        -0.0241, -0.0872,  0.1808,  0.0363, -0.2192, -0.0156,  0.0358, -0.2817,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1938,  0.0199, -0.4203, -0.5106, -0.0516,  0.0094, -0.0697, -0.3412,
         0.0116, -0.0092, -0.0154, -0.0067,  0.0008, -0.0175, -0.2913, -0.2773,
        -0.3209,  0.0105, -0.0628, -0.0311, -0.3282, -0.0812, -0.1756, -0.5358,
         0.0217, -0.0865, -0.4445, -0.0895, -0.0445, -0.0584, -0.1144, -0.0947,
         0.0653,  0.0372,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1727,  0.0187,  0.0163, -0.0722, -0.6409,  0.0175,  0.0098, -0.0319,
         0.0761,  0.0375, -0.0028,  0.3671,  0.3121, -1.1071, -0.2398, -0.3197,
        -0.7652,  0.0596, -0.5168, -0.1166, -0.5402, -1.0957,  0.2000, -0.1327,
        -0.2175,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4975,  0.1771, -0.2251, -0.0139, -0.1619, -0.1650, -0.8672, -0.1635,
        -1.2305, -1.5782, -0.1820, -0.2042, -0.1537, -0.0541, -0.0732, -0.0727,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3467e-01, -2.3365e+00,  4.1045e-03, -4.7146e-01, -4.3540e-01,
        -3.9044e-01, -1.4510e+00, -1.5423e-01,  1.7169e-01, -4.5273e-02,
         6.8181e-02, -2.7331e-02, -2.7272e-01,  3.6876e-02,  4.7575e-02,
         1.6108e-03, -1.5136e-01, -3.0388e-01, -1.7691e-02, -1.6572e-03,
         1.5390e-01,  3.7760e-02,  1.8292e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6256,  0.1872, -0.2700, -0.7415, -2.5284,  0.2372,  0.4123,  0.0494,
        -0.1481,  0.1106, -0.0240,  0.2906,  0.0992, -0.4699,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.5053, -0.6449, -0.5417, -0.8079, -0.1138,  0.2691, -0.1424,  0.0364,
        -0.1792, -0.0190, -0.3119, -0.5253, -0.0734, -0.0083,  0.0041,  0.3042,
         0.1815, -0.4716,  0.0520, -0.4903, -0.0130, -0.0737, -0.0408, -0.0540,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0212, -0.0497, -0.3578,  0.0694, -0.5522, -0.2121, -1.4888,  0.1838,
        -0.1205,  0.1157, -0.1509,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1375,  0.0759,  0.2511,  0.0211,  0.0612, -0.3539, -0.9811, -0.0724,
        -0.4614, -0.1871, -0.4739,  0.0183, -0.2622, -0.0112, -0.3498, -0.0681,
        -0.1024, -0.4788,  0.2156, -0.0297, -0.0801,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3729,  0.1446,  0.1370,  0.1170, -0.0890,  0.0036,  0.0736, -1.8843,
        -0.9606, -1.2176, -0.2197,  0.1559,  0.0414, -0.2004,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2707,  0.1588,  0.0557, -0.0109,  0.1101,  0.0145,  0.1087, -0.0490,
         0.0023,  0.2585,  0.2113,  0.0571,  1.6892,  0.0720,  0.1111,  0.0829,
         0.2726,  0.1145,  0.1223,  0.0709,  0.1392,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0213,  2.0522,  0.0043,  0.6911, -0.4440,  0.7619,  0.3971,  1.2131,
         0.1423,  0.3094,  0.2573, -0.0684, -0.1831,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1077, -0.6205, -0.1075, -0.0887, -0.0777, -0.1070, -0.3118, -0.5860,
        -0.2436,  0.0167, -0.0803,  0.0764,  0.0696,  0.0205,  0.0056, -0.0396,
        -0.0038, -0.0267, -0.0378,  0.0417,  0.0089,  0.0536,  0.0557, -0.0035,
        -0.7493,  0.1460, -0.4526, -0.3546, -0.4240, -0.0246, -0.1196,  0.0195,
         0.0270, -0.0693,  0.0037,  0.0861, -0.1494,  0.0156, -0.0093, -0.0343,
        -0.1150,  0.0266,  0.0278, -0.0183,  0.0673], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0872,  0.2472,  0.5069, -0.0938, -0.0290, -0.1372,  0.0132,  0.2251,
        -0.0114,  0.0942,  0.9467,  0.1880,  0.1236,  0.1649,  0.5603,  0.1404,
         1.1892, -0.1514,  0.0381, -0.0377,  0.0708,  0.4886,  0.0959,  0.4431,
        -0.1094, -0.0597,  0.0845,  0.0820,  0.5285,  0.1588, -0.0526,  0.0787,
         0.0709, -0.0995,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5901,  0.5137,  0.0647, -0.0084,  0.0099,  0.0968, -0.1096, -0.0154,
        -0.2233,  0.0270,  0.1340, -0.0203,  0.0100,  0.0021, -0.1063, -0.0958,
         0.0269, -0.0945,  0.2113,  1.4825,  1.8960,  0.1743, -0.1147,  0.3240,
         0.0672, -0.3524,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2255, -1.9872,  0.3301, -0.6060, -0.1422, -0.0875, -0.0630, -0.4849,
         0.0736, -0.0446,  0.0280,  0.1146,  0.0531,  0.0449,  0.0044, -0.2868,
        -0.6337,  0.0673, -0.2333, -0.0104, -0.2260,  0.0158,  0.1535,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1239, -0.8618, -0.1142, -0.2263, -0.0216,  0.0483,  0.0263, -0.2904,
        -0.6101, -0.1369,  0.0881, -0.1092,  0.0153, -0.0325, -0.8694,  0.0933,
        -0.1062, -0.1450, -0.4004, -0.1612, -0.0644, -0.0538,  0.0347,  0.0227,
         0.0094, -0.0514, -0.2053, -0.5563, -0.0205, -0.0939,  0.0041,  0.0408,
        -0.0802,  0.0393, -0.0031, -0.1080, -0.2358, -0.0394,  0.0213,  0.0998,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0969,  0.0546,  0.0372, -0.1433, -0.3433, -0.0482, -0.1193, -0.3101,
         0.2403, -0.1013, -0.0368, -0.0009, -0.1491,  0.0128, -0.1473, -0.2673,
        -0.1774, -0.1421, -0.0728, -0.2324,  0.0315, -0.4663, -0.2206, -0.0333,
        -0.0735,  0.0159, -0.0016, -0.2077, -0.0690, -0.1436, -0.1316,  0.0350,
        -0.0431,  0.0381,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.5487, -0.2659, -0.1304, -0.2743, -0.6751,  0.0964,  0.1154,  0.0326,
         0.0284,  0.0480,  0.0544, -0.0504,  0.1699,  0.1458, -0.8302,  0.1835,
         0.0069, -0.0120, -0.4012, -0.0565, -0.0303, -0.0240, -0.0020,  0.1035,
        -0.2242, -0.6992, -0.0541,  0.0692, -0.0534,  0.0391,  0.1143, -0.0105,
         0.0027, -0.0178, -0.3079, -0.0388, -0.1149, -0.2506, -0.0822, -0.1708,
        -0.1987,  0.0459, -0.1714, -0.0408,  0.0216,  0.0229, -0.0463,  0.0387,
        -0.0282, -0.0508,  0.0266,  0.0798], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0749, -0.0127,  0.1213,  0.0648,  0.2924,  0.0021, -0.0377, -0.0659,
         0.0607,  0.0095, -0.0974, -0.0352,  0.1145,  1.0770,  0.2154,  0.2623,
         0.8833,  0.1164,  0.2736,  0.0239,  0.1522,  0.0168,  0.1743,  0.8402,
         0.0223,  0.0336,  0.0771,  0.0447, -0.0398, -0.0042, -0.1695,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2838e-01, -8.1699e-01,  3.7426e-02, -7.5266e-02,  8.1548e-02,
         4.8868e-02, -1.3142e-01, -1.1542e-01, -4.2298e-02,  1.5313e-01,
        -4.9883e-04, -2.6315e-01, -6.2089e-03,  2.1732e-02, -4.2482e-01,
        -4.0306e-01, -3.6872e-02, -5.6336e-01, -6.1919e-01, -8.2626e-01,
        -1.7803e-01,  3.7868e-02, -9.4828e-02,  8.5655e-02,  3.9399e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4907,  0.1783, -0.0624,  0.1626, -0.0679, -0.9682, -1.9480, -0.1836,
         0.0606, -0.1488, -0.2259, -0.2000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1241, -1.5551, -0.1904, -0.3410, -0.6449, -0.1578,  0.0881,  0.0169,
         0.0281,  0.0279, -0.0422,  0.0387, -0.3152, -0.5982,  0.1184, -0.2898,
        -0.0988, -0.0203, -0.0145, -0.0577, -0.2101,  0.0021, -0.0763, -0.0187,
        -0.0491, -0.5086, -0.0513, -0.0378,  0.1969,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2242e-01, -1.7356e+00,  1.5026e-01,  7.8094e-03, -3.5147e-04,
        -5.7564e-01, -1.4307e+00,  7.6103e-02, -1.4153e-01,  5.3053e-02,
        -2.1048e-01, -3.8328e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6205, -1.9651, -0.1339, -0.7217,  0.0819, -0.5659, -0.8107, -0.0963,
        -0.4727, -0.1469,  0.1158,  0.2404,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2160,  0.2008, -0.5022, -0.0744, -0.6758, -0.0024, -0.0858, -0.0546,
        -0.9817,  0.0281,  0.0786, -0.4716, -0.0401, -0.1675,  0.0013, -0.4613,
        -0.1130,  0.0155,  0.0686, -0.1717,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1094, -1.1529,  0.0746, -0.4655, -0.9360,  0.0609, -0.1403, -0.0617,
        -0.1081, -0.0609, -0.0085, -0.3307, -0.2040, -0.0746,  0.0235, -0.0168,
        -0.0750,  0.0419, -0.0085, -0.0317, -0.1233,  0.0935,  0.0792,  0.0131,
        -0.0178,  0.0884, -0.0430, -0.2258, -0.1327, -0.0017,  0.0587, -0.0391,
        -0.1312, -0.2729, -0.0045,  0.0390,  0.0580,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3051,  3.0724,  0.1449, -0.3794,  0.1344,  0.0480,  0.1128, -0.1987,
         0.2231,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1269,  0.2495,  0.0111, -0.7012, -0.1179, -0.0677, -0.2822, -0.4468,
         0.0470, -0.0588, -0.0450, -0.0619, -0.6601, -0.5109,  0.0066, -0.1336,
         0.0140,  0.0278, -0.3145, -0.4895,  0.1552,  0.0099, -0.0740,  0.0885,
         0.0783, -0.0139,  0.2151,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7283e-01, -2.5624e+00,  8.8897e-02, -5.7960e-01, -7.6330e-02,
        -2.5023e-01,  2.2066e-02, -1.1394e-02, -6.0259e-02,  6.7193e-02,
         4.4524e-01, -3.2896e-01, -3.5654e-02,  1.5755e-01, -8.1639e-02,
         4.4943e-02, -3.8425e-01,  4.2943e-02,  1.3890e-01,  9.1893e-02,
         1.5971e-01, -3.8287e-01, -3.5583e-01,  4.1874e-02,  1.3781e-02,
         4.2512e-02, -7.6313e-02, -3.7926e-01, -1.0470e+00, -4.7660e-02,
         1.0257e-01,  1.7087e-03,  2.2140e-01, -4.7108e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.2126, -0.0413,  0.0280, -0.0379, -0.1614, -0.0550,  0.0363,  0.0387,
         0.0493,  0.1604, -0.0210, -0.4754,  0.0098, -0.0366,  0.0100, -0.2242,
        -0.6200, -0.0413, -0.1842, -0.0546, -0.0018, -0.0544, -0.2247, -0.0598,
         0.0864,  0.0395,  0.0886, -0.0091, -0.0132, -0.0196,  0.0936, -0.4986,
        -0.7237, -0.0322, -0.1945, -0.1917, -0.4117, -0.0264,  0.0582, -0.0478,
        -0.0768, -0.2488,  0.0077,  0.0561,  0.0327,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3271, -0.0036, -0.1816, -0.0485, -0.1576, -0.7756, -1.3515,  0.0963,
         0.1918, -0.5297,  0.0743, -0.1533, -0.4103,  0.1791,  0.0233, -0.1571,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1589, -0.0640,  0.2268,  0.1145,  0.0418, -0.0704, -0.0500, -0.0785,
         1.2234,  0.8927,  0.0126,  0.8956, -0.0413,  0.3075,  0.5855,  0.0121,
        -0.0796,  0.0171, -0.1043, -0.0034, -0.0433, -0.0102,  0.0556,  0.0044,
        -0.0275,  0.0929,  0.2553,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0974, -0.0095, -0.2739, -0.0156, -0.1698,  0.0020, -0.1356, -0.0690,
        -0.1170, -0.0594,  0.0064, -0.0227, -0.1851, -0.0282, -0.0753, -0.0150,
        -0.1093,  0.0463, -0.0277, -0.2146,  0.0544, -0.1289, -0.0392, -0.2809,
        -0.0186,  0.0478, -0.1410, -0.1448,  0.0028, -0.2174,  0.0603, -0.0844,
        -0.1604, -0.0393,  0.0026, -0.1902,  0.0091, -0.0675,  0.0333,  0.0779,
        -0.0641, -0.1513, -0.0275, -0.0026,  0.1234, -0.0246], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4619,  0.1788,  0.0051, -0.0547,  0.0247,  0.0086,  0.0194, -0.4017,
        -0.0709, -0.1247, -0.0228, -0.0690,  0.0221,  0.0377, -0.0120,  0.0291,
        -0.0071, -0.1005, -0.5383, -1.4184, -0.0099, -0.1695, -0.0896,  0.0207,
        -0.0047, -0.2256, -0.7412, -0.1040, -0.1651,  0.0079,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0934, -0.4231, -0.4301, -0.1616,  0.2044,  0.0831,  0.5557,  2.0813,
        -0.5299, -0.1625,  0.4563,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2676, -2.0738, -0.2753, -0.1729, -0.4330, -0.1295, -0.2252, -0.6607,
         0.0754, -0.1931, -0.0398,  0.0367, -0.0888, -0.1239,  0.0071, -0.3366,
         0.0091, -0.0203, -0.2290,  0.2289,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0595, -2.5756, -1.1598, -0.5344,  0.0190, -0.4098, -0.6855, -0.1427,
        -0.0271, -0.2801,  0.1730,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0168, -1.5131, -0.8303, -0.8145,  0.0207, -0.1526, -0.0780, -0.2914,
        -0.3366, -0.1294, -0.0295, -0.1642, -0.0570,  0.0165, -0.0070, -0.1700,
         0.0544, -0.0517, -0.1300,  0.1809, -0.0725,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4509, -1.1263, -0.7584, -0.1317, -0.1847, -1.9298,  0.1127, -0.0665,
        -0.0886, -0.0366,  0.0868, -0.4039, -0.1631,  0.0050,  0.0266, -0.0135,
         0.0043,  0.0897,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1607, -0.8112, -0.6089,  0.0068,  0.0999,  0.1120,  0.0774,  0.0638,
        -0.0098,  0.0021,  0.0187,  0.0624, -0.0301, -0.4451, -0.4575,  0.0170,
        -0.1427, -0.3165, -0.1667,  0.0733, -0.0465, -0.2399, -0.5475, -0.0271,
        -0.2433,  0.0235, -0.1537, -0.0548, -0.0672, -0.5149, -0.0134, -0.0369,
        -0.0482,  0.0636, -0.0584,  0.0244,  0.0050,  0.0506,  0.2556,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3265, -1.9201, -0.1493, -0.3462, -0.0439,  0.0185,  0.0131,  0.0397,
         0.0058, -0.1743, -0.1325, -0.6619,  0.0148,  0.0689,  0.1699, -0.0108,
        -0.7539, -0.0155,  0.0328, -0.1037, -0.1012,  0.0052,  0.0342,  0.0306,
        -0.0235, -0.0262, -0.0626, -0.0126, -0.2368, -0.5665, -0.0649, -0.0318,
        -0.1918, -0.2210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 3.0220e-05, -1.0100e+00, -3.8604e-01, -1.7956e+00,  3.2955e-01,
        -1.3453e-01,  3.8739e-02,  2.3217e-01, -7.0333e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1151, -1.5710,  0.0821, -0.2694, -0.2019, -0.5414, -0.0043, -0.1845,
        -0.2369,  0.0050, -0.1794, -0.0569, -0.0254, -0.0690,  0.0586, -0.0666,
         0.0656, -0.8036, -0.0512,  0.0667, -0.0416, -0.0324, -0.1349,  0.0106,
        -0.2526, -0.0069, -0.0848,  0.0198, -0.0658,  0.0402,  0.0129, -0.5000,
        -0.0351, -0.2065,  0.0908,  0.0103,  0.0066,  0.1742,  0.0815, -0.0055,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3470, -0.3930, -1.3789, -0.0891, -0.3020, -0.0364,  0.0996, -0.0691,
        -0.2198, -0.2891,  0.0078, -0.0192,  0.0198, -0.0022,  0.0508, -0.0295,
         0.0346, -0.1727, -0.3381,  0.0879, -0.0819,  0.1588,  0.0323, -0.0700,
         0.1279, -0.1093, -0.2878,  0.0829, -0.2997, -0.6858, -0.0844,  0.0415,
        -0.0281,  0.0490,  0.0595, -0.0049, -0.2563, -0.2707, -0.0072, -0.0364,
         0.0967,  0.0889,  0.0785,  0.0331, -0.0184,  0.0572, -0.0437,  0.0042,
         0.0273,  0.0302,  0.1033,  0.0486,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0108, -1.5570, -0.0595, -0.4557, -0.1706,  0.0942, -0.4656, -0.1116,
        -0.3031, -0.2844,  0.2000,  0.0882, -0.3102, -0.0992, -0.2001, -0.4532,
        -0.2090, -0.0967, -0.3393,  0.0656, -0.1076,  0.1359,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2737e-01,  3.9674e+00, -2.9995e-01, -1.2368e-01, -1.6811e-01,
        -7.9807e-02,  1.2751e-01,  2.5739e-01,  9.2776e-02,  1.6412e-04,
         1.8883e-01,  3.6801e-01,  1.7591e-01,  1.9639e-01, -5.1365e-01,
         6.0555e-01,  4.8641e-02,  1.3244e-01, -8.5786e-02,  6.2096e-03,
        -2.4695e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2065, -0.9212, -0.2747, -0.1072, -0.0152,  0.0386, -0.0833,  0.0121,
        -0.1834, -0.0294,  0.0081, -0.0126,  0.0308,  0.0880,  0.0241, -0.0348,
         0.0064, -0.0033, -0.1100,  0.0049,  0.0300,  0.0205, -0.0415,  0.0398,
        -0.0187, -0.0558, -0.1528, -0.2039, -0.0144, -0.1790, -0.4240, -0.4740,
         0.0256, -0.1898, -0.1359, -0.0158,  0.0137,  0.0040,  0.1011,  0.0428,
        -0.4190, -0.0665,  0.0328, -0.0278, -0.1827,  0.0104, -0.1876, -0.0392,
         0.0253,  0.0221, -0.0369, -0.0314,  0.0770, -0.1448,  0.0355],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5908e-01,  1.4049e-03,  7.0279e-02, -4.0021e-02, -4.4867e-02,
        -1.0281e-01, -5.9799e-01, -1.1119e-01,  4.1995e-02,  3.8496e-02,
        -5.2014e-02,  4.3898e-02, -3.6721e-02, -1.1070e-01, -2.3210e-01,
        -9.2948e-02, -3.7919e-02, -4.4696e-02, -4.6273e-02, -3.0545e-01,
        -1.1999e-01, -1.0846e-01, -1.1367e-01,  2.0985e-02, -5.6859e-04,
         8.1687e-03, -6.3192e-02, -2.4588e-01, -1.2710e-01,  8.1056e-04,
         1.4408e-02,  1.6928e-02, -7.7422e-02, -2.9746e-01, -1.9164e-02,
        -4.5466e-02,  5.7731e-03, -9.0535e-02, -1.2276e-01, -1.0282e-01,
        -3.5701e-02, -1.2065e-02,  6.4371e-02,  3.4802e-02,  4.9294e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1893, -1.4991, -0.5523, -0.7198, -0.1052, -0.1004, -0.1411, -0.0742,
        -0.1786, -0.0460,  0.0864, -0.1629, -0.0352, -0.5642, -0.3814, -0.0455,
        -0.0605, -0.3371, -0.0886, -0.0809, -0.5728,  0.1194,  0.1453, -0.1831,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1850,  0.0851,  0.0511, -0.0536, -0.0432, -0.0609, -0.3300, -0.3121,
         0.1141,  0.0548, -0.0724, -0.0289, -0.0373, -0.1440, -0.0207,  0.2681,
        -0.1664, -0.1791, -0.1214, -0.5183,  0.1079, -0.0467, -0.3011, -0.4404,
         0.0516, -0.1926, -0.3784, -0.0568, -0.0901, -0.2710,  0.0566, -0.0609,
        -0.1314,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2458, -3.2231, -0.0517, -0.6794,  0.0430, -0.1241, -0.1718,  0.0261,
        -1.1271,  0.0364, -0.0312, -0.0540,  0.2301, -0.4535, -0.0341, -0.1906,
        -0.1938,  0.3245, -0.2590,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1511,  0.0358,  0.2412, -0.0488, -0.4092, -0.1060, -0.3889, -0.4202,
        -0.4473,  0.0210, -0.0434, -0.0885,  0.0403, -0.0288,  0.0102,  0.0437,
         0.0042, -0.3173, -0.1407, -0.0729, -0.2783, -0.3451, -0.0448,  0.0247,
         0.0122,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1207, -1.2473, -0.8613, -0.0640, -0.0687, -0.4478, -1.3171,  0.0742,
        -0.0369, -0.1668,  0.0795, -0.0027, -0.3926, -0.0304,  0.0669,  0.2046,
         0.3958,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0785,  0.0284, -0.0497,  0.0378,  0.0165, -0.0392,  0.0102,  0.0314,
         0.0476, -0.3415, -0.1121, -0.0419,  0.1096,  0.0051,  0.0221, -0.1037,
        -0.5801, -0.4839, -0.0709, -0.3114,  0.0567, -0.7867, -0.7885, -0.0196,
        -0.2851, -0.0268, -0.0250, -0.0244, -0.0719,  0.0090, -0.0739,  0.2968,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0923,  0.6992,  0.8859,  0.0706, -0.0152,  0.0387,  0.3463,  0.9020,
         0.0100, -0.0472, -0.1096, -0.0696, -0.2202, -0.0601,  0.1450, -0.1010,
         0.0532, -0.0249,  0.2604, -0.0510,  0.1286, -0.0427,  0.3152,  0.4889,
         0.1539,  0.0686,  0.0793, -0.2230,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6684, -0.0837, -0.0357, -0.0166,  0.0772, -0.3390, -1.0326, -0.2613,
        -0.1002, -0.1384,  0.0086, -0.8730, -0.1092,  0.0811, -0.0471, -0.6251,
        -0.0527, -0.1077,  0.0087, -0.0243, -0.1138,  0.0552, -0.0052, -0.1083,
        -0.4610,  0.1881,  0.1902,  0.1863, -0.1616,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0732, -0.5503, -0.0465,  0.0104, -0.4654, -0.1621,  0.0223,  0.0603,
        -0.0584,  0.1194,  0.1727, -0.0425, -0.1633, -0.5889,  0.1744, -0.2109,
         0.1563, -0.6235, -0.0696, -0.2812,  0.0062, -0.0048, -0.2733, -0.0318,
        -0.2295, -0.3882, -0.2045, -0.0653, -0.1231, -0.0430, -0.0544,  0.1158,
        -0.1034,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0539, -1.7718, -0.1031, -0.5549,  0.0931, -0.3643,  0.1851, -0.2809,
        -0.0879, -0.4549, -0.3471, -0.0939, -0.0701,  0.0070, -0.0713, -0.0035,
        -0.4180, -0.1477, -0.7104, -0.0116, -0.4707, -0.0804, -0.1959,  0.0761,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2147, -0.7530, -0.8194, -0.0261, -0.2931, -0.0263, -0.1204, -0.3621,
         0.1227, -0.1749, -0.0277, -0.0942, -0.2362, -0.0381,  0.0621,  0.0636,
        -0.0541, -0.3176,  0.0388, -0.1822, -0.1779,  0.0918,  0.0669,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0666e-03, -1.1415e+00, -7.3090e-02, -1.6999e-01, -1.8057e-01,
        -2.3344e-01,  5.3758e-02,  5.1983e-03,  4.8747e-02,  6.3377e-02,
         8.4772e-02,  1.3738e-02, -3.6864e-02,  2.6550e-02,  7.9407e-04,
        -4.0926e-02, -4.7710e-01, -3.7862e-01, -3.3470e-01,  1.2217e-02,
        -6.8720e-02, -2.1803e-01, -4.5261e-02, -5.6448e-02, -4.8023e-01,
        -1.2802e-01, -1.0141e-01, -6.5070e-02, -7.4886e-02, -4.9732e-01,
        -3.3684e-01, -1.1432e-01, -3.2474e-01, -4.7758e-02, -1.2911e-01,
        -2.3893e-01, -7.0881e-02, -1.5452e-01,  5.0203e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5402, -0.3647, -0.0221, -0.1439, -0.5747, -0.0832,  0.0376,  0.0550,
         0.0681,  0.0899,  0.0599,  0.0263,  0.0469, -0.0412, -0.0348, -0.2621,
        -0.1101, -0.0861, -0.0288,  0.0122, -0.3213,  0.0481,  0.0178, -0.0729,
        -0.5395, -0.7668, -0.0873, -0.2800, -0.0977, -0.4041, -0.1240, -0.0733,
         0.0291,  0.0034, -0.0608, -0.0563, -0.0817, -0.2511,  0.0322,  0.0030,
        -0.0368, -0.0345, -0.1344,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0981e-01, -1.2902e+00, -8.1903e-03, -5.2581e-02,  1.0135e-01,
        -7.6174e-03, -9.9056e-02,  2.1406e-02, -3.1104e-01,  1.0244e-02,
         5.3329e-02,  1.3090e-02, -1.2828e-01,  3.8323e-02, -1.5612e-02,
         2.1909e-02, -1.8716e-01, -3.2441e-01, -1.6335e-02, -6.4979e-02,
        -6.8716e-03, -4.7455e-02, -4.2603e-02, -5.0887e-02,  4.0213e-02,
         1.1291e-02,  5.9559e-02, -6.0115e-03,  3.5491e-02,  5.4049e-02,
         1.0948e-01, -1.7464e-01, -3.1259e-01,  1.6306e-02,  1.4695e-02,
        -5.1626e-02,  1.4312e-01, -3.8554e-01, -5.1819e-01, -3.4328e-01,
        -1.9391e-01, -1.4978e-01,  3.4157e-02, -1.9846e-02, -5.8984e-02,
        -4.4059e-02, -1.8364e-02, -2.9327e-02,  4.5909e-04, -1.7191e-02,
        -7.1610e-02, -8.6518e-03, -8.2714e-03,  3.4602e-02, -2.2711e-02,
        -2.3507e-01, -3.0268e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2637,  2.2450, -0.1540,  0.0699,  0.2606, -0.1468,  0.3252,  0.7255,
        -0.1098, -0.0941,  0.6035, -0.5120, -0.0843,  0.0458,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0873, -1.5812, -0.0558,  0.1186,  0.0119,  0.0024,  0.0096,  0.0233,
        -0.3291, -0.0963, -0.3455, -0.8471, -0.3363, -0.1190, -0.5278, -0.2065,
        -0.2096, -0.4775,  0.0806, -0.4761, -0.7667, -0.1356, -0.1899,  0.0223,
         0.0704,  0.1237,  0.0340,  0.0319, -0.1349, -0.0690,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1502,  0.7414, -0.4879,  1.6529,  0.4813,  0.0192,  0.1849, -0.0331,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.3873, -0.5682,  0.0561,  0.0169, -0.1655, -0.6769, -0.0254,  0.0980,
         0.0106, -0.0499,  0.0761,  0.0440, -0.4606, -0.7979, -0.0674,  0.0782,
         0.0075, -0.0419, -0.0523, -0.0118, -0.0256,  0.0822,  0.0535,  0.0445,
        -0.4655, -0.1418, -0.1433, -0.0220, -0.3268, -0.5236, -0.1220, -0.0139,
        -0.0895,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6027e-01, -1.0883e+00, -2.5928e-01, -1.6012e-01, -2.9736e-02,
        -1.5676e-01, -3.3356e-02, -1.9494e-02,  5.4317e-02, -1.8501e-01,
        -1.8342e-02, -5.8567e-02, -1.4178e-01, -3.8047e-02,  1.0288e-01,
         8.1362e-02,  5.9162e-02, -1.2004e-03, -4.6004e-02,  5.5503e-02,
        -2.2835e-02,  3.3247e-02, -1.6078e-02,  8.7296e-04,  1.1237e-01,
         6.7091e-03,  1.3512e-04, -3.1565e-01, -7.7645e-02, -1.0208e-01,
        -1.6804e-02, -2.9834e-01, -3.6517e-01, -5.1877e-01,  7.6312e-02,
        -1.1948e-03, -2.0827e-01, -4.6689e-02,  3.6706e-02,  5.8301e-02,
         1.0140e-02,  3.0175e-02, -2.9986e-02, -3.9448e-01, -1.3036e-01,
        -9.0191e-03, -2.7466e-02, -1.2317e-02, -3.7944e-02,  2.5992e-02,
        -1.5074e-01, -1.3775e-01, -7.5970e-02, -1.1948e-01, -3.1032e-02,
         5.9286e-02, -2.7835e-02,  1.0274e-02,  2.9157e-02, -1.3741e-01,
        -4.1647e-02, -5.0830e-02, -4.3036e-01,  3.3255e-04,  2.7732e-02,
         8.5295e-03,  1.6050e-02, -8.1724e-03,  2.5861e-02, -1.4771e-01,
         5.5553e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3841,  0.1732,  0.0664, -0.0189,  0.2079, -0.0304,  0.7761,  0.0656,
        -0.0414, -0.1849, -0.1371, -0.0187, -0.0471, -0.0615, -0.1267, -0.0115,
         0.0503, -0.0473,  0.1874,  0.6792,  0.1784, -0.0021, -0.2219,  0.3777,
        -0.0513,  0.3265,  0.0826, -0.0214,  0.0148,  0.0224,  0.0570,  0.1664,
         0.6074,  0.5309,  0.2937,  0.1249,  0.8636,  0.3289,  0.0227,  0.0338,
        -0.1284,  0.1275,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6337e-01,  2.2331e+00,  4.0695e-03,  6.3193e-02,  3.8230e-02,
        -3.9656e-02, -1.1637e-01, -1.2029e-01,  1.1245e-02,  2.0098e-01,
         9.3957e-03,  2.8309e-01,  4.9365e-01, -9.5298e-02, -5.3227e-02,
         6.9210e-03, -4.1703e-02,  3.0269e-01,  5.7617e-01, -2.3820e-02,
         8.5403e-02,  1.2757e-01, -3.2443e-02, -6.4116e-02,  8.0824e-04,
         2.9717e-01,  9.9401e-02,  8.7099e-02,  4.8078e-01,  4.4336e-02,
        -1.2555e-01,  4.6789e-02, -8.2479e-02,  1.9123e-02, -3.0011e-02,
         1.2084e-01, -8.2368e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0359,  2.5656,  0.3173,  0.2997, -0.0096, -1.0234,  0.0192, -0.0088,
         0.0058,  0.0136,  0.0947, -0.0518,  0.2716, -0.0352,  0.3626,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0693, -0.1918,  0.0523, -0.0121, -0.7518, -1.1237, -0.1647, -0.5335,
        -1.0267, -0.0895,  0.0694, -0.1981, -0.4753,  0.0653,  0.0401, -0.0089,
         0.0628,  0.2966,  0.0800,  0.1553,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1557,  0.0860,  0.0285, -0.0232, -0.4679, -0.0327, -0.0090, -0.0107,
         0.0263,  0.0311, -0.3169, -0.3321, -0.0481, -0.0400, -0.2293, -0.1749,
         0.0160, -0.1454, -0.0023, -0.3298,  0.0084, -0.0059, -0.0462,  0.0053,
         0.0334, -0.0788, -0.3071, -0.0136,  0.0369, -0.1714, -0.3911, -0.0145,
         0.0468, -0.0227, -0.1133, -0.3694,  0.0149, -0.0848, -0.0248,  0.0054,
        -0.0422, -0.0476,  0.0729,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0757,  0.0290,  0.0211, -0.0238, -0.3070, -0.2345, -0.3398, -0.3541,
        -0.2524, -0.0426,  0.1684,  0.0073, -0.0413, -0.4062, -0.6820, -0.0938,
         0.0198, -0.0152, -0.0358, -0.1274, -0.3801, -0.1071,  0.0873,  0.0242,
        -0.0367, -0.0053, -0.0733, -0.0283, -0.5882,  0.0011, -0.0595, -0.2139,
        -0.0068, -0.2397, -0.4350, -0.1707,  0.0901,  0.1642,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0370, -1.7010, -0.6001, -0.7425,  0.0564, -0.5139, -0.0061,  0.1765,
        -0.3864, -0.4254, -0.0614, -0.1246, -0.3051, -0.1979,  0.0270, -0.0284,
        -0.2065, -0.6026, -0.2675,  0.3027,  0.1097,  0.1897,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4474, -0.7711, -0.1701, -0.0191,  0.0536,  0.0600,  0.0582,  0.0137,
        -0.0439, -0.7599, -0.1211, -0.1302, -0.0967,  0.0093, -0.1835, -0.2950,
        -0.7188, -0.0376, -0.0088, -0.3642, -0.0686, -0.0248, -0.2856, -0.1762,
        -0.0588, -0.0245,  0.0051,  0.0223, -0.0733, -0.1799,  0.0117, -0.0339,
         0.1167,  0.0400, -0.0957, -0.3254,  0.0548,  0.0023,  0.0113,  0.0236,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0195, -0.1066,  0.0553,  0.1167, -0.0176, -0.3485, -0.0900, -0.0538,
        -0.2394, -0.0757, -0.0362, -0.0694, -0.0091, -0.6564, -0.2758, -0.2382,
        -0.0520,  0.0188, -0.5746, -0.0367, -0.0430, -0.0026,  0.0916,  0.0098,
        -0.0805, -0.6771, -0.1131, -0.3733, -0.5079, -0.0992,  0.0426, -0.0530,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3700, -2.3474, -0.2165,  0.1418, -0.1832, -0.1283, -0.2050, -0.4188,
        -0.5371,  0.0181,  0.0622, -0.3825,  0.1821, -0.1133,  0.0603, -0.0499,
        -0.3444, -0.0048,  0.0297,  0.0914,  0.0300, -0.1059, -0.1075, -0.7163,
        -0.0893, -0.0156, -0.4091, -0.1001, -0.0728, -0.0767, -0.0410, -0.0029,
         0.0030,  0.0272,  0.2517,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.9619, -0.2237, -1.0053, -0.8031, -0.1779,  0.1373, -1.2126, -0.8352,
        -0.1408, -0.5183,  0.0494, -0.1730,  0.0329, -0.2311, -0.0917, -0.0530,
        -0.1383, -0.1040,  0.1113,  0.1103,  0.4271,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1696, -2.8209, -0.1883, -0.5881, -0.6162, -0.1801, -0.0156, -0.2144,
        -0.4406,  0.1866, -0.0390,  0.1532,  0.0784,  0.0630, -0.2621,  0.0955,
        -0.2265, -0.2489,  0.1226,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2899,  0.0207, -0.8144,  0.0389, -0.2682, -0.1378,  0.0235,  0.1914,
         0.0065, -0.1400, -0.5795, -0.1539, -0.2463, -0.6400, -0.0191, -0.3800,
        -0.2250, -0.2098,  0.0682, -0.2911,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4334, -2.4065, -0.1795, -0.5035,  0.1712,  0.0424, -0.0555, -0.2249,
        -0.1145, -0.0703, -0.0447, -0.0718, -0.0460, -0.4228, -0.0516, -0.1464,
        -0.1928,  0.0180, -0.1751, -0.3641, -0.0567,  0.0358, -0.1884,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0442, -1.0316, -0.3001, -0.1117, -0.0206, -0.1606, -0.1275, -0.0778,
        -0.3966, -0.5721, -0.1263, -0.3238, -0.0640, -0.3160,  0.1045, -0.0286,
         0.0356,  0.0077,  0.0213, -0.0241, -0.1767, -0.0429,  0.1294, -0.1382,
         0.2892,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3074, -2.1734, -0.1780,  0.0302, -0.0077, -0.0049,  0.1376, -0.0190,
         0.1545, -0.2402, -0.7671, -0.0296, -0.1357,  0.1017, -0.0497, -0.2521,
        -0.0575, -0.0221, -0.0463, -0.0163, -0.5449, -0.2048, -0.3069, -0.2842,
         0.0385, -0.1074, -0.1085,  0.0839, -0.1818, -0.1408, -0.1119, -0.0060,
        -0.1683,  0.0873, -0.1306,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0733, -0.0721, -0.0730, -0.5357, -0.0748, -0.0850, -0.6964, -0.1254,
        -0.7944, -0.1437, -0.1910, -0.1783, -0.1411, -0.3968, -0.2559, -0.7173,
         0.0075, -0.4456,  0.1346,  0.0392, -0.1146, -0.2292,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0944, -0.2631, -0.1090, -0.0649,  0.3632,  0.0496,  0.0034,  0.2109,
         0.1200,  1.3167, -0.1851,  0.0173,  0.2054,  0.3561,  0.0441,  0.1743,
         0.1505,  0.3250, -0.5339,  0.2931, -0.2981,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0754,  2.9333,  0.6159, -0.0594,  0.1984,  0.0306,  0.5402,  0.6566,
         0.0141,  0.0066,  0.0083, -0.1199,  0.0900,  0.6321, -0.1974,  0.2190,
        -0.0540, -0.5270,  0.1213,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2347,  0.0499, -0.0997,  0.0685,  0.0646, -0.3791, -0.0471,  0.0105,
         0.0512, -0.0227, -0.0555, -0.1322, -0.0081, -0.1810, -0.0144, -0.0093,
        -0.0750, -0.2492,  0.0075, -0.1095,  0.0008,  0.0943, -0.0631,  0.0885,
        -0.0896, -0.0901, -0.0301, -0.1049, -0.0490, -0.4295, -0.0215, -0.0906,
        -0.2991, -0.7261, -0.0290, -0.0897, -0.1392,  0.0775, -0.0248, -0.1505,
         0.0573,  0.0385,  0.0038, -0.0076,  0.0346,  0.0928, -0.0754],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8829, -1.8436, -1.3362, -0.1093,  0.0667,  0.1049,  0.0200, -0.1138,
        -0.3228,  0.0253,  0.0283,  0.0360,  0.0993,  0.1019,  0.2418,  0.0166,
        -0.3587,  0.0114,  0.0021, -0.1304,  0.1317,  0.0669, -0.2955, -0.5651,
         0.0186,  0.1084,  0.1126,  0.0262,  0.1077,  0.0780,  0.1115,  0.1718,
        -0.0328,  0.0449,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9781e-01,  2.4578e+00, -4.2480e-02,  9.5155e-03,  3.0830e-01,
         2.7037e-01, -2.9850e-02,  5.9618e-02,  1.5842e-02,  1.7746e-01,
        -3.7413e-04, -1.2085e-01,  4.3958e-01,  6.5958e-01,  2.1298e-01,
        -3.4191e-02,  2.0902e-01,  7.2353e-03, -4.6269e-02,  3.5537e-01,
         1.2802e-01,  3.5788e-01,  7.8072e-02,  1.0276e-01,  9.0148e-02,
        -9.1053e-02,  9.3653e-02,  1.8878e-01,  1.0773e-01,  3.1035e-01,
         4.6856e-02, -2.4354e-02,  8.6624e-02, -3.9083e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.2691, -2.0673, -0.0186, -0.0475, -0.2649, -0.5422, -0.0248, -0.0372,
        -0.0059, -0.4680, -0.1059, -0.1111, -0.1851, -0.5207, -0.1984, -0.1068,
        -0.0710, -0.2299,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1683e-01,  1.2340e-01, -1.3886e-01,  7.5694e-02,  4.0779e-02,
         7.2036e-02,  1.7396e-03, -1.5090e-01, -7.0399e-01, -1.1168e+00,
        -5.3375e-02, -7.4815e-02,  3.2989e-02, -9.0294e-02,  1.4448e-01,
        -3.9874e-02, -1.1195e-01, -4.4712e-03,  9.2731e-02, -1.2648e+00,
        -4.6791e-02,  2.8465e-02, -1.1640e-01,  2.0547e-02, -8.2210e-04,
        -2.9413e-03, -2.8715e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2225,  0.0725,  0.0232, -0.0161, -0.2883,  0.0213, -0.0904, -0.0469,
        -0.9095, -0.0841, -0.0856, -0.1913, -0.6841, -0.2427,  0.0116, -0.0557,
         0.1081, -0.1085,  0.1074, -1.4151, -0.0022,  0.0275, -0.5001, -0.1465,
         0.2393, -0.2706,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6402, -1.7229, -0.1925, -0.7008, -0.5829, -0.1720, -0.1266,  0.0446,
        -0.3997, -0.2693, -0.3008, -0.2590,  0.0465, -0.0079, -0.2168, -0.3920,
        -0.1965, -0.1378, -0.0483, -0.1597, -0.0403,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1893, -1.0979, -0.8026, -0.6579, -0.2397, -0.0765, -0.7430, -0.0222,
        -0.1077,  0.0379, -0.0990, -0.1451,  0.2397, -0.1328,  0.1067,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5554, -0.0671,  0.1576,  0.0954, -0.7010, -0.0246,  0.2748,  0.0290,
        -0.0150,  0.2041,  0.1102, -0.3526, -1.3352, -0.5139,  0.2282,  0.1447,
         0.0987, -0.7183,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3374,  3.0725,  0.5013,  0.6392, -0.0664,  0.0997,  0.2317,  0.2548,
         0.0375,  0.3122,  0.5091,  0.4051, -0.0260,  0.6448,  0.0159,  0.0667,
         0.0209, -0.0690, -0.4741,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2151, -0.7277, -0.5567, -0.1228, -0.1929, -0.1503,  0.0632, -0.0501,
        -0.0788, -0.0271, -0.0081,  0.0462, -0.1129, -0.0032,  0.0033, -0.0619,
        -0.3122,  0.0243,  0.0399,  0.0721, -0.4366, -0.0030, -0.2780, -0.0062,
        -0.3171, -0.0493, -0.4163,  0.0906, -0.1100, -0.1240, -0.0127, -0.0147,
        -0.0983, -0.0098, -0.1368, -0.0155, -0.2123, -0.0191, -0.0613,  0.0185,
         0.0011, -0.0059,  0.0093, -0.0702], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1495,  1.9793,  0.2647,  0.9593,  0.5168,  0.2022,  0.1431,  0.1025,
         0.6044,  0.0560,  0.1638,  0.1079, -0.1315, -0.0067, -0.1426, -0.0770,
        -0.2831,  0.6765, -0.0458,  0.0684,  0.1840, -0.0752,  0.0049,  0.0363,
        -0.0221,  0.0284,  0.0140,  0.2128, -0.0182,  0.0135,  0.0625,  0.1071,
        -0.1599, -0.0691,  0.5562,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4148, -2.6079, -0.2127, -0.6822, -0.2218, -0.0751, -0.9701, -0.4960,
        -0.0262, -0.1105,  0.0556, -0.1414, -0.3928,  0.0050, -0.0486, -0.2478,
        -0.0830, -0.0504,  0.1438,  0.0043, -0.0137, -0.0060, -0.1900,  0.7849,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3233,  2.4353,  0.0818,  0.2945,  0.1754,  0.1050, -0.1582,  0.0664,
         0.6442,  0.0820,  0.2464, -0.0575, -0.0647,  0.2943,  0.6205,  0.1448,
        -0.1905,  0.0420,  0.4526,  0.0982,  0.0591,  0.2371, -0.0679,  0.1898,
        -0.0043,  0.2422,  0.2468,  0.2824,  0.0449,  0.1827,  0.1882,  0.0073,
        -0.4893,  0.0300,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0977,  0.0753, -0.0376, -0.0938, -0.0695, -0.1112, -0.7240, -0.1310,
        -0.0295, -0.0343, -0.0109, -0.0064, -0.3852, -0.0556, -0.4939, -0.7045,
        -0.0362,  0.0630,  0.0227,  0.1429,  0.0758,  0.0010,  0.1480, -0.2638,
        -0.1924, -0.0068, -0.1705, -0.0194,  0.1353, -0.0201,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.5845, -0.0462, -0.1458, -1.4486, -0.0578,  0.0964, -0.1322, -0.1044,
        -0.1655, -0.8277,  0.1691, -0.0367,  0.0829,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1466,  0.0251,  0.1224, -0.0469,  0.0434, -0.1195,  0.0060, -0.3901,
        -0.8245,  0.1546, -0.1266, -0.6180, -0.0175, -0.5108, -0.6408, -0.1239,
         0.1839, -0.0798,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5002, -1.0338, -1.3917,  0.3378, -0.5610, -0.1808, -0.2127, -0.2713,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0199, -0.7963,  0.0694, -0.2257, -0.0810, -0.1251, -0.1261, -0.1339,
         0.0570, -0.0243, -0.0122,  0.0602, -0.0439, -0.0633,  0.0332, -0.0419,
         0.0106,  0.0211, -0.0325, -0.0403,  0.0370,  0.0829, -0.1997, -0.2019,
         0.0034, -0.1973, -0.0610, -0.0247, -0.5280, -0.2886,  0.0331, -0.1556,
        -0.0895, -0.8301,  0.0021, -0.2734, -0.1850,  0.0523,  0.1118, -0.0170,
        -0.0685, -0.0160, -0.1102,  0.0449,  0.0514,  0.1750,  0.1362],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2302,  0.8425,  0.8914,  1.9356, -0.0251, -0.3409,  0.2379, -0.0379,
         0.4222, -0.0470, -0.5298,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2082, -2.3718, -0.3808, -0.7260, -0.0853, -0.0527, -0.2220, -0.6885,
        -0.0806, -0.2443, -0.2322,  0.0558, -0.4277, -0.0272,  0.1033, -0.0476,
         0.1285, -0.2089,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2839, -0.2906, -1.8361,  0.0262,  0.0994,  0.0934, -0.9658,  0.0522,
        -0.3548,  0.0316, -0.0040,  0.1244,  0.1695,  0.1457, -0.1009,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4044, -2.4111, -0.1591, -0.1084, -0.1318, -0.3209, -0.6161, -0.3351,
        -0.0573, -0.5564, -0.4243, -0.0324, -0.0183,  0.0427, -0.1425,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4644,  0.0793,  0.0790,  0.1427, -0.0435, -0.0832, -0.0670, -0.0051,
        -0.4497, -0.2571, -0.1258, -0.3618, -0.7518, -0.1597,  0.1036, -0.0111,
        -0.2401, -0.0308,  0.0350,  0.0665, -0.3988, -0.7181,  0.0140,  0.1614,
        -0.1210,  0.0501,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0945, -0.1161, -0.1397, -0.0503,  0.1109, -0.0652, -0.1592, -1.2526,
        -1.3430, -0.1595,  0.1211,  0.1291,  0.1947,  0.1879, -0.5165,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0488, -2.5452, -0.2255, -0.5317, -0.6632, -0.0311, -0.2961, -0.0812,
         0.0143, -0.5975,  0.1012, -0.0653, -0.2988, -0.0419, -0.0881, -0.0133,
        -0.1196, -0.0219, -0.4061, -0.0287, -0.3272,  0.1493,  0.0101,  0.0881,
         0.0914,  0.4061,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1152,  0.1029,  0.0573,  0.1525,  0.0112, -0.0704, -0.0036,  0.0560,
        -0.2221, -0.2575, -0.7960, -0.0235, -0.0580, -0.0737, -0.5713, -0.3587,
        -0.0623, -0.2028, -0.0243, -0.7315, -0.3448, -0.1514, -0.1411, -0.4448,
        -0.3868, -0.1563,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-4.3152e-01, -1.6846e+00,  1.2355e-01, -3.6510e-02, -7.6766e-02,
         1.1932e-01, -2.6657e-01, -5.5303e-01, -2.3693e-02, -2.4902e-01,
        -1.5650e-01, -2.7950e-01, -5.2284e-01,  5.8466e-02, -1.7134e-01,
        -1.3299e-01, -8.0177e-02, -5.0452e-02, -1.1410e-01, -3.3715e-02,
         1.8758e-01, -1.6012e-01,  1.0791e-02, -2.8638e-01, -5.0892e-01,
         1.6805e-03, -2.4644e-01, -6.9744e-02,  2.2804e-02, -1.3260e-02,
        -7.3379e-03, -1.2412e-01, -5.0730e-02,  5.7747e-02,  3.8000e-02,
        -2.6489e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7923e-02, -7.9103e-01, -2.2322e-01,  3.4771e-02, -2.8810e-01,
        -1.1148e-01, -2.6022e-01, -4.0016e-01,  8.3666e-03, -4.4909e-04,
        -2.7385e-01, -9.5086e-03, -1.0492e-02,  6.1904e-02, -1.3153e-02,
        -9.3705e-02,  2.3304e-02, -4.8192e-02,  3.9816e-02,  2.9045e-02,
        -8.7377e-02, -3.4390e-01, -4.8543e-02, -1.2701e-01, -3.6704e-01,
        -4.9901e-01, -2.0797e-02, -4.0704e-01, -3.4783e-01,  1.3534e-01,
         3.3519e-02, -3.2968e-01, -2.4132e-01,  5.1114e-02, -2.3495e-01,
         2.6741e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0860, -0.1725, -0.0796, -0.1044,  0.1731, -0.6373, -0.2464, -0.0967,
        -0.1038,  0.1046, -0.1567, -0.5395, -1.2529, -0.2096,  0.2103, -0.1803,
        -0.1219,  0.0440,  0.0109,  0.1721,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0006e-01, -1.1817e+00, -4.1770e-01, -3.6959e-01,  1.8790e-02,
        -4.3499e-02, -2.2415e-02, -2.3716e-01, -1.6433e-02, -1.6341e-01,
         3.6819e-02,  1.0149e-02, -1.1245e-01, -3.3450e-04, -2.6511e-01,
        -1.2965e-01, -1.0297e-01, -3.0603e-01, -2.8999e-02, -1.5626e-02,
        -3.3551e-02, -4.0581e-02,  4.1707e-02,  1.5635e-01, -3.6421e-02,
         5.7308e-02,  1.8056e-02,  9.5047e-02, -2.0653e-02, -3.7485e-01,
        -1.8755e-01, -4.0159e-01,  2.0459e-01, -5.7707e-01,  2.4144e-01,
         3.0017e-01,  4.8347e-03,  8.6051e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2450, -0.6531, -0.2856, -0.1724, -0.0485,  0.0847, -0.0481, -0.0427,
        -0.0139, -0.0402, -0.3318, -0.5416,  0.0097,  0.0393, -0.0551, -0.4025,
         0.0185, -0.3113, -0.0313,  0.0299,  0.0036,  0.0222,  0.0460,  0.0015,
        -0.0048, -0.0402, -0.0623, -0.1672, -0.3645,  0.0372, -0.0455, -0.0790,
         0.0264, -0.0102, -0.1310, -0.0145, -0.0471,  0.1135,  0.0814,  0.0490,
        -0.2025], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2256e-01, -3.9395e+00, -2.5748e-01,  3.7719e-01, -6.6143e-02,
        -2.8475e-01,  8.4056e-02,  3.7465e-02, -1.2935e-01, -2.7129e-02,
        -2.5553e-03, -3.8996e-01, -5.3100e-01,  2.1560e-02,  1.2753e-01,
        -4.0339e-01,  1.3192e-02, -2.2901e-01, -6.1394e-02,  2.4500e-02,
         2.8511e-02, -3.2276e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2889,  0.0461,  0.0033, -0.0163,  0.0877, -0.0723, -0.0135,  0.0634,
        -0.0659,  0.0100,  0.0976, -0.3385,  0.0079, -0.1388, -0.0734, -0.5490,
        -0.0165, -0.2645, -0.1889, -0.3864, -0.3539,  0.0111,  0.0187, -0.2674,
         0.0177, -0.0078,  0.0412, -0.1504, -0.4345, -0.0601, -0.0661, -0.2485,
        -0.0281,  0.0693,  0.0286,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6007, -0.2229, -0.7352, -0.1225, -0.5299,  0.0303,  0.0281,  0.0973,
        -0.0195,  0.0301, -0.3323,  0.0822,  0.0083, -0.0232, -0.0818, -0.2291,
         0.2187, -0.0538, -0.3339, -0.0132, -0.2913,  0.0786, -0.0932, -0.2808,
        -0.1257, -0.1089, -0.3642,  0.0135,  0.0542,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3777, -1.8922, -0.7492, -0.3709, -0.2726, -0.3069, -0.1313, -0.0475,
        -0.1203, -0.0834, -0.1184, -0.1134,  0.0094,  0.0583, -0.0944, -0.3552,
        -0.1661, -0.0598, -0.1258, -0.0354, -0.0425, -0.1873, -0.0321,  0.1226,
        -0.0830,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8674e-02,  1.8025e-02, -1.0411e-01, -8.9656e-02, -9.1492e-01,
        -9.1482e-03, -1.1700e-01, -7.9219e-01, -8.0056e-01, -3.1378e-04,
        -7.8868e-02,  1.5155e-01, -1.1856e-01,  1.5290e-01, -4.6132e-01,
        -3.9877e-01,  5.1762e-02, -1.7265e-01,  3.0436e-04,  7.9893e-02,
        -9.6671e-02, -8.1668e-03,  8.6128e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4239, -0.1438,  0.0061, -0.0373,  0.0106,  0.1341, -0.0308, -0.0560,
         0.0194, -0.0394,  0.0184, -0.0928, -0.3933, -0.3644, -0.0781, -0.0787,
        -0.0041, -0.2017,  0.0296, -0.0232,  0.0536, -0.1484, -0.0627, -0.0923,
        -0.2256,  0.0242, -0.8176,  0.0255, -0.4879, -0.0632, -0.4531, -0.1069,
        -0.0512, -0.1157, -0.2097, -0.1429, -0.1582, -0.0012, -0.3357,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4443, -0.0706, -0.0658, -0.9156, -0.8009, -0.0665, -0.1788, -0.6479,
        -1.1160,  0.5418,  0.2559,  0.0091,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.1288, -0.3533, -1.3176,  0.0419, -0.0820, -0.1437, -0.1657,  0.0476,
        -0.0125, -0.1583,  0.0285, -0.0357, -0.0396,  0.0101, -0.1197, -0.0105,
        -0.1184, -0.2507,  0.0063, -0.0090, -0.1152, -0.1323,  0.0655,  0.0324,
         0.0031,  0.0313,  0.0453, -0.0633, -0.1044, -0.0315,  0.0327,  0.0319,
        -0.2320, -0.0178,  0.0015,  0.0380, -0.2140, -0.0261, -0.0206,  0.0029,
         0.0299, -0.0203,  0.0623, -0.0037,  0.0074,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1264, -2.4065, -0.4761, -0.9074, -0.2488,  0.0102, -0.1085, -0.2719,
        -0.6435, -0.1039, -0.2466,  0.3245,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4068, -3.3466, -0.4469, -0.7189,  0.1972,  0.4047, -0.3689, -0.1527,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3431e-02,  2.0195e+00,  1.1812e-01,  1.4140e-02,  3.7854e-01,
        -4.4887e-02,  1.5516e-01,  8.8245e-02,  2.3714e-01,  5.8744e-01,
         1.5113e-01,  8.4261e-02,  1.4490e-02,  1.0174e-02, -6.8019e-03,
        -5.4663e-02, -2.5034e-02, -3.7972e-02, -1.4053e-02,  2.5048e-02,
        -6.8332e-02, -5.6208e-02, -9.1780e-03,  3.9842e-02,  1.8714e-02,
        -2.2659e-02,  5.7755e-03,  1.3968e-04,  2.4747e-01, -1.8710e-02,
         2.3354e-02,  6.1042e-02, -2.0275e-02, -5.4041e-02, -3.6635e-02,
        -2.0798e-02, -2.6407e-02, -4.6291e-03, -7.1798e-02,  7.4923e-03,
        -5.4092e-03, -1.5576e-02,  1.8491e-02,  8.3038e-02, -2.1178e-03,
        -1.8076e-01,  5.7190e-01, -1.7621e-01,  4.2068e-01,  7.0665e-01,
         1.2014e-01,  2.4317e-03,  2.1334e-01,  8.9095e-02,  1.7779e-01,
         9.3586e-03,  1.1895e-01,  3.2919e-02, -1.4552e-02, -2.2236e-02,
        -9.8121e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1543, -0.0534,  0.0995, -0.1497, -0.4383,  0.0121, -0.1324,  0.0332,
        -0.0334,  0.0160,  0.0179, -0.3465,  0.0130, -0.2544, -0.3919,  0.0491,
         0.0035, -0.0161, -0.1708, -0.0503, -0.2775, -0.3032, -0.1540, -0.0822,
        -0.2493, -0.0881, -0.0199, -0.1187, -0.0985, -0.0642,  0.0360, -0.0392,
        -0.1705,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1607, -0.5028, -1.1024, -0.4191, -0.1223,  0.1461, -0.2824,  0.0952,
         0.0441, -0.0517, -0.0546, -0.0070, -0.1611, -0.0708, -0.0157,  0.9367,
        -0.0249, -0.6676, -1.7534,  0.1309, -0.0447, -0.1103,  0.2084, -0.2572,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1196, -0.5337, -0.5274, -0.3411, -0.0592, -0.4708, -0.0067, -0.2646,
        -0.0144, -0.2140, -0.3593, -0.2539, -0.4606, -0.0132, -0.0826,  0.0283,
        -0.0194,  0.0176, -0.0240,  0.0260, -0.1419, -0.0853, -0.0136, -0.0849,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2258, -0.0231, -0.2177, -0.4769, -0.0677,  0.0614, -0.1679, -0.2639,
         0.0123, -0.0786,  0.0188, -0.0371,  0.0626, -0.0139, -0.3348, -0.5941,
        -0.5695,  0.0901, -0.0613, -0.1748, -0.3246,  0.1042, -0.1175, -0.4462,
         0.0440, -0.0449, -0.2737, -0.0279,  0.0369, -0.1360, -0.1057, -0.1335,
         0.0235,  0.1690,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5354,  0.1157, -0.0640,  0.0776, -0.5941, -0.0107,  0.0429,  0.0329,
         0.0167,  0.0602, -0.0490,  0.0075, -0.0118, -0.6248,  0.1552,  0.0498,
        -0.3686,  0.0729, -0.2069,  0.0372, -0.2743, -0.6153,  0.3600, -0.0831,
         0.1070,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1048,  0.0848, -0.0861, -0.0913, -0.0038, -0.0781, -0.8330, -0.4396,
        -0.9484, -1.1679,  0.3111, -0.1360, -0.1245,  0.3234, -0.4592,  0.2807,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1337, -1.3481,  0.1679, -0.6509, -0.3108, -0.4710, -0.9779, -0.1038,
         0.2133,  0.0439,  0.1290, -0.1180, -0.2706, -0.0916,  0.1222,  0.0401,
        -0.1106, -0.2352, -0.1437, -0.0673, -0.0391,  0.1966,  0.2621,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0985, -0.1334,  0.2342,  1.6392,  3.2397, -0.5309, -0.4887, -0.2060,
         0.0678, -0.5578, -0.3834, -0.3369, -0.5909,  0.8095,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.0898, -0.4987, -0.5077, -1.0451, -0.1616, -0.0499, -0.2462,  0.2084,
        -0.3077, -0.3008, -0.4487, -0.7187, -0.2932,  0.0246, -0.0228,  0.1122,
        -0.2194, -0.6498, -0.1177, -0.5089, -0.1016, -0.0239,  0.1000,  0.1583,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2004, -0.2127, -0.8293, -0.1032, -0.8133,  0.0768, -1.4258,  0.0871,
         0.0120,  0.3263, -0.2664,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1406,  0.1005,  0.1507, -0.0337, -0.0109, -0.3875, -0.7054,  0.0337,
        -0.5442, -0.1737, -0.1896,  0.0579, -0.2702, -0.0686, -0.2860,  0.0021,
        -0.1057, -0.3329,  0.0442,  0.1136, -0.0469,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0101,  0.0148,  0.0662,  0.3389,  0.1440,  0.1015,  0.1097, -1.1785,
        -0.6713, -0.6362, -0.1237,  0.0901, -0.2733, -0.0816,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1489, -0.1515, -0.0296, -0.0042, -0.0266,  0.0658, -0.0561, -0.1225,
        -0.0873,  0.0351,  0.3790,  0.3999,  1.2328, -0.1672,  0.1166,  0.2424,
        -0.1113,  0.1449, -0.1389,  0.0373,  0.0308,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3552, -2.2776, -0.0943, -0.8928,  0.0870, -0.6304, -0.2324, -1.0592,
        -0.3108, -0.4330,  0.0065, -0.0754,  0.1700,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0966, -0.7128, -0.0819, -0.0376, -0.0817, -0.0381, -0.2146, -0.4056,
        -0.1102, -0.0244, -0.0089, -0.0370,  0.0823, -0.0146,  0.0247, -0.0864,
         0.0231, -0.0406,  0.0121,  0.0137, -0.0583,  0.0943, -0.1097, -0.1139,
        -0.9570,  0.1468, -0.5241, -0.4201, -0.4833, -0.0641, -0.1821, -0.0146,
        -0.0454, -0.0198, -0.0043,  0.0294, -0.4234, -0.0540,  0.0052, -0.0396,
        -0.1332,  0.0289,  0.0260, -0.0156, -0.2662], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2214,  0.2252,  0.6013, -0.0156,  0.0316, -0.0051, -0.0071,  0.1083,
        -0.0171, -0.0163,  0.6387, -0.0125,  0.1128, -0.0342,  0.3375, -0.0092,
         0.7822, -0.0696,  0.0443,  0.0339,  0.1084,  0.3465, -0.1098,  0.6843,
         0.0199, -0.0342,  0.3685,  0.3626,  1.2906,  0.2648,  0.0904,  0.0584,
         0.1391,  0.1944,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5065, -0.1692, -0.2783, -0.1401, -0.1044, -0.0837,  0.0927,  0.0367,
         0.0785,  0.0625, -0.1516, -0.0858, -0.0259, -0.0511,  0.0440,  0.1095,
        -0.0050, -0.1477, -0.1231, -1.5679, -2.3862,  0.0852, -0.0423,  0.0515,
         0.0461, -0.0231,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6900e-01, -2.8032e+00, -5.7776e-02, -4.3622e-01,  1.3372e-01,
        -6.9505e-02,  9.3465e-02, -2.6919e-01, -8.3230e-04,  7.6884e-02,
         1.2736e-01,  2.9081e-02,  1.0588e-01, -6.5869e-02, -8.7803e-02,
        -4.2823e-01, -5.4952e-01,  1.8833e-01, -2.5940e-01, -2.4616e-02,
        -1.4714e-01,  5.4986e-02,  9.0818e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2772, -0.5418, -0.1496, -0.1580,  0.0476,  0.0910,  0.0146, -0.2539,
        -0.4962, -0.0539,  0.1567,  0.0040,  0.0514,  0.0383, -0.8878, -0.0031,
        -0.0485, -0.2872, -0.5430, -0.1765, -0.1249, -0.0249,  0.0233,  0.1577,
        -0.0050, -0.0412, -0.0932, -0.3506,  0.1070,  0.0311, -0.0613,  0.0367,
        -0.0301,  0.2112, -0.0588,  0.0392, -0.1451,  0.0245,  0.0326,  0.0466,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0571,  0.0142, -0.0198, -0.2046, -0.3097,  0.0092, -0.0217, -0.2423,
         0.0783, -0.0482, -0.0223, -0.0440, -0.1292, -0.0290, -0.2135, -0.2104,
        -0.2062, -0.0545, -0.1895, -0.1721, -0.0125, -0.3322, -0.2119,  0.0362,
        -0.0655, -0.0352, -0.0562, -0.3565, -0.0394, -0.2407, -0.4641, -0.0823,
        -0.0575, -0.0390,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.4043, -0.3558, -0.1864, -0.0586, -0.4309, -0.2026,  0.0399,  0.0958,
         0.0198,  0.0274, -0.0405, -0.1123,  0.1732, -0.0649, -0.6969,  0.0373,
        -0.0801, -0.0368, -0.3806, -0.0049,  0.0089, -0.0119,  0.0074,  0.0124,
        -0.2370, -0.4843,  0.1507,  0.0008,  0.1060, -0.0318, -0.1369,  0.0174,
         0.0371,  0.0205, -0.3513,  0.0547, -0.1187, -0.2147, -0.0162, -0.2048,
        -0.2675, -0.0972, -0.3315, -0.1304,  0.0054, -0.0008, -0.0058, -0.0174,
        -0.0766, -0.0481, -0.1480, -0.0446], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1368, -0.0622,  0.0707, -0.1419,  1.3331,  0.2361,  0.0375, -0.3024,
        -0.0288,  0.0902, -0.0809, -0.0611,  0.0780,  0.7977,  0.2544,  0.2633,
         0.8834,  0.1680,  0.2717, -0.0266,  0.1621,  0.0538,  0.1626,  0.6822,
        -0.0027,  0.0181,  0.0786,  0.0477, -0.0515,  0.0642, -0.0486,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5602e-01, -1.0461e+00,  3.1684e-02, -1.7683e-01,  6.0640e-02,
         1.8877e-01, -7.9564e-02, -8.1472e-02,  3.5652e-02,  2.2492e-01,
         2.3833e-02, -5.0123e-01,  3.2758e-04, -5.9131e-02, -4.5971e-01,
        -3.2747e-01, -8.3066e-02, -4.5233e-01, -7.9995e-01, -6.5194e-01,
        -1.4483e-01, -1.9358e-02, -8.1650e-02, -2.5144e-02, -1.5738e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0775,  0.3267,  0.0262,  0.0691, -0.1895, -0.6526, -2.0413, -0.2694,
        -0.0216, -0.3589,  0.3131, -0.4441,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3024e-02, -1.9108e+00, -1.8508e-01, -1.8828e-01, -3.3978e-01,
        -1.1174e-01, -4.0052e-02, -4.9599e-02, -5.4357e-04, -4.1326e-02,
         7.6628e-02, -1.0917e-01, -3.6866e-01, -5.6867e-01,  4.1132e-02,
        -2.5239e-01,  3.0858e-04, -3.6199e-02, -8.9727e-02,  1.4734e-02,
        -1.6202e-01, -8.0659e-02, -1.6380e-01, -1.6813e-01, -2.4673e-01,
        -1.6410e-01,  8.7404e-03,  3.1495e-02,  1.4498e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1918,  2.4744, -0.0184,  0.1808, -0.1438,  0.4414,  1.9446,  0.1632,
         0.0162, -0.3956, -0.2795,  2.3543,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6648, -2.1044,  0.3010, -0.5661,  0.0326, -0.4846, -0.8273, -0.1755,
        -0.3726, -0.2553,  0.0329, -0.0633,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1080,  0.2622, -0.6399, -0.0325, -0.8396,  0.0350,  0.0821, -0.0742,
        -0.7787, -0.2877,  0.0151, -0.6973, -0.1518, -0.0453,  0.1128, -0.5046,
        -0.0296, -0.1093,  0.0344, -0.0920,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5937e-01, -1.3462e+00,  6.0911e-02, -4.6154e-01, -7.9645e-01,
        -1.5822e-01, -1.7774e-01, -9.1137e-02, -9.8120e-02,  1.9540e-03,
        -5.9336e-02, -2.8711e-01, -8.1856e-02, -7.8641e-02, -9.7972e-02,
         3.8241e-02, -1.7614e-01,  4.0501e-02, -4.0273e-02, -7.2756e-02,
        -8.9454e-02,  6.2513e-02, -5.1072e-03, -1.6117e-04,  1.1190e-02,
         7.3341e-02,  9.1099e-02, -1.9146e-01, -2.3832e-01,  2.2453e-02,
         6.0419e-02,  6.4196e-02, -1.2414e-01, -4.6628e-01, -1.0565e-01,
         1.1731e-01, -1.1584e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5113, -4.8752, -0.4044,  0.4285, -0.2384, -0.0068,  0.3533, -0.2111,
        -0.0675,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4993,  0.0942, -0.0615, -0.8279,  0.0127,  0.0563, -0.4194, -0.3605,
         0.0608,  0.0043, -0.0942, -0.0216, -0.4186, -0.5912, -0.0644,  0.1078,
         0.0087,  0.1175, -0.3300, -0.2869,  0.0466,  0.0717, -0.0084,  0.0062,
         0.0819,  0.1335, -0.0224,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3378, -2.2879,  0.2889, -0.6273, -0.1715, -0.1257, -0.0837, -0.0511,
         0.0211,  0.0484,  0.0417, -0.7814, -0.0186,  0.1065, -0.0420,  0.1432,
        -0.0753,  0.0583,  0.0324,  0.1043,  0.0698, -0.3933, -0.4180, -0.1070,
        -0.0359, -0.1132, -0.0439, -0.2283, -1.0037, -0.0767,  0.0781, -0.0046,
         0.0561,  0.5958,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.0913,  0.0010,  0.0052, -0.0333, -0.2225, -0.0938,  0.0124,  0.0252,
         0.0492,  0.2170, -0.0366, -0.5122,  0.2278,  0.0502,  0.0290, -0.1763,
        -0.6274,  0.2831, -0.2017, -0.0675,  0.0318,  0.0034, -0.1836, -0.0639,
         0.1130, -0.0090,  0.0039, -0.0256,  0.0276,  0.0285, -0.0607, -0.3993,
        -0.3927,  0.0875, -0.0850, -0.0741, -0.3069, -0.0457,  0.0491,  0.0564,
        -0.0502, -0.2039,  0.0168,  0.0601,  0.1306,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2753, -0.0092, -0.1306, -0.0054, -0.3384, -0.8669, -0.6896, -0.0268,
         0.0275, -0.7170, -0.3822, -0.0833, -0.5023, -0.1999, -0.2343, -0.0878,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7032,  0.1508, -0.2029, -0.0868,  0.0258, -0.1586,  0.0298, -0.3424,
        -1.3068, -1.0995, -0.3011, -0.6777, -0.0092, -0.2247, -0.4588,  0.0472,
        -0.0471, -0.0431,  0.0261, -0.0962, -0.1219,  0.1025,  0.0386,  0.0291,
        -0.0355,  0.1345, -0.1174,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0035, -0.0715, -0.3572,  0.0123, -0.1246, -0.0199, -0.0937, -0.0595,
        -0.1186, -0.0178,  0.0054, -0.0143, -0.1799, -0.0384, -0.0994, -0.0245,
        -0.1487,  0.0162, -0.0034, -0.2370,  0.0580, -0.1769, -0.0422, -0.4919,
         0.0395,  0.0111, -0.1579, -0.2911,  0.0512, -0.2424,  0.0156, -0.1721,
        -0.3488, -0.1042, -0.1317, -0.2129, -0.0978, -0.1266, -0.0154,  0.0536,
        -0.0955, -0.2562, -0.0287, -0.0637,  0.0643,  0.0758], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6353e-02,  1.2573e-01,  2.2676e-01,  3.9359e-02,  7.0150e-02,
        -1.1499e-01,  3.0495e-02, -6.1545e-01,  1.0280e-01,  1.6708e-02,
         1.8363e-02,  9.6917e-04,  9.6541e-02,  2.6147e-02,  2.8836e-02,
         4.6625e-02,  1.0968e-02, -1.3818e-01, -8.5919e-01, -1.7133e+00,
        -3.0299e-01,  5.3235e-02,  1.2007e-03, -1.8652e-02,  3.2243e-03,
        -3.5523e-02, -3.2619e-01,  4.8085e-01,  2.0607e-01, -2.6257e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2874,  0.0304, -0.4609,  0.0493,  0.0596,  0.4529,  0.5328,  1.6769,
         0.2912,  0.2729,  0.4844,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3411, -1.6046, -0.1869, -0.1802, -0.2864, -0.1300, -0.2210, -0.7346,
        -0.0057,  0.0170, -0.0161,  0.0419, -0.0596, -0.1035, -0.0858, -0.3868,
        -0.0531, -0.1498, -0.0766, -0.2441,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3727,  2.7865,  1.5513,  0.9965, -0.2433,  0.2575,  0.7299, -0.0443,
         0.0410,  0.3107, -0.3544,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3758e-01,  2.2320e+00,  1.1448e+00,  1.2295e+00,  3.5493e-02,
         3.9957e-01,  1.6688e-01,  2.2863e-01,  4.6804e-01, -8.8903e-02,
        -2.9889e-02,  4.8177e-02,  9.8625e-04,  1.3060e-01,  7.1099e-02,
         4.0096e-01, -1.1694e-01,  7.5839e-03, -5.9540e-03, -2.4289e-03,
        -4.5576e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4555, -1.7126, -0.6531,  0.3787,  0.0269, -2.0209,  0.0843, -0.0778,
        -0.0640, -0.0884,  0.0816, -0.2339,  0.0591, -0.0495, -0.0765, -0.0229,
        -0.0459,  0.0226,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0300, -0.7445, -0.5139,  0.0362, -0.0018,  0.0876, -0.0360,  0.0169,
         0.0068, -0.0345, -0.0158,  0.0087,  0.0270, -0.2524, -0.3101,  0.0846,
        -0.1385, -0.6041, -0.2126, -0.2265, -0.1184, -0.2501, -0.4887, -0.0142,
        -0.2018,  0.0080, -0.1723, -0.0402, -0.0424, -0.3215,  0.0099, -0.0132,
        -0.0486,  0.0522, -0.0836, -0.0199, -0.0148,  0.0308,  0.0772,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2892e-01, -2.7314e+00, -4.8641e-02, -4.4981e-01, -8.1278e-02,
        -4.1470e-02,  5.9382e-02,  1.2710e-01, -5.2791e-01, -5.5045e-02,
         1.9590e-02, -7.8018e-01,  1.3890e-01, -1.4172e-02,  5.8036e-02,
         4.6061e-02, -4.3850e-01,  1.1084e-02,  9.1155e-02,  3.3807e-02,
        -5.4434e-02,  6.2392e-02,  2.8968e-02,  4.7733e-02, -7.1759e-03,
        -5.5096e-03, -1.6578e-01,  9.4249e-03, -3.1724e-01, -5.3829e-01,
        -2.4668e-03,  6.0356e-03,  6.2753e-02, -2.7003e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.8085, -0.0507, -1.4923, -1.1830, -0.1417, -0.4777, -0.1769, -0.3168,
        -0.2881,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8533e-01, -1.6831e+00, -9.3781e-02, -3.8508e-01, -5.2569e-02,
        -5.5522e-01,  3.3087e-02, -1.7945e-01, -2.2772e-01, -8.6646e-03,
        -2.3427e-01, -1.8543e-01, -6.0655e-03, -7.0738e-02, -1.7056e-01,
        -6.8087e-01, -6.3012e-02, -8.5719e-01, -1.1314e-01, -4.4040e-02,
         8.4367e-02,  6.1493e-02,  4.6996e-02,  1.8938e-02, -3.1758e-01,
        -1.6874e-04, -7.1973e-02, -3.0597e-04,  8.4299e-03,  1.1080e-02,
        -7.2141e-02, -2.6768e-01,  8.1444e-02, -2.4535e-01, -9.0764e-02,
        -5.6172e-02,  7.9233e-02,  3.0592e-02, -1.2804e-01,  2.3532e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3678, -0.7915, -1.3165, -0.1826, -0.4068, -0.0820,  0.0426, -0.0620,
        -0.3388, -0.2786,  0.0488,  0.0535,  0.0230,  0.0302, -0.0299,  0.1030,
         0.0841, -0.1198, -0.3150,  0.0024, -0.2963, -0.0109, -0.0063, -0.1870,
         0.1743, -0.1828, -0.2709,  0.0269, -0.2531, -0.4776,  0.0159,  0.0047,
         0.0237, -0.0717,  0.0254,  0.0757, -0.3228, -0.2809, -0.0057, -0.0078,
         0.0466, -0.0367,  0.0147,  0.0267,  0.0177,  0.0104,  0.0365, -0.0152,
         0.0378,  0.0559, -0.1976,  0.0394,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4286e-02, -1.8775e+00, -1.9647e-01, -2.5557e-01,  8.7638e-02,
        -1.0650e-01, -5.7589e-01, -9.2889e-02, -1.6263e-01,  4.4777e-02,
        -2.1498e-02, -2.4628e-01, -6.4324e-01,  1.6864e-03, -2.0607e-01,
        -4.5119e-01, -3.0752e-01, -5.9538e-02, -3.7505e-01, -6.7541e-03,
         9.5005e-02,  2.2539e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5788e-01,  3.6575e+00, -1.0785e-01, -2.1066e-04, -5.8049e-02,
        -9.6300e-02,  2.5141e-02,  3.2627e-01,  6.6284e-02,  3.4531e-02,
         2.4358e-01,  5.2149e-02,  6.5811e-02,  6.1457e-02, -2.3509e-01,
         4.4217e-01,  1.4587e-01,  1.2598e-01,  6.9215e-02, -2.2126e-01,
         2.0367e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0464e-02, -8.3377e-01, -2.1968e-01, -1.8532e-01, -9.8614e-02,
        -9.0687e-03,  3.8269e-02,  7.5804e-02, -7.0408e-02,  5.1353e-03,
        -6.6310e-03, -2.0434e-02,  4.1978e-02,  7.3204e-02,  1.8242e-02,
        -4.7065e-02, -7.0496e-04, -1.0751e-02, -1.0117e-01, -5.9436e-03,
         3.2268e-02,  1.7189e-02, -1.5224e-02,  1.3061e-02,  2.8388e-03,
        -3.4878e-02, -7.6406e-02, -1.4546e-01, -1.7128e-02, -1.8488e-01,
        -3.9809e-01, -5.5448e-01, -2.3210e-02, -1.5633e-01, -2.7676e-01,
        -3.6841e-02,  4.0737e-02,  8.8455e-03,  7.4867e-02,  1.7765e-02,
        -2.7012e-01,  1.3469e-03, -3.0306e-03, -9.4430e-02, -2.0750e-01,
        -3.7986e-02, -2.4972e-01, -9.9913e-02,  1.1375e-02,  9.0155e-02,
        -1.0071e-01, -1.3583e-01,  2.4607e-02,  6.9736e-02, -1.6234e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1024, -0.0996,  0.0046, -0.1083, -0.0780, -0.1171, -0.4508, -0.0812,
        -0.0046, -0.0035, -0.0118, -0.0151,  0.0127, -0.0861, -0.2485, -0.1532,
        -0.0486, -0.0968,  0.0064, -0.1675, -0.0279, -0.1490, -0.1831, -0.0082,
         0.0227,  0.0053,  0.0204, -0.2238, -0.0281,  0.0456,  0.0781,  0.0044,
        -0.0555, -0.2931,  0.0937, -0.1347,  0.0284, -0.2083, -0.1899, -0.1554,
        -0.0377,  0.0067,  0.0603, -0.0293,  0.1325,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2138, -1.7332, -0.8534, -0.4788,  0.2775, -0.1283, -0.1354, -0.0072,
        -0.5740,  0.0165,  0.0483, -0.3347, -0.0036, -0.4119, -0.6157,  0.0060,
        -0.0825, -0.3837, -0.0728, -0.2212, -0.5092,  0.0038, -0.2633,  0.0882,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1925,  0.1484,  0.0080,  0.0216, -0.0409, -0.0128, -0.1383, -0.1951,
        -0.0367, -0.0125,  0.0723,  0.0056, -0.0632, -0.3115,  0.0171,  0.1261,
        -0.2157, -0.2593, -0.2212, -0.3082,  0.0448,  0.0523, -0.1827, -0.2983,
         0.1567, -0.0716, -0.3409, -0.0745, -0.0941, -0.2463, -0.0882, -0.1316,
         0.0913,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4132, -4.0994, -0.0876, -0.8814,  0.0056, -0.0970, -0.1589, -0.0614,
        -0.9021,  0.0523,  0.1177, -0.0297,  0.5263, -0.5481,  0.2479, -0.2291,
        -0.1946, -0.0582, -0.5145,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3143, -0.0042,  0.0721, -0.3348, -0.6240, -0.1312, -0.3260, -0.4010,
        -0.5995, -0.0420, -0.0417, -0.0191,  0.0451, -0.0088, -0.0188, -0.0266,
         0.0076, -0.1699, -0.0482, -0.0868, -0.4247, -0.4111, -0.2898,  0.0057,
        -0.2609,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4615, -1.4874, -0.8310, -0.1411, -0.0907, -0.4492, -1.0726,  0.3016,
        -0.0554, -0.0131, -0.0707,  0.1040, -0.4409, -0.0078, -0.0034, -0.0644,
        -0.0635,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.0354, -0.0426,  0.1764,  0.0479,  0.0047,  0.1370,  0.1052, -0.0372,
        -0.0529,  0.4538,  0.1504,  0.0687, -0.1028,  0.0087, -0.0886,  0.1494,
         0.8571,  1.3504, -0.2695,  0.4307,  0.1133,  0.8334,  0.7723, -0.0645,
        -0.2240, -0.1082, -0.1750, -0.1044,  0.0210, -0.0500,  0.1662, -0.5283,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1679, -0.6190, -1.1444, -0.1533, -0.0910, -0.1183, -0.2819, -0.4908,
         0.0124,  0.0033,  0.0713, -0.0683, -0.0069,  0.0355, -0.0914,  0.1248,
        -0.0663,  0.0836, -0.3187, -0.1938, -0.1352, -0.0654, -0.1613, -0.5443,
        -0.0546, -0.0561, -0.1145, -0.0640,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3923,  0.1113, -0.0839,  0.0377,  0.0065,  0.4369,  0.9914,  0.0566,
         0.0097, -0.0663, -0.0790,  0.6704, -0.0307, -0.1120, -0.0943,  0.2966,
        -0.0068,  0.2521,  0.2041, -0.0198, -0.0176, -0.0042,  0.0492, -0.0191,
         0.3280, -0.2429, -0.0833,  0.0800,  0.1423,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1679, -0.5520,  0.0274, -0.4061, -0.7611,  0.0577, -0.0875, -0.0220,
        -0.0298,  0.1235,  0.0659,  0.0299, -0.2085, -0.7158,  0.2055, -0.2601,
        -0.0511, -0.6928, -0.1553, -0.1292,  0.0911,  0.0665, -0.1447, -0.0147,
        -0.1427, -0.2239, -0.3076,  0.0160, -0.0050, -0.0961, -0.0584,  0.1125,
         0.1133,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2511, -1.7180, -0.0727, -0.4091, -0.1034, -0.2920, -0.0788, -0.4197,
        -0.0847, -0.2864, -0.3985, -0.0476,  0.0940,  0.0120, -0.0214,  0.0587,
        -0.0850, -0.0620, -0.5385, -0.0567, -0.2976, -0.0425,  0.0585,  0.2062,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1872, -0.7754, -0.8812, -0.0220, -0.4217, -0.1247, -0.2090, -0.2629,
         0.0807, -0.2610, -0.0620, -0.3035, -0.2789, -0.1278, -0.0071,  0.0113,
         0.0500, -0.1711,  0.0590, -0.5044, -0.2429, -0.0994,  0.4579,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1214e-02, -1.2381e+00,  8.0403e-04,  1.1161e-01, -5.1753e-02,
        -1.4382e-01,  5.0290e-02,  2.3334e-02, -4.9989e-02, -5.0959e-02,
        -1.3988e-01, -1.3673e-02, -1.1185e-01,  1.2304e-01, -1.3020e-02,
        -3.3467e-02, -4.9782e-01, -2.8236e-01, -2.3895e-01, -3.8261e-02,
         1.0611e-01, -1.5035e-01, -1.2708e-02, -1.4257e-01, -3.7672e-01,
        -2.7692e-02, -3.1879e-02, -2.2077e-02,  1.2392e-02, -3.2589e-01,
        -3.7625e-01, -8.7725e-02, -1.9826e-01, -1.3505e-02, -1.9988e-01,
        -3.9268e-01, -5.1068e-02, -1.2335e-01, -9.6407e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1747, -0.4263,  0.0033, -0.2339, -1.0641, -0.0647,  0.1154, -0.0275,
        -0.0574,  0.0444,  0.0105,  0.0644,  0.0678,  0.0653,  0.0370, -0.3615,
         0.0416,  0.0022,  0.0136,  0.1144, -0.2318,  0.0048, -0.0342, -0.0240,
        -0.4300, -0.6766, -0.1023, -0.2109, -0.0818, -0.4101,  0.0170, -0.0532,
         0.1283, -0.0255, -0.0195, -0.0251, -0.0924, -0.2920,  0.1118,  0.0567,
        -0.0653, -0.0869, -0.0980,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1683, -1.3280,  0.0313,  0.0484,  0.1110, -0.0364, -0.2006,  0.0071,
        -0.5290,  0.0349,  0.0484,  0.0262, -0.1533, -0.0882, -0.0221, -0.0799,
        -0.3156, -0.4229, -0.1089,  0.0027,  0.0019,  0.0623, -0.0151,  0.0717,
        -0.0029, -0.0085,  0.0265,  0.1484,  0.0817,  0.0508,  0.0640, -0.3766,
        -0.7100, -0.2023, -0.0411, -0.0807, -0.0616, -0.1534, -0.1995, -0.1915,
        -0.2663, -0.3309, -0.0371, -0.0117, -0.0568, -0.0697, -0.0149, -0.0346,
        -0.0340, -0.0278, -0.0727,  0.0201, -0.0851, -0.0340,  0.0073,  0.0508,
        -0.1373], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6316,  2.7960,  0.2130,  0.1614,  0.3581,  0.0217,  0.7602,  0.7407,
         0.0113,  0.2632,  0.5465,  0.0425, -0.2019,  0.0330,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2301, -1.3874, -0.0337,  0.0358, -0.0260, -0.1273, -0.0417, -0.0076,
        -0.2275, -0.0054, -0.3428, -0.5456, -0.0982,  0.0347, -0.2276, -0.1141,
        -0.0751, -0.1807,  0.0894, -0.3437, -0.6261, -0.0649, -0.2171, -0.0122,
         0.1089,  0.0330,  0.0359,  0.0190, -0.0071,  0.1009,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1570, -1.9568,  0.2481, -1.0716, -0.4353,  0.3952,  0.0040,  0.1224,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.4243, -0.8828, -0.0857, -0.2162, -0.2243, -0.4959,  0.0081, -0.0108,
        -0.0484, -0.0309,  0.1499,  0.0051, -0.3066, -0.4491, -0.0193,  0.0427,
        -0.0114, -0.0123,  0.0260,  0.0209, -0.0289,  0.0488,  0.0156, -0.0345,
        -0.3363,  0.0515, -0.2371, -0.0505, -0.3015, -0.4016,  0.0394, -0.1099,
        -0.2143,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1284e-01, -1.4253e+00, -6.2630e-01, -8.3864e-02, -1.0883e-03,
        -1.8591e-01, -8.1402e-02, -4.4090e-02,  6.9572e-02, -7.7034e-02,
         5.5623e-03, -9.5477e-02, -8.5279e-02,  4.5438e-02,  9.6735e-03,
        -2.8272e-02, -6.6967e-03,  1.3226e-03, -8.7709e-02,  1.2043e-02,
        -6.6377e-03, -2.0283e-02, -6.6869e-02, -4.6593e-04,  8.2150e-02,
        -4.5064e-02, -9.2946e-02, -3.6387e-01, -2.3084e-02,  4.2069e-03,
        -2.1690e-02, -7.6795e-02, -3.4547e-01, -7.1295e-01, -7.5828e-03,
        -5.7371e-02, -1.3404e-01, -1.7847e-02, -3.3898e-02,  8.6926e-03,
         2.7053e-02,  1.9981e-04, -4.7175e-02, -3.8079e-01, -6.7915e-02,
        -1.7300e-02, -2.8338e-03, -7.5271e-03, -5.3702e-02, -6.8938e-02,
        -6.2945e-02, -2.1549e-01, -4.4752e-02, -1.6694e-01, -5.4200e-02,
         1.2066e-02, -9.2578e-03,  6.2315e-03,  1.3643e-03, -7.8267e-02,
        -7.2002e-02, -8.3011e-02, -5.2193e-01, -3.6342e-02, -7.1694e-03,
        -5.3659e-02, -4.1366e-02,  9.8753e-02,  9.4780e-02, -6.4914e-03,
         8.9342e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1556, -0.0342, -0.1498,  0.0214, -0.2567, -0.0451, -0.2416, -0.0370,
         0.0259,  0.1171, -0.0375,  0.0369,  0.0540,  0.0277,  0.1003,  0.0747,
        -0.0515,  0.0351, -0.1456, -0.7296, -0.0329, -0.0593,  0.0505, -0.2995,
         0.2041, -0.4730, -0.0882, -0.0052,  0.0116, -0.0061, -0.0441, -0.0629,
        -0.4327, -0.3801, -0.0628, -0.0385, -0.4514, -0.3788, -0.0295, -0.0366,
         0.0993,  0.0055,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2205, -1.6194,  0.1392,  0.0612, -0.0389, -0.0272,  0.0353, -0.2052,
        -0.0661, -0.2479, -0.1645, -0.3482, -0.3801, -0.0149,  0.0516, -0.0154,
        -0.0168, -0.4605, -0.6059, -0.0368, -0.0598, -0.2092,  0.0419, -0.0079,
        -0.0470, -0.4545, -0.1970, -0.1328, -0.5665,  0.0740, -0.0319, -0.0487,
         0.0076, -0.0507, -0.1231,  0.0355,  0.0497,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5149, -2.6759, -0.2777, -0.7275,  0.0402,  0.1404,  0.0045,  0.0851,
         0.1462,  0.0151, -0.2248,  0.0652, -0.2583, -0.1602, -0.1993,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1934,  0.2537,  0.0158,  0.0350,  1.1920,  1.0543,  0.1952,  0.6064,
         1.2412,  0.2715, -0.1769,  0.0798,  0.0855, -0.0118, -0.2722,  0.0179,
         0.0947,  0.0730,  0.1168, -0.3543,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0629,  0.0304, -0.0423,  0.0456, -0.5460, -0.0521, -0.0460, -0.1405,
        -0.0562,  0.0268, -0.2714, -0.4538, -0.0478, -0.0502, -0.2308, -0.1639,
         0.0594, -0.1919,  0.0769, -0.2194, -0.0517, -0.0145, -0.0490, -0.0385,
        -0.0061, -0.2316, -0.3859,  0.0061, -0.0520, -0.1374, -0.4119, -0.0041,
         0.0194,  0.0015, -0.2417, -0.1907,  0.0410, -0.1600, -0.0261, -0.0226,
        -0.0427, -0.0401,  0.1525,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0259,  0.0090, -0.0766, -0.0264, -0.1634, -0.0295, -0.2411, -0.7131,
        -0.0584, -0.0155,  0.0896, -0.0016,  0.0220, -0.5663, -0.6817,  0.0690,
         0.0381, -0.0060, -0.0354, -0.0104, -0.3888, -0.0364,  0.0617, -0.0019,
        -0.0653,  0.0062, -0.0946, -0.0406, -0.2624, -0.0511, -0.0640, -0.2286,
        -0.0093, -0.2719, -0.3548, -0.1251,  0.0559, -0.0881,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4081, -2.6987, -0.9473, -0.8655,  0.0819, -0.7244, -0.3970, -0.3674,
        -0.1170, -0.5605, -0.0906, -0.1639, -0.5070, -0.1134, -0.0501, -0.2095,
         0.0336, -0.4101, -0.1278,  0.1292,  0.0855,  0.0588,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3373e-01, -8.9084e-01, -2.3111e-01, -9.3061e-02, -7.3157e-02,
        -3.0910e-01,  3.4192e-02, -1.6584e-02, -1.1471e-01, -1.1043e+00,
        -9.9137e-02,  2.5771e-01, -5.2777e-02, -1.7874e-01, -4.2432e-01,
        -4.0005e-01, -6.4090e-01,  4.0885e-02, -4.7450e-02, -2.3299e-01,
        -4.9643e-02, -9.6720e-02, -4.0642e-01, -1.4967e-01, -8.2680e-04,
        -1.2886e-02, -4.3053e-02,  2.5034e-01, -3.6206e-02, -5.1375e-01,
        -2.6092e-01,  6.7448e-02, -4.1436e-02, -7.3735e-02, -1.8401e-01,
        -6.4889e-01,  7.1638e-02, -6.0889e-03,  7.8892e-02, -1.8982e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2506,  0.0029,  0.0251, -0.0350, -0.0237, -0.1613, -0.0356,  0.0994,
        -0.1491, -0.0084, -0.0202,  0.0058, -0.0563, -0.5502, -0.3723, -0.0409,
         0.0992,  0.1085, -0.3038, -0.0414, -0.0057, -0.0221, -0.0187,  0.0882,
        -0.0053, -0.5092, -0.1111, -0.3943, -0.6411, -0.2266, -0.0272, -0.1032,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1906e-01, -3.0883e+00, -2.0149e-01, -1.2987e-01, -2.3687e-01,
         6.6322e-02, -1.3254e-01, -6.1670e-01, -7.1910e-01, -1.4819e-02,
        -1.2927e-01, -1.6998e-01, -1.4392e-02, -1.5092e-02, -1.7538e-02,
        -2.5759e-02, -3.5152e-01, -1.4786e-02, -3.2767e-02,  1.4706e-03,
        -2.3517e-02,  3.8185e-02, -8.8472e-02, -4.3233e-01,  4.2995e-03,
         9.8344e-02, -3.1652e-01, -3.2163e-02,  3.0834e-02, -1.8989e-01,
         2.4346e-02, -4.3019e-03, -6.3089e-02,  8.5821e-02, -5.8927e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.0734, -0.0180, -0.9924, -0.7433, -0.1821, -0.0926, -1.2203, -0.7356,
        -0.3155, -0.7107,  0.0786, -0.3655,  0.0595, -0.2931, -0.1148, -0.0153,
        -0.1330, -0.0360,  0.0218,  0.0679,  0.0577,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2200, -2.7795,  0.1764, -0.4891, -0.7319, -0.2641,  0.0139, -0.4510,
        -0.2797,  0.2774,  0.0080,  0.0325,  0.0360, -0.0689, -0.1366,  0.0996,
        -0.1848, -0.0795,  0.2189,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0866, -0.4002, -0.5576,  0.0044, -0.1678, -0.0133,  0.0667,  0.1669,
         0.0940, -0.4482, -0.8425, -0.0295, -0.3774, -0.7500, -0.1470, -0.2739,
         0.1404, -0.1596,  0.1633, -0.2321,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0449, -1.5541,  0.1654, -0.3667,  0.1037, -0.0883, -0.2963, -0.1712,
        -0.0636, -0.0241, -0.0487, -0.1173,  0.0243, -0.3073, -0.2064, -0.2486,
        -0.3853, -0.0609, -0.2234, -0.2883,  0.1579,  0.0278,  0.0381,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0778, -0.9382, -0.3756, -0.0689,  0.0184, -0.3035,  0.1674, -0.0619,
        -0.2109, -0.1851, -0.0057, -0.4378, -0.0291, -0.5956,  0.1143,  0.1468,
         0.0082,  0.0039, -0.0480, -0.1882, -0.3617,  0.0224,  0.0736, -0.0301,
         0.0726,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3621,  3.1932,  0.3620,  0.4929,  0.0692, -0.0507, -0.2381,  0.0866,
        -0.2718, -0.1702,  0.7092, -0.0922,  0.0986,  0.2738,  0.0204, -0.0449,
         0.0788,  0.0217, -0.0409,  0.0499,  0.3915,  0.9364,  0.2896,  0.5808,
         0.3712,  0.1807,  0.0386,  0.0875,  0.1098,  0.1606,  0.0399,  0.0172,
         0.1143, -0.1009,  0.1787,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0537,  0.0269, -0.1815, -0.3959, -0.0423,  0.1127, -0.4720,  0.0553,
        -0.6331, -0.0436, -0.1003, -0.0829, -0.0619, -0.5541, -0.4197, -0.9960,
        -0.2133, -0.6345,  0.1334,  0.0635, -0.0312,  0.0819,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1818, -0.0492, -0.2143,  0.0137,  0.1071,  0.1204,  0.1136,  0.1072,
        -0.0980,  1.1586,  0.0391,  0.3108,  0.1040,  0.1162, -0.0207,  0.5761,
         0.1524,  0.0967, -0.1183,  0.0421, -0.7759,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4313, -2.7605,  0.0279, -0.0820, -0.1680,  0.0392, -0.5987, -0.9955,
        -0.0117, -0.1109,  0.1127,  0.0926,  0.0600, -0.5128,  0.0592, -0.4056,
         0.2080,  0.5227, -0.0615,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3622,  0.0923, -0.3562, -0.0558,  0.0499, -0.4398,  0.0424, -0.0454,
        -0.0172, -0.0325, -0.0668, -0.0834, -0.0036, -0.1236, -0.0026, -0.0523,
        -0.2091, -0.0684, -0.0251, -0.1603, -0.0327,  0.1765, -0.1908,  0.0423,
        -0.2784, -0.0103, -0.1615, -0.3103,  0.0061, -0.6434, -0.0438,  0.0203,
        -0.1872, -0.4272, -0.0200, -0.0236, -0.0954,  0.0073, -0.0467, -0.1631,
         0.0078,  0.0623,  0.0148, -0.0064, -0.0353,  0.0224,  0.0721],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2539, -1.4013, -1.0079, -0.1051,  0.0660,  0.0044,  0.0026, -0.1465,
        -0.2176,  0.0267, -0.0137, -0.0376,  0.0141,  0.0425,  0.0088, -0.0117,
        -0.6035,  0.0046,  0.0133, -0.0606,  0.0670,  0.0216, -0.3253, -0.2377,
         0.0583,  0.0858,  0.0225,  0.0126, -0.0095,  0.0390,  0.0324,  0.0335,
         0.0888, -0.1089,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1789,  3.0001,  0.1137, -0.1111,  0.1964,  0.2945, -0.0188, -0.0036,
         0.0358,  0.1282, -0.0927,  0.0325,  0.3588,  0.6277, -0.0334,  0.0165,
         0.0812, -0.2449, -0.0381,  0.6863,  0.0555, -0.0105,  0.1058, -0.0107,
         0.0440, -0.0355, -0.0189,  0.1421,  0.0918,  0.3435,  0.0815,  0.0213,
        -0.0088, -0.0167,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.6796e-02, -2.3711e+00,  8.4234e-02,  4.2162e-02, -1.8165e-01,
        -5.9368e-01,  2.8750e-01, -9.0734e-03, -2.1533e-03, -6.4787e-01,
        -1.6820e-01, -2.2817e-02, -2.8055e-01, -7.5070e-01, -2.0638e-01,
        -1.0553e-01,  3.1740e-02, -1.1593e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4790, -0.0491,  0.0147, -0.0077, -0.0196, -0.0585,  0.0877, -0.3053,
        -0.7154, -1.5492, -0.0750,  0.0923, -0.1560, -0.0920,  0.1089, -0.1608,
        -0.3103,  0.0248,  0.1929, -1.4356,  0.1239, -0.0671, -0.1672,  0.1565,
         0.1686, -0.0901, -0.1863,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6144, -0.0264,  0.0068,  0.1115, -0.0240, -0.0064, -0.0653, -0.2523,
        -0.7031, -0.0662, -0.0312, -0.1415, -1.4062, -0.0955, -0.0389,  0.1054,
        -0.0289, -0.0816, -0.0281, -0.8410,  0.1522,  0.0270, -0.0907,  0.0501,
         0.0914, -0.2231,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6013, -1.4174,  0.0083, -0.5197, -0.6337, -0.2165, -0.0933, -0.0261,
        -0.3567, -0.2341, -0.4282, -0.4639,  0.1013,  0.0952, -0.0225, -0.4541,
        -0.0787, -0.1696, -0.0321,  0.1601,  0.1298,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1276, -1.8995, -1.1533, -1.2695,  0.0379, -0.2403, -0.5592, -0.0655,
        -0.1119, -0.0684,  0.0785, -0.4104,  0.0723, -0.0862,  0.2821,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3764,  0.0235,  0.1345,  0.0376,  0.0328, -0.0620,  0.1345, -0.4228,
        -0.0426, -0.1869,  0.0106, -0.1116, -1.6051, -0.4360,  0.4189,  0.0174,
         0.4635, -0.0534,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2422, -1.8811, -0.6275, -0.9040, -0.0987, -0.2542,  0.0821, -0.4556,
        -0.2397, -0.4364, -0.6361, -0.3631,  0.0623, -0.3987, -0.1414, -0.0596,
        -0.0324,  0.1422,  0.4106,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1401, -0.8300, -0.4946, -0.1149, -0.0018, -0.1564,  0.0361,  0.0418,
        -0.1549,  0.0079, -0.0188,  0.0798, -0.1652,  0.0355,  0.0123, -0.0929,
        -0.2686, -0.0834,  0.0892,  0.0016, -0.3863,  0.0125, -0.1930, -0.0610,
        -0.1825, -0.0082, -0.3611,  0.0297, -0.0145, -0.1315,  0.0163, -0.0477,
        -0.2125, -0.0229, -0.0785, -0.1056, -0.1947,  0.0027, -0.1038, -0.0417,
         0.0120,  0.0097,  0.0169, -0.0753], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3037, -1.4848, -0.1167, -0.7117, -0.4475, -0.2964, -0.1308, -0.1719,
        -0.4879, -0.0873, -0.1301,  0.1194,  0.1464, -0.0062, -0.0047, -0.0410,
         0.0254, -0.3608, -0.0359, -0.0779, -0.1047,  0.0039, -0.0473, -0.0296,
        -0.1530,  0.0574, -0.0048, -0.6533,  0.0068,  0.0379, -0.0699, -0.0923,
         0.0442, -0.1950,  0.1553,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9631e-02,  3.3587e+00,  2.4097e-01,  4.8764e-01,  4.0775e-02,
         1.9814e-01,  8.4939e-01,  4.1998e-01,  2.1249e-01,  1.2314e-02,
         3.1833e-02,  2.9749e-01,  7.9429e-01,  3.4455e-02, -6.6597e-02,
         5.0415e-01,  2.3571e-03,  3.0603e-02,  3.5292e-02,  9.4601e-02,
        -2.3642e-02, -2.7032e-02, -1.1035e-01, -6.0365e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1229, -1.3705,  0.0242, -0.3355,  0.0097, -0.0243,  0.0722, -0.1128,
        -0.4520, -0.0315, -0.1003,  0.0483,  0.0556, -0.3049, -0.3866, -0.0065,
         0.1046, -0.0130, -0.3835,  0.0192,  0.0240, -0.2370, -0.0288, -0.2546,
        -0.0806, -0.3867, -0.5237, -0.2145, -0.0946, -0.1737, -0.2598, -0.0250,
        -0.1250, -0.1256,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2349,  0.0710, -0.0410, -0.2305,  0.0774,  0.0130, -1.0847, -0.2016,
         0.1395, -0.0943,  0.0920, -0.0133, -0.5947, -0.0268, -0.5875, -0.6158,
        -0.0901,  0.0108, -0.0476, -0.0391,  0.0403, -0.1188,  0.0679, -0.7495,
        -0.3168, -0.1127, -0.3123,  0.1219, -0.1251, -0.0439,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-1.2729,  0.2321, -0.1631, -0.7800, -0.0032, -0.0642, -0.1130, -0.0257,
        -0.5481, -0.9868, -0.0722,  0.0332, -0.0927,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1832,  0.1035, -0.2363,  0.3172,  0.0372, -0.1705, -0.4152, -0.5781,
        -0.8678, -0.0333,  0.0666, -0.3618,  0.3441, -0.2606, -0.5071, -0.0058,
        -0.0262, -0.0287,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2704, -1.9107, -1.2293, -0.1864, -0.7471,  0.0388,  0.0326, -0.1189,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0157, -1.4721, -0.0084, -0.1960, -0.1968, -0.2192, -0.0936, -0.1178,
         0.0651, -0.0084,  0.0770,  0.2089, -0.0432, -0.0185,  0.0332, -0.0145,
         0.0154,  0.0588,  0.0647, -0.0565,  0.0969,  0.0274, -0.1116, -0.1228,
         0.0217, -0.1450, -0.0990, -0.0153, -0.3094, -0.4065,  0.0316, -0.4423,
        -0.0765, -0.5191, -0.1158, -0.3424, -0.2966, -0.1107,  0.0181,  0.0426,
        -0.1145, -0.0700, -0.0743,  0.0081,  0.1775, -0.0090,  0.0222],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3519, -1.3128, -0.4716, -1.5374,  0.1271,  0.0835,  0.0169, -0.3758,
        -0.1739, -0.3007,  0.0633,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3921, -2.3043, -0.2745, -0.9891, -0.2862, -0.0332, -0.3626, -1.0307,
        -0.0502, -0.6277, -0.1992,  0.1595, -0.6574,  0.0082, -0.1323,  0.1494,
        -0.1211, -0.1829,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2662, -0.1204, -2.8540,  0.6690,  0.1398,  0.2095, -0.4007, -0.0355,
        -0.6945, -0.2735, -0.1206, -0.0282,  0.1896,  0.0564, -0.1935,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1862,  2.7582,  0.3367,  0.2799,  0.4151,  0.6275,  0.8874,  0.0309,
         0.3011,  0.8285,  0.8580,  0.0856, -0.1157,  0.9661,  0.0249,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4449,  0.0585,  0.0907, -0.1490, -0.0525,  0.1620,  0.0203,  0.0613,
        -0.6595, -0.0348, -0.0860, -0.2947, -0.5915,  0.0611, -0.0485, -0.1103,
        -0.3667,  0.0418,  0.1230,  0.0047, -0.5538, -0.8374,  0.3682, -0.1182,
        -0.0446, -0.0713,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7178, -0.0548,  0.2567,  0.1589,  0.2194,  0.0989,  0.4788, -0.6831,
        -2.4939, -0.0643,  0.0250,  0.0070,  0.2261,  0.1683, -0.1266,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6069e-02, -2.6852e+00,  1.7096e-02, -3.5324e-01, -4.8560e-01,
        -4.4094e-01, -3.0833e-01,  9.3445e-02, -6.0221e-02, -1.1812e+00,
         6.1580e-02,  1.0713e-02, -1.9115e-01, -8.8636e-02, -3.7211e-02,
         8.8955e-02, -8.2040e-02, -2.4371e-02, -2.1587e-01,  9.8691e-02,
        -2.7897e-01, -5.3415e-02, -4.7298e-02, -2.5937e-03,  2.6585e-01,
        -1.7128e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1340, -0.0304, -0.0248,  0.1186, -0.0342, -0.0889,  0.0488,  0.0377,
        -0.1823, -0.1811, -0.4006, -0.0151, -0.0718,  0.0017, -0.4810, -0.4530,
        -0.0284, -0.2257, -0.0402, -0.3442, -0.0407, -0.1581, -0.0015, -0.1582,
         0.0706, -0.0036,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.2027, -2.0082, -0.1660, -0.1511, -0.1597, -0.0261, -0.2129, -0.5418,
         0.0455, -0.2897, -0.0571, -0.2864, -0.4269, -0.0177, -0.1795,  0.0069,
         0.0077, -0.0151, -0.1612, -0.0882, -0.0356, -0.1379,  0.0915, -0.3382,
        -0.6765, -0.0513, -0.2489, -0.1041,  0.0353,  0.0688, -0.1184, -0.2093,
        -0.0545,  0.0418,  0.0344,  0.0676,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0734, -0.5532, -0.2801,  0.0302, -0.3726,  0.0283, -0.3593, -0.4095,
         0.0606,  0.0658, -0.1887, -0.0097, -0.0792,  0.1489, -0.0154, -0.0897,
         0.0024, -0.0251,  0.0640,  0.0745, -0.0084, -0.3710, -0.0979, -0.0871,
        -0.2909, -0.3632,  0.0270, -0.3415, -0.3043,  0.0374,  0.0245, -0.3561,
        -0.4566, -0.0555, -0.0830,  0.0494,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2911, -0.3717, -0.0021, -0.2844,  0.3207, -1.1520, -0.0166, -0.2591,
        -0.3179, -0.0656, -0.0415, -0.7064, -1.1277, -0.1408,  0.0853, -0.0331,
        -0.0965, -0.0961, -0.3102, -0.0635,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1940e-01, -8.2525e-01, -8.2659e-02, -1.4199e-01,  9.2945e-02,
        -4.4317e-02, -7.4644e-02, -8.7645e-02, -3.8946e-02, -2.1462e-01,
        -1.1022e-02, -8.9900e-02, -1.0802e-01, -6.5491e-04, -5.7234e-01,
        -2.1790e-01, -1.4635e-01, -2.2914e-01, -4.9519e-02, -2.9649e-02,
        -1.0624e-01, -5.6195e-02,  8.3509e-03,  1.1637e-01, -2.2737e-02,
         3.7105e-03, -1.5519e-02, -5.3641e-03, -7.8656e-02, -2.8642e-01,
        -3.7525e-01, -1.2813e-01, -3.1965e-02, -5.5033e-01,  5.5607e-02,
        -1.0060e-02,  3.9947e-01, -2.3131e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1302e-01, -1.1473e+00, -5.3741e-01, -5.5311e-01, -9.0466e-02,
        -4.0174e-03, -5.6797e-02,  2.2335e-03, -7.9784e-02, -8.8871e-02,
        -4.1251e-01, -5.7715e-01, -5.4628e-02,  8.5857e-03, -6.1195e-02,
        -5.1242e-01, -6.8756e-02, -2.3627e-01, -4.2119e-03,  1.0945e-03,
         5.1456e-02,  3.3219e-02,  7.6830e-02,  2.1949e-02,  6.0444e-02,
        -1.4370e-01, -4.6319e-02, -1.7298e-01, -4.1022e-01, -1.2079e-02,
        -5.0026e-02, -1.1455e-01,  1.1366e-02, -3.4447e-02, -2.5582e-01,
        -2.1255e-02, -4.6695e-02, -4.4407e-03, -1.6839e-02,  3.0744e-02,
         2.2043e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0499, -2.1189,  0.0449, -0.2140,  0.0389, -0.7359, -0.0890,  0.0342,
        -0.0082,  0.0254,  0.0389, -0.3748, -0.4544, -0.0316, -0.1074, -0.6339,
        -0.0829, -0.2767, -0.0195, -0.1467,  0.2166,  0.0850,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1215,  0.0544,  0.0180,  0.0199,  0.0486, -0.1796, -0.0079,  0.0168,
        -0.0642, -0.0066, -0.0347, -0.3057, -0.0972, -0.3105,  0.0056, -0.4769,
         0.0082, -0.3414, -0.0671, -0.2261, -0.4702,  0.0521,  0.1716, -0.2481,
         0.0307,  0.0163, -0.0797, -0.2892, -0.4138,  0.0137, -0.2279, -0.3025,
        -0.0917,  0.0279,  0.1212,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5063, -0.3452, -1.6388,  0.0850, -0.4818, -0.0403, -0.0089,  0.1556,
        -0.0054, -0.0705, -0.2129,  0.0588, -0.0734, -0.0291, -0.0856, -0.2093,
         0.1730,  0.0128, -0.6518, -0.0743, -0.2549,  0.1499, -0.1644, -0.5060,
        -0.2994, -0.1175, -0.4615, -0.0283, -0.1450,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1955, -1.7756, -0.4824, -0.3549, -0.1160, -0.2923, -0.0239, -0.0247,
        -0.0397, -0.1492, -0.1323, -0.1012,  0.0360,  0.0681,  0.0441, -0.2164,
        -0.0351, -0.0370, -0.2589,  0.0818,  0.0447, -0.0673,  0.0817,  0.0325,
         0.6189,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1098, -0.0437,  0.0388,  0.0192, -1.4194, -0.2374, -0.1905, -0.7655,
        -0.8184, -0.1728, -0.0545, -0.0337,  0.0641,  0.1524, -0.4774, -0.5976,
         0.0956, -0.1719, -0.0222,  0.0756, -0.1062,  0.0016,  0.0216,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0107e-01, -1.8742e-02,  2.9035e-02, -2.9822e-04, -2.9082e-02,
         7.1209e-02,  6.4989e-02,  2.7508e-02,  2.8925e-02, -1.2586e-02,
        -2.7005e-02, -1.7697e-02, -4.5686e-01, -3.1270e-01, -4.8850e-02,
        -7.7172e-02,  8.5109e-02, -2.7137e-01, -2.4140e-02, -2.2475e-02,
        -3.7533e-02, -8.9473e-02,  3.9652e-02, -1.1798e-01, -1.2995e-01,
        -1.0732e-01, -8.4921e-01, -4.7862e-02, -5.7333e-01, -4.7103e-02,
        -2.3062e-01, -1.9572e-02, -3.0769e-02, -2.5718e-02, -2.8977e-01,
        -6.3719e-02,  3.7027e-02,  1.5289e-02,  7.6058e-03,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6167, -0.0944,  0.3305,  0.7761,  2.4355,  0.1661,  0.0436,  1.3342,
         1.3790, -0.0047,  0.1284,  0.2720,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.3313, -0.4195, -1.5336,  0.1022,  0.0219, -0.4241, -0.2386, -0.0312,
         0.0418, -0.1236,  0.0359, -0.0313, -0.0520,  0.0123, -0.2795,  0.0223,
        -0.1906, -0.3585, -0.0354, -0.0481, -0.1455, -0.1422, -0.0049, -0.0490,
         0.0034, -0.0278, -0.0832, -0.1149, -0.0776, -0.0625,  0.1519,  0.1060,
        -0.2176,  0.0071,  0.0604, -0.0168, -0.2303, -0.0284, -0.0035,  0.0030,
         0.0486,  0.0593, -0.0031,  0.0125,  0.1242,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1215, -4.2408, -0.4880, -1.1339, -0.3319,  0.1846,  0.1170, -0.1748,
        -1.0515, -0.2494,  0.0804,  0.5116,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2568, -2.3754, -0.4118, -0.8503, -0.1903, -0.3965, -0.3065,  0.5231,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7548e-01, -1.6485e+00, -3.2390e-01, -7.2003e-02, -3.6094e-01,
        -7.5744e-02, -6.7576e-02, -8.8168e-02, -2.9237e-01, -4.6136e-01,
        -1.9237e-01, -9.4200e-02, -2.4120e-02, -1.0043e-02,  2.8849e-02,
        -7.6200e-03,  1.5687e-01,  1.0470e-02,  6.3328e-02, -1.9166e-02,
         2.7166e-02,  4.8893e-02, -6.7514e-02, -3.1549e-04, -2.4728e-02,
        -1.5769e-02, -1.8851e-02, -5.8564e-02, -1.6266e-01, -2.1185e-02,
        -2.8200e-02, -3.0778e-02,  4.0786e-04,  1.2290e-02, -2.9549e-02,
         5.5240e-03, -3.7847e-02, -3.3621e-02,  1.5913e-03, -2.0593e-02,
        -4.7084e-02, -1.9061e-02, -2.7824e-02, -9.2488e-02, -6.1856e-02,
         2.5856e-01, -6.3663e-01, -8.6240e-02, -5.2744e-01, -7.2343e-01,
        -9.9015e-02, -1.6888e-02, -2.9901e-01,  4.3173e-02, -1.3676e-01,
        -2.0902e-02, -5.2535e-02,  5.9396e-03,  4.3494e-02,  1.5290e-01,
         1.0732e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0108, -0.0227,  0.0286, -0.1946, -0.4797,  0.0397, -0.1469,  0.1264,
         0.0536,  0.0244, -0.0709, -0.4579,  0.0042, -0.2688, -0.3990,  0.0214,
        -0.0779,  0.0313, -0.1689, -0.1679, -0.4067, -0.2019, -0.0519, -0.0471,
        -0.1436, -0.1442, -0.0670, -0.0651, -0.0601, -0.0700,  0.0025,  0.0046,
        -0.0858,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1383, -0.3788, -0.6858, -0.4641,  0.0019,  0.1012, -0.4681,  0.0781,
         0.0021, -0.0486, -0.0071, -0.0732, -0.3353, -0.0602,  0.1268, -0.0771,
        -0.0482, -0.2700, -0.7904,  0.0843,  0.2461,  0.1604, -0.2524, -0.0015,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2269, -0.8077, -0.5794, -0.0325, -0.0364, -0.4182,  0.0325, -0.2300,
         0.0207, -0.1842, -0.3869, -0.3355, -0.4767,  0.0192,  0.0397, -0.0188,
         0.0345, -0.0491,  0.0838,  0.0149, -0.2064, -0.0487, -0.0383, -0.1205,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1318, -0.1372, -0.2695, -0.5855, -0.0634,  0.1358, -0.0236, -0.3353,
        -0.0494, -0.0183,  0.0522, -0.0462, -0.0633, -0.0401, -0.3502, -0.3268,
        -0.3954,  0.0234, -0.0175, -0.2573, -0.7626, -0.0219, -0.4071, -0.5156,
        -0.0174,  0.0304, -0.2412, -0.0110,  0.0849, -0.1812, -0.1180, -0.0731,
         0.1203,  0.1572,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0950,  0.0868,  0.0280,  0.0701, -0.8620, -0.0569, -0.0647, -0.0970,
         0.0185,  0.0396,  0.0677,  0.1814, -0.0767, -1.2044,  0.0823,  0.2643,
        -0.5471,  0.1455, -0.5057, -0.0310, -0.3041, -0.9167,  0.2304, -0.0537,
        -0.3435,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3820,  0.1098,  0.0272,  0.0236,  0.0780,  0.2116, -0.6665, -0.3601,
        -0.5472, -2.0558, -0.1236, -0.0980, -0.2539, -0.0308,  0.0087, -0.2079,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1632, -1.9544,  0.0116, -0.5900, -0.1134, -0.3686, -1.1420,  0.2036,
         0.0343, -0.0672,  0.0809,  0.0870, -0.1773, -0.0322,  0.1579,  0.0347,
        -0.2476, -0.4195, -0.1484, -0.1191, -0.0172,  0.1241,  0.0293,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1726, -0.0145,  0.0547, -1.4279, -2.7350,  0.5426,  0.2907, -0.1014,
        -0.1886,  0.1591, -0.0978,  0.2790,  0.0984, -0.9901,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.1850, -0.4256, -0.3950, -0.6108, -0.0023, -0.1267, -0.3148,  0.0946,
        -0.1378, -0.0318, -0.5158, -0.5459, -0.0156, -0.0108, -0.0140, -0.1466,
        -0.2294, -0.5058, -0.0685, -0.5708, -0.1324,  0.0060,  0.1075,  0.1377,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0963,  0.4507, -0.4323, -0.1475, -0.8965, -0.1150, -1.1951, -0.0662,
         0.0597, -0.0750, -0.0373,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1444,  0.1135,  0.0572,  0.0343,  0.1634, -0.4927, -0.6530, -0.1282,
        -0.4244, -0.1087, -0.3183,  0.0101, -0.2027,  0.0520, -0.2249, -0.0033,
        -0.0901, -0.4274, -0.1372, -0.0862, -0.0372,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3698, -0.0854, -0.2565, -0.0972, -0.3088, -0.0520, -0.1366, -1.7778,
        -0.4996, -0.9542, -0.3139, -0.1092, -0.0914, -0.3356,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5493,  0.0373, -0.0081, -0.0967, -0.2174,  0.0909,  0.0322, -0.6905,
        -0.2141, -0.0183,  0.6016,  1.1624,  1.9649,  0.2026,  0.0156,  0.3387,
        -0.3281,  0.2635,  0.1064, -0.0282,  0.1109,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0140, -1.8757,  0.0323, -0.5712,  0.2282, -0.5679, -0.1170, -0.8634,
        -0.0684, -0.6324, -0.1722, -0.1467,  0.2388,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3939, -1.1970, -0.0224, -0.0472, -0.0179, -0.1545, -0.3324, -0.3403,
        -0.1667,  0.0740,  0.1668,  0.0148, -0.0212,  0.0544,  0.0472,  0.0101,
         0.0612,  0.0200,  0.0647,  0.1109, -0.0569,  0.0658,  0.0817,  0.0551,
        -0.4191,  0.0978, -0.5081, -0.4566, -0.6340, -0.0334, -0.2748,  0.1346,
         0.0647,  0.0907,  0.0884,  0.0554, -0.0940,  0.0055,  0.0225, -0.0813,
        -0.0995, -0.0056, -0.0193,  0.2001,  0.0926], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1437e-01, -2.0865e-01, -4.3136e-01, -2.4921e-02,  8.8432e-02,
        -7.4328e-02,  5.1248e-02, -1.8565e-01,  1.3784e-02, -3.9076e-02,
        -5.3167e-01,  1.0197e-01, -3.5248e-02, -2.3488e-03, -3.7728e-01,
        -2.1050e-01, -9.0610e-01, -6.5078e-02, -5.2715e-02,  1.0496e-01,
        -9.6332e-02, -6.6095e-01, -3.7789e-02, -5.3218e-01, -6.3207e-02,
         9.4301e-02, -3.3513e-01, -1.7988e-01, -3.4583e-01, -4.5638e-02,
         4.3195e-02,  4.5613e-04, -2.8480e-01,  9.7395e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1620,  0.3216, -0.1327, -0.5423, -0.0450,  0.0106,  0.0731, -0.0450,
         0.1923, -0.0703, -0.2147, -0.0022,  0.0695, -0.0782,  0.1193,  0.0487,
        -0.0179,  0.1090, -0.2404, -1.2935, -1.5072, -0.1953,  0.0467, -0.5943,
        -0.0484,  0.0094,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0933, -2.6442, -0.0745, -0.5107,  0.1651,  0.0611, -0.1099, -0.3181,
        -0.0979,  0.0876, -0.0132,  0.0259, -0.0396, -0.0286, -0.2081, -0.3069,
        -0.7125,  0.0178, -0.1171, -0.0448, -0.2393, -0.1461, -0.2057,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2515, -1.1537, -0.1823, -0.2802, -0.0674,  0.0121, -0.0182, -0.1538,
        -0.3624, -0.0607,  0.0239,  0.0312,  0.0094, -0.0681, -0.8246,  0.1337,
        -0.1110, -0.2123, -0.3980,  0.0206,  0.0124, -0.0088,  0.0498,  0.0234,
         0.0166, -0.0113, -0.2847, -0.5686, -0.0660, -0.1064,  0.0520, -0.0719,
        -0.0414,  0.0537,  0.0154, -0.0289, -0.1559,  0.1130, -0.0923,  0.0024,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6292e-01, -1.4377e-01,  2.2257e-02, -1.4595e-01, -3.0821e-01,
        -1.6144e-04, -1.4747e-01, -4.1738e-01,  7.6357e-02, -4.9312e-02,
        -2.5153e-02, -4.8283e-02, -1.7510e-01, -4.8221e-02, -1.1516e-01,
        -2.6517e-01, -2.1123e-01, -3.5846e-02, -1.1769e-02, -1.8261e-01,
        -6.3061e-02, -3.5291e-01, -2.4520e-01,  1.6850e-02, -5.0933e-02,
        -5.0394e-02, -3.5414e-02, -2.3637e-01, -7.2434e-03, -4.6785e-02,
        -2.1072e-01, -1.3642e-02,  1.6586e-01, -3.7992e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-2.3687e-01, -1.7318e-01, -1.0236e-01, -8.2546e-02, -3.2447e-01,
         4.8648e-03,  9.5797e-02, -1.1078e-03,  2.3801e-02,  1.7458e-02,
        -2.4185e-02, -4.0461e-02, -2.1062e-02, -5.6354e-02, -6.2524e-01,
         7.5266e-02,  2.5763e-02, -4.2719e-02, -2.2875e-01, -5.2034e-03,
         1.4213e-02, -2.2870e-02, -2.8048e-02,  2.4323e-04, -3.1087e-01,
        -4.2319e-01,  6.4837e-03, -2.3520e-02, -2.0421e-02,  3.0092e-02,
        -1.5387e-02,  4.1705e-02,  8.2566e-03, -2.8765e-02, -1.9407e-01,
         1.4486e-02, -4.7813e-02, -1.4589e-01, -3.6928e-02, -1.2658e-01,
        -1.3725e-01,  1.9829e-02, -1.3519e-01, -7.4739e-02, -1.2244e-02,
        -2.6120e-02,  1.2611e-02,  1.2020e-02, -6.9320e-02, -7.6818e-02,
         1.3759e-02, -4.9005e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2521,  0.0898,  0.0582,  0.0686, -0.5484, -0.0595,  0.0424,  0.1546,
         0.0396,  0.0117,  0.0173,  0.0446, -0.1277, -0.9919, -0.2294, -0.3803,
        -0.7408, -0.4179, -0.4848,  0.0687, -0.2239, -0.0496, -0.1730, -0.4350,
         0.0312, -0.0477,  0.0699,  0.0401, -0.0763,  0.0341, -0.0642,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4378, -1.3352,  0.1300, -0.0711,  0.0291,  0.1346, -0.1807, -0.0503,
        -0.1230,  0.1065, -0.1096, -0.5997, -0.0904,  0.0762, -0.2970, -0.4191,
        -0.0345, -0.3446, -0.6003, -0.5447, -0.1979, -0.1265, -0.1393,  0.1046,
         0.0721,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1907, -0.0458,  0.1756,  0.1684, -0.2079, -1.2402, -2.3258,  0.0087,
        -0.1094, -0.0351,  0.1915,  0.0757,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0696, -1.9368, -0.4103, -0.4415, -0.7051, -0.0407, -0.0936, -0.0308,
        -0.0235, -0.0698, -0.0556, -0.2932, -0.3600, -0.6020, -0.2500, -0.3353,
        -0.0479, -0.0329, -0.0523,  0.0574, -0.1915, -0.0450, -0.1289, -0.0140,
         0.1690, -0.2539,  0.1002, -0.0398, -0.0564,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2939, -2.1405,  0.2168,  0.3256,  0.1404, -0.4690, -1.2004,  0.2350,
        -0.0862,  0.1134, -0.1387,  0.0476,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1132, -1.7714, -0.6640, -0.5794,  0.0346, -0.5164, -0.6528, -0.0548,
        -0.8753, -0.1356,  0.0039, -0.0643,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0871,  0.2920, -0.8282,  0.1780, -0.5604,  0.1802, -0.1112, -0.0846,
        -0.8014, -0.0984,  0.0490, -0.8786,  0.0145, -0.0968, -0.0619, -0.6086,
        -0.0246,  0.1337, -0.0590, -0.1300,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1519, -1.2767, -0.1939, -0.2955, -0.3047, -0.0063, -0.1865,  0.0128,
        -0.1115,  0.0124, -0.1145, -0.3814, -0.1978, -0.0956, -0.0378,  0.0260,
        -0.0158,  0.0091, -0.0057, -0.1445, -0.2723,  0.1580,  0.0808, -0.1358,
        -0.0082,  0.1078, -0.0028, -0.1778, -0.1284,  0.0254,  0.0651, -0.0264,
        -0.1605, -0.4839, -0.1028, -0.0351, -0.0133,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8136e-01,  5.5587e+00,  1.6438e-01, -3.5000e-01,  1.5502e-01,
        -1.9249e-01,  4.2667e-03,  1.1783e-01,  2.8428e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0440,  0.2189,  0.0262, -0.6158, -0.0346, -0.0537, -0.5262, -0.4898,
        -0.0808,  0.0453,  0.2353,  0.0285, -0.3943, -0.3750,  0.0773,  0.1742,
        -0.0157,  0.1844, -0.1878, -0.3191, -0.0248,  0.0608, -0.0270,  0.0696,
        -0.0329, -0.0202, -0.0740,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1814, -1.9339,  0.2011, -0.4427,  0.0286, -0.2903, -0.0411,  0.0300,
         0.0222,  0.0732,  0.0877, -0.4907, -0.0655,  0.0119,  0.0091,  0.0141,
        -0.1082,  0.0662, -0.0093,  0.1104,  0.0871, -0.3431, -0.5448,  0.0799,
        -0.0374, -0.0602, -0.0394, -0.2334, -0.3874,  0.0369,  0.0047, -0.0660,
        -0.0467, -0.1821,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-2.0417e-01, -1.1644e-02, -1.2666e-02,  1.9387e-02, -2.3745e-01,
        -1.1557e-01,  4.2104e-02,  2.7265e-02,  1.3752e-04,  1.9457e-02,
        -4.4679e-02, -2.7087e-01,  4.4968e-02,  5.8144e-02,  1.6679e-01,
        -3.5985e-01, -6.4615e-01,  7.0384e-02, -1.7205e-01,  3.9879e-03,
         4.0537e-02, -1.2376e-02, -1.0584e-01, -1.0156e-02,  8.5840e-02,
         9.3154e-03, -5.5524e-03,  3.1318e-02, -6.9307e-03,  1.8013e-02,
         3.1157e-02, -4.6370e-01, -4.7307e-01, -1.0079e-01, -1.8671e-01,
        -2.3910e-01, -2.2091e-01, -4.0485e-02,  1.9610e-02, -5.9877e-03,
        -6.8810e-02, -1.6717e-01, -2.9375e-02,  2.2926e-02,  7.1760e-02,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6611,  0.0560, -0.3463,  0.2165, -0.4889, -0.4138, -1.4195,  0.1433,
         0.4640, -1.0153,  0.6884, -0.2049, -0.6102, -0.1008, -0.2502, -0.5073,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7397e-01, -1.6161e-01,  4.5036e-02,  3.2778e-02,  3.3894e-02,
         1.4582e-01, -3.2208e-02,  3.7460e-01,  1.1979e+00,  1.1796e+00,
         4.3265e-01,  7.3675e-01, -1.6946e-01,  5.4010e-01,  5.8176e-01,
        -5.0491e-02,  6.5571e-02,  5.2428e-02, -2.5635e-02,  7.8681e-02,
        -1.6920e-02,  6.4660e-03,  1.2003e-02, -1.4862e-01, -1.9791e-02,
         9.3436e-04, -2.8656e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9055e-02,  1.7135e-02, -2.8107e-01, -5.9090e-02, -1.8305e-01,
         1.9498e-02, -1.0612e-01, -4.5458e-02, -9.7744e-02, -3.7086e-03,
        -1.1132e-02, -1.4783e-02, -1.1959e-01, -3.8625e-02, -1.9521e-01,
        -4.5490e-02, -1.5566e-01,  4.3157e-04,  1.5988e-02, -2.4324e-01,
        -1.6474e-03, -1.4280e-01, -4.8903e-02, -3.3717e-01,  8.6534e-02,
        -6.4103e-03, -7.3038e-02, -3.5702e-01,  2.5499e-04, -2.1760e-01,
         1.2983e-02, -1.7145e-01, -2.5942e-01,  2.6409e-02, -8.7719e-02,
        -2.5209e-01, -1.5121e-01, -3.0200e-01,  1.5423e-03,  1.9839e-02,
        -9.2378e-02, -1.6400e-01, -3.1206e-02, -5.9064e-02,  6.9910e-02,
        -6.5725e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1717,  0.2172, -0.1387,  0.1547, -0.0483,  0.0604, -0.0726, -1.2145,
         0.0278, -0.1441, -0.1628,  0.1867,  0.0495, -0.0810, -0.1092,  0.0350,
        -0.0450, -0.1634, -0.5481, -1.1412,  0.0072, -0.0022, -0.0436, -0.1006,
         0.0181, -0.0453, -0.3699, -0.1873,  0.0024,  0.2394,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1269,  0.0857,  0.0878,  0.0558, -0.1362,  0.1192,  0.3827,  1.8880,
         0.3835,  0.0317, -0.0521,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0151, -1.6982, -0.0706,  0.1485, -0.3773, -0.0316, -0.3195, -0.5887,
         0.0164, -0.0299, -0.0671,  0.0123, -0.0214, -0.0907,  0.1617, -0.5855,
        -0.2224, -0.1137,  0.0413, -0.2756,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0813, -2.1404, -0.8298, -0.5295,  0.5603, -0.3057, -0.4262, -0.0656,
        -0.0883, -0.0160, -0.0210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0195, -2.3798, -0.9448, -0.9604, -0.3707, -0.4000, -0.1163, -0.4174,
        -0.3651,  0.0818, -0.0776, -0.0103, -0.1388, -0.1901, -0.0648, -0.3455,
         0.1470, -0.0288, -0.0559,  0.0414,  0.8975,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3786, -1.6361, -0.6114, -0.1221,  0.1683, -1.3150, -0.1493, -0.1182,
        -0.1932, -0.0150,  0.0117, -0.2687, -0.0930, -0.0915, -0.0908,  0.0049,
         0.0754,  0.2053,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1855, -0.5812, -1.0428, -0.0729, -0.0825,  0.0629, -0.0193,  0.0373,
         0.0021, -0.0206, -0.0027,  0.0165, -0.0919, -0.3320, -0.2560, -0.1075,
        -0.1500, -0.2416, -0.1835, -0.0391, -0.0449, -0.2153, -0.5069, -0.0521,
        -0.2068, -0.0086, -0.3125, -0.1159, -0.0508, -0.3981,  0.0023, -0.0307,
        -0.0509,  0.0221, -0.0797,  0.0077,  0.0443,  0.0160,  0.2046,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3656, -2.1001, -0.0929, -0.4281, -0.0509, -0.0643, -0.0351, -0.1142,
        -0.0968, -0.0854, -0.1518, -0.3824, -0.0643, -0.0563, -0.2024,  0.0157,
        -0.2443, -0.0375,  0.1400, -0.1356, -0.1008, -0.0354,  0.0297, -0.0240,
        -0.0453, -0.0063, -0.1933, -0.0625, -0.2786, -0.4581, -0.0100, -0.0298,
         0.1102,  0.2924,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.0820,  0.2289, -0.6456, -0.6078, -0.0627, -0.7913,  0.4282, -0.0810,
         0.3303,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7859e-02, -1.9177e+00, -7.2294e-02, -3.8099e-01, -1.4020e-01,
        -5.2937e-01,  1.4653e-03, -2.2783e-01, -1.3802e-01,  9.9955e-03,
        -2.2771e-01, -1.4482e-01,  3.6582e-02, -1.1457e-01,  4.8762e-02,
        -6.5808e-01, -1.3588e-01, -8.7200e-01, -7.6832e-02, -1.2735e-01,
         4.4414e-02,  9.7777e-02, -4.4640e-02,  1.1957e-01, -1.7662e-01,
         5.5747e-02, -6.8486e-02,  6.6265e-02,  6.1395e-03,  3.9478e-02,
         1.3289e-02, -3.2188e-01, -2.3307e-02, -2.3409e-01,  6.1840e-02,
        -3.8255e-02,  8.1866e-02, -1.0465e-01,  2.2412e-01, -5.0377e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7116e-01, -5.0748e-01, -9.7591e-01, -3.3560e-02, -2.6927e-01,
        -9.4239e-02, -2.2473e-02, -1.4269e-01, -3.3594e-01, -4.7519e-01,
        -5.3068e-03, -2.3234e-03, -8.7061e-03, -1.2233e-02, -9.4302e-02,
         5.5774e-02,  4.0531e-02,  2.5193e-02, -3.2394e-01,  1.0568e-01,
        -2.2082e-01, -6.5253e-02,  7.9002e-02, -3.8780e-02,  3.7524e-02,
        -6.6158e-02, -1.6454e-01, -2.9374e-05, -2.6183e-01, -4.5069e-01,
        -9.7975e-03, -1.1020e-02, -6.8149e-03, -8.2496e-03,  2.4366e-02,
        -1.4469e-02, -2.4612e-01, -3.4559e-01,  2.0310e-02,  7.6723e-04,
         8.6389e-02, -7.8284e-03,  4.5937e-02,  3.8769e-02,  1.5711e-03,
         1.6075e-02, -2.0781e-02,  2.2735e-02, -3.9950e-03,  5.0970e-03,
         4.8752e-02, -8.2469e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0279, -1.6729, -0.5139, -0.4235, -0.1612, -0.0724, -0.6468, -0.5248,
        -0.3361, -0.0421,  0.2904, -0.1904, -0.4678, -0.1248, -0.1551, -0.3224,
        -0.3465,  0.0151, -0.2596,  0.0816,  0.0206,  0.3312,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1973,  3.6295, -0.5523, -0.0656, -0.0564, -0.1521,  0.1335,  0.6370,
        -0.1252,  0.1789,  0.2501,  0.3260,  0.1650,  0.0883,  0.0914,  0.8886,
         0.1829, -0.0937,  0.0057, -0.2324, -0.0163,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0424, -1.2336, -0.2020, -0.0819, -0.0844,  0.0240, -0.1751, -0.0015,
        -0.1788,  0.0199,  0.0260, -0.0137, -0.0122, -0.0059,  0.0310, -0.0339,
         0.0071,  0.0107, -0.1830, -0.0091,  0.0343,  0.0078,  0.0081,  0.0641,
         0.0167, -0.0099, -0.0569, -0.1854, -0.0329, -0.1684, -0.5381, -0.2775,
        -0.0170, -0.1987, -0.2572, -0.0632,  0.0067,  0.0744,  0.0233, -0.0388,
        -0.3770,  0.0077,  0.0560,  0.0115, -0.2138,  0.0129, -0.1592, -0.0657,
        -0.0297,  0.0654, -0.1194,  0.0199, -0.0140,  0.0372,  0.0025],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0374,  0.0200,  0.0207, -0.0289, -0.0381, -0.1257, -0.3061, -0.0942,
        -0.0727,  0.0057, -0.0353,  0.0058,  0.0092, -0.1313, -0.2618, -0.0498,
        -0.0470, -0.1309, -0.0628, -0.3644, -0.0434, -0.0349, -0.1489,  0.0434,
         0.0877, -0.0235,  0.0279, -0.4121, -0.0021,  0.0298,  0.0636,  0.0923,
        -0.0816, -0.5536, -0.0522, -0.1251, -0.0023, -0.2106, -0.2455, -0.3196,
         0.0039, -0.0262,  0.0176, -0.0194, -0.1338,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0249, -1.4905, -0.4675, -0.6773,  0.1561, -0.0866, -0.0527, -0.0567,
        -0.2898, -0.0643,  0.0772, -0.1395,  0.2301, -0.3522, -0.2483, -0.1694,
        -0.0693, -0.4430,  0.0048, -0.1457, -0.4695,  0.2042,  0.0718, -0.0172,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0770,  0.0772, -0.0110,  0.0783, -0.0449,  0.0146, -0.2032, -0.1554,
         0.0394,  0.0201,  0.0099,  0.0149, -0.0301, -0.1983,  0.0406,  0.0986,
        -0.1546, -0.2840, -0.2721, -0.3592, -0.1178, -0.0178, -0.1745, -0.2379,
        -0.0576, -0.1596, -0.2230,  0.0187, -0.1130, -0.2026, -0.0452, -0.1300,
        -0.2692,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2279, -3.7392, -0.2731, -0.4853,  0.0220, -0.0757,  0.0138, -0.2804,
        -1.1094, -0.1299,  0.1771, -0.0498,  0.1709, -0.5879,  0.1665, -0.4047,
        -0.3723,  0.0769,  0.0219,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3097e-01, -3.3408e-02,  7.9605e-02,  3.1880e-02, -5.3280e-01,
        -7.8365e-02, -6.5352e-01, -2.8311e-01, -7.2352e-01, -9.4470e-02,
         4.0245e-02, -2.7247e-02, -2.7171e-02, -7.8488e-03,  4.5234e-03,
         1.0032e-02, -2.3902e-04, -2.9569e-01, -1.5106e-01, -9.6294e-02,
        -3.1064e-01, -5.0198e-01, -5.2845e-02,  5.2265e-02, -1.2406e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1156, -1.5351, -0.5718,  0.0384,  0.0034, -1.1431, -0.7537,  0.2308,
        -0.2044, -0.2605,  0.0323,  0.0058, -0.1929,  0.0364, -0.0460, -0.0064,
         0.2339,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.0232,  0.1315, -0.1170,  0.0119, -0.0827,  0.0504, -0.0317,  0.0543,
         0.0266, -0.4429, -0.1227,  0.0445,  0.1261,  0.1993,  0.0640,  0.1371,
        -0.5597, -0.9102,  0.0920, -0.2674, -0.1435, -0.7626, -1.2772, -0.1166,
        -0.0471,  0.0737,  0.0801,  0.0127, -0.0697, -0.0680,  0.0019,  0.1546,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1736, -0.7323, -1.0453, -0.0290, -0.0405, -0.0158, -0.2394, -0.5498,
        -0.0709,  0.0574,  0.0775,  0.1489,  0.0261, -0.0294, -0.1140, -0.1157,
        -0.0888, -0.0155, -0.3599, -0.0417, -0.2191, -0.0534, -0.4930, -0.6896,
        -0.0482, -0.0253, -0.0714, -0.2010,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3464,  0.6056,  0.0272, -0.1798,  0.1013,  0.6777,  1.0929,  0.3948,
         0.1558,  0.3964, -0.0254,  0.5159,  0.0333,  0.1085,  0.1488,  0.4622,
         0.0089,  0.2924, -0.0018, -0.0517,  0.1514, -0.0331,  0.0227,  0.0430,
         0.3752, -0.0255, -0.0903,  0.0630,  0.0721,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7046e-01, -5.0012e-01, -2.0509e-01, -4.3198e-01, -9.1154e-01,
        -4.5642e-02,  5.1107e-02, -9.7639e-03, -9.8101e-02,  4.8443e-02,
         2.0148e-01,  1.1085e-02, -4.9377e-02, -9.8717e-01,  3.7081e-01,
        -2.3061e-01,  3.0185e-02, -1.9958e-01, -5.8867e-02, -2.1926e-02,
        -2.2591e-04,  4.8850e-02, -9.4141e-02, -2.3683e-02, -2.2742e-01,
        -2.8081e-01, -2.3939e-01, -8.3772e-03, -2.7941e-02, -2.8040e-02,
        -2.2270e-02,  2.2370e-01, -3.1475e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2180,  2.4804,  0.0566,  0.6643, -0.0339,  0.3553, -0.0607,  0.2829,
         0.1905,  0.5868,  0.6448,  0.1022, -0.0306, -0.0499,  0.0309, -0.1003,
         0.3016, -0.0590,  0.7919,  0.2926,  0.6481,  0.0214, -0.1387,  0.1097,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0096, -1.1512, -0.7772, -0.0248, -0.3426, -0.1583, -0.1915, -0.3756,
         0.0922, -0.1620,  0.0184, -0.2343, -0.3367, -0.0800, -0.0335, -0.0329,
        -0.0754, -0.1602, -0.0327, -0.6343, -0.0578,  0.0507, -0.2525,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2002, -1.3270, -0.0043,  0.0102, -0.1437, -0.1084,  0.0701, -0.0205,
        -0.0671, -0.0357,  0.0516, -0.0230, -0.0871,  0.0174,  0.0026,  0.0176,
        -0.5099, -0.3172, -0.1780, -0.0472, -0.0254, -0.1954, -0.0453, -0.0844,
        -0.3655, -0.1285, -0.0059, -0.0391, -0.1117, -0.3001, -0.3724, -0.0578,
        -0.2519, -0.0549, -0.1105, -0.3744, -0.0605,  0.0787, -0.0101,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0281, -0.3561, -0.0037, -0.1852, -0.4937, -0.0292,  0.0291,  0.0464,
         0.0030,  0.0872,  0.0242,  0.0501, -0.0596,  0.0470,  0.0754, -0.3027,
        -0.1327,  0.0074,  0.0409,  0.0270, -0.3240, -0.0478, -0.0412, -0.0069,
        -0.3065, -0.7645, -0.0471, -0.1239, -0.0193, -0.4228, -0.1695, -0.1089,
        -0.0108, -0.0528, -0.0542, -0.0695, -0.0964, -0.4060,  0.0534,  0.0391,
        -0.0655, -0.0189, -0.0443,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0737, -1.3777, -0.0672, -0.0113,  0.1049, -0.0051, -0.1145,  0.0267,
        -0.4284,  0.0047,  0.0493, -0.0227, -0.1906,  0.0092, -0.0178, -0.0106,
        -0.2087, -0.4583,  0.0043, -0.0752,  0.0396,  0.0287, -0.1636, -0.0016,
         0.0203, -0.0275,  0.0836,  0.0138,  0.0430, -0.0214,  0.0741, -0.3558,
        -0.6940, -0.1271, -0.0238,  0.0352,  0.0971, -0.0059, -0.2461, -0.0265,
        -0.2446, -0.3132, -0.0458,  0.0771, -0.0163, -0.0805, -0.0124,  0.0054,
         0.0142,  0.0431,  0.0791,  0.0251,  0.0092,  0.0237, -0.0203, -0.0925,
        -0.0315], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2738,  3.2696,  0.3807,  0.3282,  0.3027, -0.2142,  0.7006,  1.4492,
        -0.0544,  0.0807,  0.4211, -0.0667, -0.0050, -0.4627,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4103, -0.8117, -0.1447,  0.1415,  0.0649, -0.2194, -0.1001,  0.0489,
        -0.1271, -0.0173, -0.2425, -0.7482,  0.0876, -0.0641, -0.1707, -0.0834,
        -0.2277, -0.0929,  0.0690, -0.4161, -0.5637, -0.0434, -0.1484, -0.0652,
         0.1168,  0.0846,  0.0168, -0.0415,  0.0816, -0.1584,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5889, -0.9204,  0.2202, -1.6747, -0.3166,  0.3855, -0.0579,  0.4263,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 4.8201e-02, -9.9954e-01, -1.1411e-01, -9.8867e-02, -2.1650e-01,
        -2.6693e-01,  1.4811e-01, -2.8976e-03, -5.8827e-02, -7.4103e-02,
         2.8679e-04,  3.0620e-02, -1.2764e-01, -3.9803e-01, -3.7787e-02,
        -7.0888e-02, -1.0287e-02, -1.7688e-02, -1.2156e-02, -1.3046e-02,
         1.1373e-02, -6.3188e-03, -5.7395e-02, -1.0064e-01, -2.0843e-01,
        -7.6486e-02, -4.3806e-01, -8.5549e-02, -2.7371e-01, -4.5551e-01,
        -9.6331e-02, -1.0649e-01, -2.3237e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2094, -1.2160, -0.5983, -0.2274, -0.1302, -0.2148, -0.2022,  0.0172,
         0.0809, -0.3035,  0.0111, -0.0513, -0.0420,  0.1165,  0.0028,  0.0333,
         0.0196,  0.0176, -0.0998,  0.0407, -0.0443,  0.0186, -0.0090,  0.0169,
         0.0699, -0.0077, -0.0593, -0.3088,  0.0072,  0.0148, -0.0020, -0.0388,
        -0.2124, -0.3975,  0.0086, -0.0345, -0.2052, -0.0257,  0.0045,  0.0640,
         0.0072, -0.0756, -0.0347, -0.3906, -0.0430, -0.0473, -0.0264, -0.0176,
         0.0146, -0.0063, -0.0766, -0.1133, -0.0662, -0.1288, -0.0090,  0.0191,
        -0.0278,  0.0329,  0.0036, -0.2844, -0.0237, -0.0406, -0.2506,  0.0016,
         0.0145, -0.0434, -0.0605,  0.0315,  0.0906, -0.0095, -0.1311],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0584, -0.1609, -0.2001, -0.0679, -0.3934, -0.1201, -0.7571, -0.0880,
         0.0177,  0.0700,  0.0168,  0.0558,  0.0165,  0.0115,  0.0389, -0.0362,
        -0.0116,  0.1023, -0.1012, -0.7524, -0.1881,  0.0845,  0.0638, -0.3444,
        -0.0367, -0.2357, -0.0420,  0.0944,  0.0701, -0.0716,  0.0174, -0.0526,
        -0.4097, -0.4150, -0.1797, -0.0239, -0.4120, -0.2314, -0.0376,  0.0014,
        -0.0083,  0.0052,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0177, -1.7861, -0.0659, -0.1286,  0.0081,  0.0662,  0.0843,  0.0427,
        -0.1386, -0.4208, -0.2424, -0.2311, -0.3316, -0.0135, -0.0113,  0.0648,
        -0.0559, -0.4009, -0.7551, -0.1240, -0.1615, -0.0863,  0.0140, -0.0248,
        -0.0872, -0.3878, -0.0937, -0.0587, -0.8131, -0.0687,  0.0759, -0.0344,
        -0.0083, -0.1349, -0.0568, -0.2223,  0.1460,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8814, -3.7081, -0.4412, -0.4233,  0.0504,  0.2364, -0.1554, -0.1044,
         0.0390,  0.1815, -0.2244,  0.2316, -0.4342, -0.2829,  0.5270,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0902,  0.4721,  0.0510, -0.1705,  1.4649,  1.6663,  0.0343,  0.6391,
         1.2337,  0.2063, -0.0922, -0.1198,  0.4944, -0.1019, -0.4113,  0.2062,
         0.1157,  0.2264, -0.0192, -0.6226,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0294,  0.0671, -0.1023,  0.1051, -0.8087, -0.1415,  0.0722, -0.0797,
        -0.0051, -0.0737, -0.2316, -0.3639, -0.0854, -0.0536, -0.2574, -0.1270,
        -0.0056, -0.1656,  0.1015, -0.2019, -0.0528, -0.0112,  0.0226,  0.0266,
        -0.0437, -0.2650, -0.3330, -0.0565,  0.0327, -0.2959, -0.3865, -0.1023,
         0.0068, -0.0367, -0.1727, -0.3040,  0.0768, -0.1721, -0.0514, -0.0976,
        -0.0882, -0.0731, -0.0453,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3508, -0.1021, -0.0660, -0.2812, -0.4587, -0.1760, -0.3664, -0.6226,
        -0.2089, -0.0818,  0.0997,  0.0764,  0.0247, -0.3646, -0.4767, -0.0590,
         0.0111,  0.0681, -0.0376, -0.0430, -0.3739, -0.0383,  0.0911,  0.0084,
        -0.1043, -0.0574,  0.0308,  0.0374, -0.4877, -0.0037, -0.0667, -0.3122,
        -0.1333, -0.4006, -0.5839, -0.3151,  0.0050, -0.2021,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2441e-01, -1.6972e+00, -6.8470e-01, -8.0185e-01,  1.9319e-01,
        -5.1678e-01, -6.0394e-02,  7.7978e-05, -3.2754e-01, -7.8624e-01,
        -6.6436e-02,  1.9186e-02, -5.0941e-01,  5.4581e-03, -4.5068e-02,
         1.9239e-02, -2.5538e-02, -3.0815e-01, -3.4043e-02,  1.6023e-01,
         3.0502e-01, -1.4121e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0208, -1.1801, -0.1695,  0.0895,  0.1550, -0.0951, -0.0125,  0.0880,
        -0.0243, -0.7516, -0.0401,  0.0558, -0.0492,  0.0510, -0.2707, -0.2487,
        -0.6713, -0.0665, -0.0303, -0.4817,  0.0493, -0.0912, -0.3398, -0.0847,
        -0.2146, -0.0268, -0.0277,  0.1335, -0.1595, -0.3980, -0.0636, -0.1375,
        -0.0276, -0.2597, -0.2622, -0.2552,  0.1386, -0.0058,  0.0209, -0.0448,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2738e-01, -1.6681e-02, -5.5749e-02,  1.1442e-01, -1.1854e-01,
        -1.5198e-01, -8.8486e-02, -2.2701e-02, -2.8959e-01, -8.5004e-02,
        -5.9900e-02, -7.1124e-02, -8.2669e-02, -6.5454e-01, -5.2447e-01,
        -1.9630e-02, -7.0081e-02,  1.1466e-02, -4.3088e-01,  1.4768e-04,
         1.3770e-01,  1.5564e-02,  5.0766e-02, -4.6025e-02,  1.5611e-02,
        -4.2786e-01, -6.5264e-02, -3.8842e-01, -6.9866e-01,  7.3461e-02,
        -6.3127e-02, -1.8130e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2023e-01, -2.3806e+00,  5.4959e-02,  1.1951e-01, -3.7041e-02,
        -5.2443e-02, -3.5302e-03, -2.1997e-01, -5.2599e-01, -1.6882e-01,
        -3.3897e-02, -2.1326e-01,  2.3555e-04,  1.7187e-02, -1.2097e-02,
        -1.4218e-01, -3.3279e-01, -6.5485e-02,  7.4379e-03, -1.4159e-02,
        -6.2731e-02,  4.5565e-02, -4.9005e-02, -5.1831e-01, -1.1238e-01,
         8.9684e-02, -3.9396e-01, -5.7773e-02,  6.4969e-02, -2.0002e-01,
         6.5846e-02, -9.0458e-02,  1.1914e-02,  1.1883e-01,  4.6425e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.3966, -0.2955, -0.8707, -0.7717, -0.3117, -0.1210, -0.7574, -0.8422,
        -0.0851, -0.7238, -0.0479, -0.2101, -0.0170, -0.2890, -0.1030, -0.0831,
        -0.2482, -0.0505, -0.0450,  0.0476,  0.0696,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4888, -3.9315,  0.0554, -0.6193, -0.8251, -0.4835,  0.0503, -0.4481,
        -0.6732,  0.0525, -0.0302, -0.1141,  0.0411,  0.0678, -0.3609, -0.0723,
        -0.4890,  0.2474, -0.1881,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2132, -0.0851, -0.8136,  0.2188, -0.2253, -0.1151,  0.0329, -0.0958,
         0.0452, -0.3713, -0.8266, -0.3067, -0.3451, -1.0942,  0.1021, -0.1565,
         0.0264, -0.1963,  0.0867, -0.1129,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1407, -2.0246,  0.2610, -0.4529,  0.1595, -0.0170, -0.2571, -0.1419,
        -0.0155,  0.0515, -0.0481, -0.0395,  0.0293, -0.2026,  0.1798, -0.1716,
        -0.2862, -0.0440, -0.0947, -0.2250,  0.0671,  0.0901, -0.1660,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9271e-04, -1.2346e+00, -2.2316e-01, -7.3586e-02, -8.2161e-02,
        -1.9061e-01,  1.3175e-01, -3.1378e-01, -4.6293e-01, -6.6136e-01,
        -1.6189e-01, -5.2298e-01, -9.0447e-02, -4.4857e-01, -4.3056e-02,
         1.2987e-02,  1.3610e-01,  6.3386e-02, -5.7616e-02,  7.7488e-02,
        -2.3603e-01, -9.9685e-02,  6.4899e-02, -6.2334e-02,  2.1475e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3193,  3.6528,  0.2183, -0.0382, -0.0464,  0.2647,  0.0833,  0.0981,
        -0.1697,  0.4278,  0.6458, -0.0122,  0.0131, -0.1031, -0.0280,  0.2535,
         0.1240, -0.0225, -0.0244, -0.0039,  0.4550,  0.7866,  0.2830,  0.2021,
         0.1531,  0.1302,  0.2049,  0.1200,  0.0237,  0.1577,  0.0786,  0.0085,
         0.1824,  0.1413,  0.1667,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0742, -0.0965,  0.0629, -0.6746, -0.1015, -0.0278, -0.5124, -0.1689,
        -0.7153,  0.0234, -0.0402,  0.0162, -0.1958, -0.2660, -0.4012, -0.8433,
        -0.1717, -0.6150, -0.0987, -0.1246, -0.1792,  0.1884,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2999,  0.1583, -0.2079,  0.1119,  0.3333,  0.4684, -0.0509,  0.0254,
        -0.1629,  1.7678,  0.5250,  0.1094, -0.0314,  0.2195,  0.1163,  0.7948,
         0.2870,  0.2750, -0.2209, -0.1037, -0.1403,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1929, -2.9548, -0.3408, -0.1370, -0.0063,  0.1079, -0.2964, -0.6573,
        -0.0159,  0.1292,  0.0442,  0.0765,  0.0302, -0.5953,  0.0482, -0.3372,
         0.0799,  0.0335, -0.0571,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0335,  0.2345, -0.0065,  0.0058, -0.0321, -0.4984,  0.0369, -0.1576,
         0.0032, -0.0548, -0.0134, -0.1357, -0.0442, -0.1505,  0.0493,  0.0139,
        -0.3130, -0.1917,  0.0604, -0.1688, -0.0752, -0.0617, -0.2183,  0.0471,
        -0.2610, -0.0652, -0.1185, -0.1433,  0.0567, -0.7016,  0.0279, -0.2337,
        -0.2456, -0.4695, -0.0159, -0.0530, -0.0211, -0.0446, -0.0349, -0.1950,
         0.0090,  0.0449,  0.0058, -0.0608, -0.0055,  0.0156,  0.0577],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0276,  2.2986,  1.0270,  0.0269, -0.0976, -0.1111,  0.0939,  0.0158,
         0.3463, -0.0855,  0.0237,  0.0023, -0.0055, -0.1301, -0.0351, -0.1032,
         0.6550,  0.0326, -0.0188,  0.1184,  0.0203, -0.0496,  1.2110,  1.4234,
        -0.2357, -0.1346,  0.3774,  0.0172, -0.1064, -0.0497, -0.0557, -0.1418,
         0.0853, -0.3045,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0659e-01, -2.2588e+00, -1.1467e-01,  9.4984e-02, -1.4808e-01,
        -1.7015e-01,  4.4816e-02, -1.4287e-02, -4.4991e-02, -1.1371e-01,
         6.1399e-02, -8.3371e-03, -2.8856e-01, -4.5210e-01,  2.1198e-02,
        -2.8957e-02, -2.3601e-02,  2.4292e-04,  1.6558e-01, -5.9423e-01,
        -1.2890e-01, -1.4771e-01, -1.6908e-01, -7.1243e-02, -5.5885e-02,
         5.2063e-02,  4.4400e-02, -1.0397e-01, -1.3657e-02, -3.4366e-01,
         1.6431e-02,  2.4009e-02, -2.1986e-01,  2.4282e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.0623,  2.5004,  0.1197,  0.0904,  0.0488,  0.5798,  0.0613,  0.3159,
         0.1592,  0.6023,  0.0975, -0.0611,  0.1242,  0.7724,  0.1608,  0.1455,
        -0.1691,  0.0570,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4495, -0.0328,  0.0810,  0.1048,  0.0311,  0.0261,  0.0739,  0.0616,
        -1.0968, -2.4128,  0.0503, -0.3724, -0.1502, -0.0388,  0.0299,  0.0511,
        -0.1139,  0.0376,  0.1864, -0.4112,  0.0405, -0.1060, -0.0140,  0.1019,
        -0.0151,  0.0992, -0.2355,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3832, -0.1051,  0.0300, -0.0032,  0.5094,  0.0689,  0.0397, -0.0413,
        -0.5575,  0.0729, -0.0426, -0.3675, -0.8863,  0.0197,  0.0820, -0.0195,
        -0.0300, -0.1684, -0.1350, -1.3664, -0.0507, -0.0585, -0.3013, -0.1120,
        -0.3363, -0.1813,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7122, -2.5303, -0.0209, -0.7448, -0.7392,  0.2008,  0.0153, -0.2486,
        -0.6410, -0.2274, -0.6005, -0.4716,  0.1306,  0.0561, -0.0800, -0.5342,
        -0.2730, -0.1686,  0.0450, -0.1052,  0.4154,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2087, -1.4672, -1.1689, -0.8994, -0.1627,  0.0049, -1.0214, -0.0034,
        -0.2150,  0.1488,  0.0328, -0.2678,  0.2229,  0.0326,  0.0429,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1464, -0.0090,  0.0280,  0.0794, -0.3208,  0.0733,  0.1906,  0.0157,
         0.1236,  0.2254, -0.0394,  0.0103, -1.9477, -0.5003,  0.4861, -0.1713,
        -0.0620,  0.0502,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8674e-01, -2.4806e+00, -7.7823e-01, -1.4954e+00,  3.9034e-02,
         1.3734e-01,  1.3897e-01, -2.2593e-01, -2.0542e-01, -4.9724e-01,
        -5.0350e-01, -3.4284e-01,  1.1075e-03, -5.6460e-01, -8.8378e-02,
         1.6385e-02, -1.0628e-02,  2.9080e-01,  3.7928e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1637e-01, -1.0299e+00, -5.4277e-01, -1.2822e-01, -3.6020e-01,
        -1.7262e-01,  1.4924e-01, -7.3185e-02, -7.9847e-02,  2.0254e-03,
         1.1951e-02, -5.7690e-03, -7.7833e-02, -1.1437e-02,  3.7337e-02,
         5.8373e-02, -3.4842e-01,  2.2987e-02, -4.8567e-03,  1.8992e-02,
        -1.9819e-01, -2.5155e-02, -2.2313e-01, -4.3222e-02, -1.4423e-01,
        -1.5345e-02, -2.7012e-01,  8.9002e-02, -5.9707e-02, -1.9205e-01,
         2.4864e-04,  2.4672e-02, -1.7621e-01, -6.2217e-02, -2.1138e-01,
         6.0765e-03, -1.8831e-01,  5.4130e-02, -5.8896e-02, -2.9716e-02,
        -1.0141e-02, -5.0186e-03,  5.0729e-02, -3.2771e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0568, -1.2856, -0.1113, -0.4371, -0.4463, -0.1227, -0.2520, -0.0367,
        -0.4934, -0.0511, -0.2313,  0.1558,  0.0019,  0.0026, -0.1080,  0.0260,
         0.0568, -0.3458,  0.1125, -0.0358, -0.1815, -0.0109, -0.0733, -0.0275,
         0.0322,  0.0384,  0.0896, -0.2334,  0.0234,  0.1011, -0.0247, -0.3744,
         0.0478, -0.2659,  0.1724,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4443, -4.8956, -0.3671, -1.2276, -0.1523, -0.1523, -0.8765, -0.4634,
        -0.1650, -0.0096,  0.1130, -0.3112, -0.8771, -0.1795,  0.0407, -0.4250,
         0.0676, -0.0360,  0.1706,  0.0288,  0.0919, -0.0429, -0.0087,  0.0214,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8058e-01,  1.9091e+00,  6.8655e-03,  2.4902e-01,  1.3259e-02,
        -1.9859e-02, -1.0725e-01,  2.1176e-01,  6.5360e-01,  1.4626e-01,
         2.6224e-01, -2.8515e-03, -6.8744e-02,  1.3884e-01,  3.3417e-01,
         1.6077e-02, -2.4995e-02,  2.9437e-02,  3.9661e-01,  1.6699e-03,
         1.6014e-02,  3.0581e-01, -7.6787e-02,  2.5934e-01, -3.2348e-02,
         3.2340e-01,  2.5955e-01,  2.0602e-01, -3.1129e-03,  4.8731e-02,
         2.3316e-01, -5.6968e-02, -2.6342e-01,  8.3504e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4646, -0.0440, -0.1213,  0.0546,  0.0152,  0.1847, -0.7806, -0.1350,
         0.2785,  0.0410, -0.0016,  0.0860, -0.6586, -0.0843, -0.6846, -1.1046,
        -0.1018, -0.0885, -0.0273,  0.2392,  0.0650, -0.0086, -0.2076, -0.3887,
        -0.1498, -0.0731, -0.1470, -0.0660,  0.2072,  0.1179,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.3948,  0.0159, -0.3083, -1.0445, -0.2415, -0.2089, -0.1974, -0.1900,
        -0.5324, -0.9324, -0.0197, -0.1609, -0.1790,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4110,  0.0511,  0.2131,  0.0215, -0.0492, -0.2699,  0.1073, -0.9977,
        -1.2741, -0.0160,  0.0310, -0.6963, -0.0677, -0.0855, -0.6105,  0.1283,
         0.1646,  0.0206,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6466, -1.4720, -2.5503, -0.5625, -0.2958, -0.2553, -0.1849, -0.3169,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0246e-01, -1.9011e+00, -1.3194e-01, -1.2987e-01, -2.7320e-01,
        -2.3413e-01, -2.0347e-01,  1.9818e-02, -5.5358e-02, -8.9344e-02,
         4.6703e-02,  2.1039e-01, -1.3294e-01, -7.6090e-02, -4.1786e-03,
        -6.4461e-02,  4.6013e-02,  3.0683e-02,  5.3263e-02,  5.5321e-03,
         7.5425e-02, -1.4520e-01, -3.0490e-01, -2.5946e-01, -1.5747e-02,
        -1.5109e-01, -6.3072e-02, -3.7341e-02, -5.4589e-01, -3.0808e-01,
        -4.2162e-03, -3.8113e-01, -5.6825e-02, -7.2982e-01,  1.3792e-02,
        -2.4014e-01, -2.2057e-01, -2.1000e-02,  7.4117e-02,  2.1645e-02,
        -3.9145e-02, -8.0856e-02,  1.4151e-03,  5.2299e-02,  1.2705e-01,
        -9.9529e-02,  7.9009e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3020, -1.5007, -0.2130, -1.0980, -0.3549,  0.8825,  0.2769, -0.0579,
         0.0718, -0.7220, -0.2178,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4063,  2.3597,  0.7296,  0.6370,  0.1637, -0.0567,  0.2140,  1.0970,
        -0.0846,  0.3321,  0.1002, -0.2823,  0.5109, -0.0135, -0.2616,  0.1681,
        -0.0646, -0.5667,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0218, -0.7502, -2.1974, -0.1260,  0.3008,  0.1130, -1.7531,  0.1182,
        -0.4083, -0.0942,  0.2208, -0.0499,  0.0740,  0.4595,  1.0430,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5798e-01, -1.4830e+00,  7.1716e-02, -3.8112e-01,  3.4553e-02,
        -5.4539e-01, -9.1465e-01, -2.6398e-01,  6.2549e-04, -4.7552e-01,
        -5.0772e-01,  1.0738e-02, -3.6602e-02,  6.9303e-02,  4.1627e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5386,  0.0653,  0.0637,  0.1694, -0.0696, -0.0384, -0.0062,  0.0981,
        -0.2415, -0.1664, -0.1436, -0.6744, -0.8430, -0.1352, -0.0146,  0.0768,
        -0.3071,  0.6744, -0.1156,  0.0850, -0.5877, -1.2119, -0.0126,  0.0903,
        -0.0025, -0.0306,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3917, -0.5564, -0.1600,  0.1950,  0.2389, -0.2004, -0.0495, -1.1562,
        -1.3998, -0.2001, -0.0955,  0.2595,  0.1386,  0.0396, -0.0855,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1637, -2.6644,  0.1713, -0.3468, -0.4498, -0.3055, -0.5030,  0.1292,
        -0.0937, -0.6312,  0.1235,  0.0302, -0.2529, -0.0536, -0.2754,  0.0400,
        -0.1875,  0.1670, -0.3520,  0.1848, -0.1440,  0.1417, -0.0157,  0.0650,
        -0.0767, -0.1023,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2594, -0.0637,  0.0339,  0.0094, -0.0053, -0.0996, -0.0067,  0.0287,
        -0.2933, -0.2965, -0.5858, -0.0500, -0.0534,  0.1277, -0.6088, -0.3525,
         0.1283, -0.1384,  0.0179, -0.2580, -0.0490, -0.1281,  0.0088, -0.1767,
        -0.0160, -0.0711,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.0624e-01,  2.6124e+00, -1.8930e-01, -1.1827e-02,  1.0632e-01,
        -1.1263e-02,  5.0958e-01,  5.9564e-01,  1.6975e-02,  2.7320e-01,
         1.2368e-01,  1.1073e-01,  4.2551e-01, -8.0237e-02,  2.5542e-01,
         5.4074e-02, -7.3098e-02, -2.8891e-02,  1.0045e-01, -2.5173e-02,
        -4.9308e-02,  1.6279e-01, -6.3628e-02,  3.0117e-01,  8.1691e-01,
        -1.5173e-01,  1.7862e-01,  1.1722e-02, -1.8213e-03, -5.5867e-02,
         4.1973e-03,  4.8682e-01,  9.9583e-03,  4.8135e-02, -6.8900e-02,
        -8.8758e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3335, -0.7180, -0.4523, -0.1132, -0.4472,  0.0188, -0.5561, -0.7189,
         0.0024,  0.1085, -0.1868, -0.0172, -0.0338, -0.0137, -0.0619, -0.1502,
         0.0958, -0.0352,  0.1629,  0.0401, -0.0877, -0.3681, -0.0742, -0.1558,
        -0.3399, -0.5352,  0.0825, -0.2691, -0.4852, -0.1353, -0.1230, -0.5560,
        -0.4355, -0.0065,  0.0207, -0.2297,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2916,  0.0556,  0.3000, -0.2344,  0.0049, -1.0968, -0.1132, -0.0392,
         0.0547,  0.0655, -0.1674, -0.4043, -1.0526,  0.5931,  0.0846,  0.0362,
         0.0033,  0.0592, -0.0828,  0.1086,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2363, -0.7542, -0.1947, -0.4307, -0.0864,  0.0861, -0.0190, -0.0754,
         0.0174, -0.2405, -0.0678, -0.0239, -0.0326, -0.0417, -0.3464, -0.2542,
        -0.0406, -0.1534,  0.0192,  0.0090, -0.0466, -0.0228,  0.0280,  0.1048,
        -0.0331, -0.0440, -0.0187,  0.0551, -0.0573, -0.3104, -0.2677, -0.0994,
         0.0280, -0.3320, -0.1844, -0.1632, -0.0270,  0.0349,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1366, -1.4695, -0.9386, -0.4411, -0.0777, -0.0311, -0.0934,  0.0959,
         0.0378,  0.0260, -0.1332, -0.3169,  0.0257,  0.0415, -0.0201, -0.2654,
        -0.0288, -0.2320, -0.0031,  0.0416,  0.0176,  0.0548,  0.0373,  0.0152,
         0.0408, -0.0536, -0.0808, -0.1525, -0.2923,  0.0227, -0.0544, -0.0820,
         0.0778,  0.0105, -0.2389, -0.0137, -0.0644, -0.0347, -0.0248, -0.0370,
        -0.0176], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2708, -3.3846,  0.1084, -0.2511,  0.0858, -0.6126, -0.1458, -0.0677,
        -0.0729,  0.0187,  0.0034, -0.5687, -0.6736,  0.0326,  0.0075, -0.4936,
        -0.0331, -0.4927, -0.0087, -0.0734,  0.1089, -0.1243,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3057e-01, -1.2742e-02,  1.6776e-02,  1.9176e-02,  1.3149e-01,
        -1.2712e-01,  1.0800e-02,  6.2853e-02,  3.0335e-03,  1.4998e-03,
         6.2749e-02, -3.0178e-01,  2.7743e-02, -1.5649e-01,  3.1015e-02,
        -4.4186e-01,  1.9215e-02, -2.5608e-01,  1.3262e-02, -3.3829e-01,
        -5.4677e-01, -3.6189e-02, -7.0670e-02, -3.2373e-01,  9.5302e-03,
        -1.3056e-02, -3.4834e-02, -1.4870e-01, -4.9424e-01,  3.0295e-02,
        -2.4271e-01, -2.9464e-01, -3.7521e-05,  2.8434e-02,  5.3143e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6386, -0.2677, -1.4399, -0.1034, -0.3790, -0.2132,  0.0623, -0.1171,
         0.0600,  0.0510, -0.2691,  0.0480, -0.0425,  0.1260, -0.0151, -0.2121,
         0.0875, -0.1782, -0.2149,  0.0520, -0.1316,  0.1361, -0.0335, -0.3423,
         0.0374,  0.0512, -0.3916, -0.0487,  0.3396,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1135e-01, -2.5777e+00, -4.0026e-01, -2.7933e-01, -9.2122e-02,
        -3.9470e-01, -1.0122e-01, -6.4708e-02, -2.5319e-01,  2.6611e-02,
        -2.3371e-01, -4.5174e-01,  6.7745e-02, -2.2769e-02, -3.8169e-02,
        -2.5702e-01, -2.0612e-01, -6.6911e-04, -1.2980e-01,  1.1176e-01,
         4.3334e-02, -1.5487e-02,  1.7169e-02,  1.6887e-01,  1.0712e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0544,  0.0291, -0.1084,  0.0310, -0.9303, -0.0140, -0.0697, -0.5044,
        -0.6311,  0.0386, -0.1027,  0.0915, -0.0483,  0.0043, -0.2953, -0.3959,
         0.0595, -0.1512,  0.0266,  0.0372, -0.0267, -0.0814, -0.1787,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0624, -0.0353,  0.0079,  0.0162, -0.0143,  0.0943, -0.0030,  0.0384,
         0.0082, -0.0127,  0.0438, -0.0292, -0.2186, -0.6753,  0.0580, -0.0654,
         0.0139, -0.1836, -0.0191,  0.0572, -0.0961, -0.0171,  0.0693, -0.0753,
        -0.0669,  0.0452, -0.5408,  0.0416, -0.3585, -0.0717, -0.3551, -0.0292,
        -0.1656, -0.1248, -0.3844, -0.0219, -0.1301, -0.0942,  0.0270,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2536,  0.1328,  0.2950,  0.9391,  1.6576, -0.0826, -0.0091,  1.0464,
         1.1237,  0.1363,  0.2084,  0.3026,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.6980e-01, -5.3284e-01, -1.3647e+00, -2.1427e-02, -7.3858e-02,
        -4.2391e-02, -1.8812e-01,  1.9588e-01, -5.7779e-02, -3.2535e-01,
        -9.8827e-03,  1.0012e-01, -1.2703e-01,  1.0706e-01, -3.7920e-01,
         6.9826e-02, -1.7813e-01, -3.3762e-01, -5.3585e-03, -4.2948e-02,
        -1.2956e-01, -2.1401e-01,  2.2215e-02, -1.1885e-03, -1.4033e-01,
         2.4037e-02, -6.1947e-03, -2.1082e-01,  3.9639e-01, -2.4982e-01,
         5.5550e-02,  4.9343e-02, -3.2799e-01, -2.3880e-03, -2.7276e-03,
        -1.2521e-01, -2.6568e-01, -1.6230e-02,  2.7656e-02, -2.9478e-02,
         1.3916e-02,  2.1215e-02,  1.2337e-02, -2.1554e-01, -7.5589e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3585,  4.5470,  0.6721,  0.8975, -0.3459, -0.2054,  0.0237,  0.1941,
         1.1366,  0.2192,  0.1554, -0.0997,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5275, -5.8372, -0.2477, -0.6992,  0.0590,  0.6491,  0.0356,  0.5841,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7782e-01, -1.2808e+00, -2.7129e-01,  2.2473e-01, -3.8692e-01,
        -1.1456e-02, -8.2382e-02, -1.9639e-01, -2.5870e-01, -7.7540e-01,
        -1.8923e-01, -7.1386e-02, -2.5390e-02, -5.3376e-03,  3.4120e-03,
         3.4759e-02, -4.9109e-02, -1.5543e-02,  3.7946e-02, -2.4748e-02,
         4.0818e-03,  5.8629e-02, -6.7801e-02,  5.1631e-02, -5.1614e-02,
        -1.6634e-02,  1.9718e-02, -1.8053e-02, -1.5940e-01,  1.8580e-02,
        -4.0307e-02, -6.9260e-02, -2.1894e-02,  2.8366e-02, -1.0502e-02,
        -2.6038e-04, -9.3891e-03,  1.3545e-02,  2.7591e-03,  5.2812e-02,
         4.1468e-02, -1.6796e-02, -2.1829e-03, -3.0148e-02, -2.8966e-02,
        -7.0134e-02, -5.6813e-01, -1.1922e-01, -5.3956e-01, -5.5090e-01,
        -1.3267e-01, -5.3056e-02, -2.1142e-01, -2.1087e-02, -8.7664e-02,
        -7.7494e-03, -1.4411e-01,  3.8417e-02,  2.2439e-02, -9.8891e-03,
         1.9274e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1396, -0.0093, -0.0256, -0.2914, -0.3693, -0.0214, -0.3979, -0.0451,
         0.0314,  0.0910, -0.0580, -0.5024, -0.0347, -0.2739, -0.5347, -0.0586,
        -0.0757,  0.0188, -0.1129, -0.1206, -0.3691, -0.3498,  0.0659, -0.0909,
        -0.1911, -0.1654, -0.0852, -0.0718, -0.1456, -0.0824,  0.0125, -0.0876,
         0.0864,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1930,  0.3861,  0.9602,  0.6745,  0.1758,  0.1372,  0.3661, -0.2189,
        -0.0486,  0.0294,  0.0952, -0.0477,  0.1437,  0.0591,  0.0153, -0.1348,
         0.0337,  0.4554,  1.3175, -0.0671, -0.0314,  0.2785, -0.0892, -0.0379,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2020, -0.4478, -0.2859, -0.1555, -0.0519, -0.4460,  0.0356, -0.1784,
         0.0134, -0.3106, -0.3794, -0.3383, -0.5344, -0.0173,  0.0519,  0.0563,
         0.0755, -0.1012,  0.0701,  0.1715, -0.0803, -0.0954,  0.1559, -0.0955,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7188,  0.0202, -0.4879, -0.4575,  0.0649,  0.0767, -0.0156, -0.4077,
        -0.0059, -0.0873,  0.0587,  0.0516, -0.0216, -0.0864, -0.3116, -0.5242,
        -0.3158,  0.0238, -0.0337, -0.1803, -0.3985, -0.0201, -0.2157, -0.5782,
         0.0037, -0.0260, -0.1808, -0.1409,  0.0143, -0.2425, -0.1126, -0.1353,
         0.1879, -0.0106,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4018e-01, -1.3019e-02,  1.9360e-01,  6.5763e-04, -2.2426e-01,
         3.1527e-02, -3.7088e-02,  1.1212e-01, -2.4013e-02, -7.2626e-02,
        -2.6400e-01,  1.5541e-01,  1.4884e-01, -9.4907e-01, -2.0732e-01,
        -4.7637e-02, -1.0426e+00, -4.1612e-02, -3.2357e-01,  3.0747e-02,
        -3.9056e-01, -1.0934e+00,  1.4961e-01,  1.4193e-02, -1.2573e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2527,  0.1196, -0.2044, -0.1127, -0.0112,  0.2603, -0.7442, -0.3422,
        -0.6485, -1.6446,  0.3608, -0.1738, -0.1991, -0.2244, -0.0853, -0.2302,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8219, -1.6689,  0.1164, -0.6729, -0.1916, -0.4457, -0.4776, -0.1120,
         0.0397,  0.0335, -0.0495, -0.2122, -0.2295,  0.0405,  0.1537,  0.0115,
        -0.1466, -0.0707, -0.2958, -0.0272,  0.1124, -0.1349,  0.1044,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3189, -0.0242,  0.2545,  1.2302,  2.4197, -0.2641, -0.3209,  0.0809,
         0.3499, -0.0025, -0.2940, -0.2694, -0.0204,  0.1225,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.2629, -0.5947, -0.7677, -1.2769, -0.2850,  0.0337, -0.3839,  0.0159,
        -0.3832, -0.1486, -0.6559, -0.6077, -0.1248,  0.0078, -0.0970,  0.1418,
         0.0537, -0.7197,  0.1050, -0.5784, -0.1417,  0.0124,  0.0215, -0.0514,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0705,  0.0775, -0.4390, -0.0084, -0.5273,  0.3547, -1.0673, -0.2053,
         0.0354, -0.0291,  0.1175,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2688,  0.0429,  0.3699,  0.0415,  0.1041, -0.1341, -0.9424, -0.0988,
        -0.5323, -0.1512, -0.2435, -0.0585, -0.2793,  0.0261, -0.3324, -0.0898,
        -0.1392, -0.5469,  0.0168,  0.0181, -0.0094,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4621,  0.2409,  0.0768, -0.0614, -0.4446,  0.0270, -0.0489, -1.9317,
        -0.2948, -1.0065, -0.1191,  0.1150,  0.0096, -0.3360,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5955,  0.1958,  0.0909,  0.0513,  0.1975, -0.0085,  0.0650,  0.0281,
        -0.0168,  0.3423,  0.0094,  0.2867,  1.9705, -0.0933,  0.1213,  0.0152,
         0.2541, -0.0422, -0.1028,  0.2059, -0.1155,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3391,  3.3592, -0.1094,  0.3620, -0.5431,  0.8138,  0.2182,  0.6190,
         0.0469,  0.3678,  0.0206,  0.2212, -0.7247,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1374, -1.1254, -0.0780,  0.0130, -0.0700, -0.1212, -0.1912, -0.8796,
        -0.1617, -0.1459,  0.0018,  0.0862,  0.0781, -0.0302, -0.0159, -0.0926,
         0.0020, -0.0116,  0.0406,  0.1372, -0.0293,  0.0191, -0.0884, -0.0726,
        -0.4164,  0.1576, -0.4600, -0.2554, -0.2596, -0.0943, -0.0253, -0.0104,
        -0.0173,  0.0041, -0.0363,  0.0242, -0.5489, -0.0192, -0.0111, -0.0748,
        -0.1906, -0.0054, -0.0112,  0.0885, -0.0691], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4196, -0.2968, -0.4244, -0.1973,  0.0819,  0.1528, -0.0673, -0.3565,
        -0.0373,  0.2090, -0.5124, -0.1542, -0.1000, -0.1276, -0.4213,  0.1995,
        -0.8873, -0.0229,  0.1286, -0.0209, -0.0582, -0.8444,  0.0704, -0.4583,
         0.1316,  0.1337,  0.0563, -0.1958, -0.3799, -0.1400, -0.0474, -0.1437,
        -0.3662, -0.0522,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0752,  0.8431,  0.1842, -0.5384, -0.2093, -0.1313,  0.0879, -0.1053,
         0.7919,  0.0223, -0.0923,  0.0363, -0.0618, -0.0457, -0.0645,  0.2251,
        -0.0748,  0.2364,  0.0849, -2.2478, -1.9961,  0.0880, -0.0759,  0.0885,
        -0.0936, -0.3688,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4766, -2.5956, -0.0834, -0.4649,  0.1981, -0.0265, -0.1821, -0.3207,
        -0.1000, -0.1535,  0.1036,  0.1192,  0.0214,  0.1138, -0.1114, -0.8686,
        -0.7508,  0.0082, -0.1690, -0.0264, -0.1404, -0.0719,  0.1574,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0109,  1.1870,  0.0754,  0.4582,  0.0290, -0.0202, -0.0392,  0.4208,
         0.7167,  0.1235, -0.0577, -0.0078, -0.0585,  0.0522,  1.5388, -0.0053,
        -0.1356,  0.2863,  0.4066,  0.0533,  0.0804,  0.0249, -0.0456, -0.2070,
         0.0029, -0.0577,  0.2824,  0.6756,  0.1118,  0.0402,  0.0217, -0.0454,
        -0.0907, -0.1499,  0.0501, -0.1154,  0.1297, -0.1434, -0.0096, -0.1808,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1764, -0.0425,  0.0112, -0.2121, -0.4726, -0.0373, -0.2108, -0.3424,
        -0.0733, -0.0443, -0.0448, -0.0348, -0.1818,  0.0618, -0.1679, -0.2784,
        -0.2273,  0.0211, -0.0101, -0.2912, -0.0780, -0.2530, -0.0915, -0.0735,
         0.0118, -0.0610, -0.0045, -0.2621, -0.0270, -0.0701, -0.2321, -0.0578,
        -0.0262,  0.0947,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.0777, -0.1994, -0.1879, -0.0362, -0.5460, -0.0223,  0.1218,  0.0224,
        -0.0318,  0.0743, -0.0089, -0.0558,  0.2706, -0.0210, -0.9803,  0.1520,
         0.1060, -0.0444, -0.2733,  0.0154, -0.0338,  0.0216, -0.0787,  0.0849,
        -0.2583, -0.4671,  0.0330,  0.0344,  0.1228, -0.0116, -0.0353, -0.0678,
         0.0174,  0.1117, -0.1779,  0.0481, -0.1572, -0.2723, -0.0865, -0.2759,
        -0.1647, -0.0997, -0.2572, -0.0366, -0.0242,  0.0056, -0.0084,  0.0229,
        -0.0281, -0.0189, -0.0116,  0.0070], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3428e-01, -7.5494e-03,  4.1010e-02,  7.4436e-02, -1.1977e+00,
         5.9156e-02,  1.0282e-01,  9.6903e-02,  9.4179e-04,  4.7103e-02,
         1.6312e-01,  9.6305e-02, -1.9360e-01, -1.3441e+00, -2.7063e-01,
        -3.9660e-01, -9.8859e-01, -9.1796e-01, -5.4815e-01, -5.9247e-05,
        -8.1746e-02, -8.2772e-02, -1.8220e-01, -8.4585e-01,  1.0946e-01,
        -4.2175e-02,  1.1213e-02,  1.0222e-03,  1.3389e-01,  1.3515e-01,
         8.8810e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9422e-01, -8.6887e-01,  6.0408e-02, -1.8542e-01, -3.4231e-04,
        -5.5053e-02, -1.9195e-01, -4.8717e-02, -3.2792e-02,  2.9870e-01,
         3.6587e-03, -3.2120e-01, -1.0677e-01,  5.1359e-02, -3.1200e-01,
        -3.5655e-01, -5.8837e-02, -5.0555e-01, -3.4103e-01, -6.0573e-01,
        -8.4940e-02, -1.6013e-02, -7.0954e-02,  1.3303e-01, -3.2298e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2031, -0.1814, -0.0227,  0.1135,  0.4106,  1.2334,  1.8660,  0.3296,
        -0.1543,  0.2377,  0.2515,  0.3137,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0340, -1.8351, -0.1254, -0.1732, -0.4825, -0.0779,  0.0038,  0.0494,
         0.0107, -0.0111, -0.0519, -0.0625, -0.4131, -0.5093, -0.0192, -0.4301,
        -0.0673, -0.0323, -0.0195, -0.0927, -0.1605, -0.0302, -0.0736, -0.0881,
        -0.0298, -0.3148, -0.0952, -0.0617, -0.0497,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7173e-02,  4.5537e+00, -3.7771e-01, -7.3087e-01, -1.6010e-01,
         1.2387e+00,  2.0318e+00, -6.2400e-01,  3.9759e-02, -2.9270e-01,
         2.9077e-04, -1.3002e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2734, -2.3326,  0.0373, -0.7490,  0.3453, -0.9954, -1.0695, -0.2198,
        -0.3324,  0.1076,  0.2038, -0.2241,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2055,  0.5370, -1.3044, -0.0527, -1.0052,  0.0039,  0.0139,  0.1494,
        -0.8555,  0.2332, -0.0645, -0.8399, -0.0957, -0.1281, -0.1081, -0.5636,
         0.0810,  0.0986,  0.0723, -0.3219,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1149e-01, -1.7592e+00,  6.7155e-03, -5.4469e-01, -1.0337e+00,
        -1.7804e-02, -2.1497e-01, -1.7798e-03, -9.3973e-02,  1.1368e-03,
        -2.6859e-02, -3.8303e-01, -2.1455e-01, -8.4190e-02, -7.5743e-03,
         1.2570e-03, -1.8315e-01,  5.4447e-02,  4.2171e-02, -5.2007e-02,
        -1.9594e-01,  1.1315e-01, -5.6787e-02, -3.8739e-02,  3.9277e-02,
         3.6834e-02,  2.1592e-01, -2.4927e-01, -3.0312e-01, -4.1180e-03,
         4.0881e-02, -1.0757e-02, -1.8160e-01, -5.4518e-01, -3.2088e-02,
         1.2954e-01, -1.5026e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8420,  4.0579,  0.1377, -0.3638,  0.1418,  0.0781,  0.0257,  0.3238,
        -0.0423,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2364,  0.0868, -0.1798, -1.0774, -0.2466, -0.0192, -0.0846, -0.4670,
         0.1181, -0.1104, -0.0773, -0.2456, -0.6045, -0.7540, -0.1315, -0.0429,
         0.1390, -0.0145, -0.3285, -0.5130,  0.1675,  0.0217, -0.0676,  0.0964,
        -0.0662,  0.3719,  0.0339,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8054e-01, -2.2618e+00, -1.8946e-01, -4.6089e-01,  3.2852e-02,
        -1.8964e-01,  1.4563e-01,  1.3656e-01, -5.9716e-03,  1.6473e-01,
         5.4768e-02, -4.0861e-01,  4.2976e-02,  7.5999e-02,  2.7936e-02,
         9.5346e-02, -2.7687e-01, -1.2265e-01, -3.8013e-04,  6.5781e-03,
         3.9992e-02, -4.4146e-01, -8.3256e-01,  7.9735e-02, -5.7554e-03,
        -4.6090e-02, -1.1165e-01, -2.3160e-01, -6.9874e-01, -1.2431e-02,
        -6.7820e-02, -4.0609e-02,  1.2033e-01,  1.1508e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.2575,  0.0161,  0.0568, -0.0282, -0.3507,  0.0898,  0.0212,  0.0236,
         0.0722,  0.0415, -0.0037, -0.4260,  0.0856, -0.0071,  0.0290, -0.1266,
        -0.2922,  0.1212, -0.1077, -0.0468,  0.0678,  0.1102, -0.2150, -0.1216,
        -0.0130,  0.0400, -0.0153,  0.0097, -0.0243, -0.0610,  0.0565, -0.3575,
        -0.4421,  0.1301, -0.2054, -0.2539, -0.4845, -0.0081, -0.1626, -0.0560,
        -0.1021, -0.2279, -0.0043,  0.1246, -0.0150,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3048,  0.0342,  0.0635,  0.0402, -0.0620, -0.6684, -1.2155, -0.0705,
         0.1822, -0.8457, -0.0066, -0.0976, -0.4507,  0.1254, -0.0035, -0.2259,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7118,  0.0320,  0.0349, -0.0753, -0.0584, -0.0119, -0.0965, -0.3191,
        -1.0441, -0.9088, -0.2978, -1.2864, -0.0312, -0.3700, -0.6201,  0.0100,
        -0.1614, -0.0510,  0.0746, -0.0674, -0.0338,  0.0611, -0.0304, -0.0594,
        -0.1977,  0.2340, -0.0831,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1986,  0.0785, -0.4433, -0.0130, -0.1991, -0.0088, -0.1800, -0.0478,
        -0.0530, -0.0159,  0.0180, -0.0217, -0.1266,  0.0131, -0.1688, -0.0289,
        -0.0990,  0.0120, -0.0137, -0.0874,  0.0783, -0.1135, -0.0635, -0.3368,
        -0.0697, -0.0490, -0.1034, -0.3736,  0.1022, -0.2180, -0.0217, -0.0969,
        -0.2068, -0.0614,  0.0049, -0.1825, -0.1084, -0.2086, -0.0465,  0.0033,
        -0.1487, -0.1590, -0.0288, -0.0189,  0.1290,  0.1881], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1033,  0.2717,  0.1452,  0.0627,  0.0671, -0.1498,  0.0410, -0.8725,
        -0.0574, -0.0548, -0.0405,  0.0728,  0.0663, -0.0554, -0.0121,  0.0193,
        -0.0442, -0.0993, -0.5626, -1.6687, -0.0805, -0.0226, -0.0393, -0.0292,
        -0.0063, -0.0397, -0.3382, -0.1304,  0.0764,  0.1054,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.8084,  0.0612,  0.1841, -0.1549, -0.2743,  0.1911,  0.3451,  2.5120,
         0.6616, -0.0406,  0.1916,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0124, -2.3677, -0.1658,  0.0889, -0.2493, -0.0216, -0.1826, -0.3237,
        -0.0135,  0.0749,  0.0549,  0.0360, -0.0786, -0.0211, -0.1504, -0.3848,
        -0.0710, -0.1627, -0.0955,  0.2548,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6587, -2.8322, -1.0127, -0.6236,  0.4813, -0.2025, -0.5943,  0.1809,
        -0.1771,  0.1076, -0.0422,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4344, -2.5041, -0.7265, -1.2743,  0.0834,  0.1494,  0.0141, -0.2202,
        -0.5164,  0.0459,  0.0956, -0.0095,  0.0473,  0.0448,  0.0036, -0.3364,
         0.1130,  0.0867, -0.0765, -0.0844,  0.4170,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3372e-03, -1.4242e+00, -1.2574e+00, -2.9042e-01, -9.7363e-04,
        -1.4134e+00,  4.8279e-01,  1.4745e-01, -3.9084e-01,  2.3863e-01,
        -3.9285e-02, -3.0230e-01,  9.7386e-02, -4.7685e-02, -1.3134e-01,
        -1.5740e-01, -1.0265e-01,  1.7187e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3431e-02, -4.0928e-01, -1.2214e+00, -2.7974e-02, -9.2793e-02,
         3.9363e-02,  1.2647e-03, -3.1099e-02, -2.8875e-02, -5.9868e-02,
         1.1269e-02, -5.7782e-03, -3.3152e-02, -3.3924e-01, -3.2676e-01,
         2.3346e-02, -1.4731e-01, -3.0051e-01, -8.4457e-02,  1.6902e-02,
        -1.2648e-01, -3.4091e-01, -3.6163e-01, -1.5286e-02, -1.8516e-01,
        -1.1180e-02, -2.0303e-01, -3.1008e-02, -1.1343e-01, -3.0929e-01,
        -3.9747e-02, -6.2595e-02,  6.9416e-02,  4.5789e-03, -7.1405e-02,
        -9.0772e-04,  3.2387e-03,  4.0584e-02, -4.5551e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3141, -3.0898, -0.1071, -0.4586, -0.0218, -0.0151,  0.2066,  0.1555,
        -0.1592, -0.0578, -0.0913, -0.8709,  0.1249,  0.0232,  0.1430, -0.0253,
        -0.3891,  0.0561,  0.0268,  0.0051, -0.0446,  0.0250, -0.0161,  0.0521,
         0.0148, -0.0106, -0.0435,  0.1193, -0.2681, -0.5230,  0.0197, -0.0606,
         0.0877, -0.1854,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.0546,  0.3865, -0.3217, -1.2418, -0.0226, -0.6343, -0.1061, -0.1308,
        -0.3494,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3280, -1.1281, -0.0141, -0.3568, -0.0735, -0.4779,  0.0301, -0.4521,
        -0.1744, -0.0424, -0.4105, -0.1337, -0.0268, -0.0159,  0.0365, -0.2937,
        -0.1264, -0.5204, -0.0129,  0.0501, -0.0636, -0.0492,  0.0379, -0.0044,
        -0.3694,  0.0154, -0.0696, -0.0122,  0.0413,  0.0217, -0.1229, -0.3254,
         0.0554, -0.3314,  0.0500,  0.0205, -0.0022, -0.0296,  0.1305,  0.0806,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6733e-01, -3.3741e-01, -1.1795e+00, -3.3506e-02, -2.9654e-01,
        -1.8757e-01,  1.0023e-02, -1.1025e-01, -3.2733e-01, -3.1802e-01,
        -1.2410e-02, -1.2069e-02,  3.5238e-03, -5.8371e-03,  3.5604e-02,
         4.2129e-02,  4.3047e-02,  7.2156e-02, -4.4799e-02, -7.0492e-04,
        -2.1580e-01,  3.9712e-02, -5.5445e-03, -8.2072e-02, -5.3833e-02,
        -1.0233e-01, -2.5410e-01,  4.8186e-02, -2.2848e-01, -5.1389e-01,
         3.4414e-02, -6.9834e-02, -1.2229e-02, -8.2823e-03,  2.0379e-02,
         3.2960e-02, -1.8843e-01, -3.4834e-01, -2.8639e-02, -3.9604e-02,
         2.2458e-02,  9.2265e-03, -4.9368e-03,  1.7424e-02,  4.7696e-03,
         3.7267e-02, -2.1500e-02, -1.0360e-03, -5.0181e-02,  2.7663e-02,
        -8.6937e-02,  1.4355e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4014e-01, -2.8965e+00, -1.9700e-01, -2.5909e-01, -3.5837e-01,
         2.4259e-04, -5.2571e-01, -1.8020e-01, -4.3119e-01, -4.1854e-02,
         1.8275e-01, -1.1505e-02, -6.4238e-01, -3.5458e-02, -2.4731e-01,
        -5.2745e-01, -2.4950e-01, -1.5099e-01, -3.9902e-01, -1.8251e-02,
        -2.6950e-02, -2.2978e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2645,  5.7085,  0.3840,  0.0367, -0.2440,  0.0232,  0.0099,  0.7213,
        -0.0633, -0.0815,  0.1438,  0.4025,  0.0564,  0.2250,  0.0525,  0.3745,
         0.2519, -0.0191, -0.0396, -0.0843, -0.0510,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2396e-01, -8.4991e-01, -1.0355e-01, -4.9523e-02,  4.3195e-02,
         2.7038e-02, -8.5699e-02,  7.8562e-02, -1.8613e-01,  3.3965e-02,
         1.6691e-02, -3.0126e-02, -1.4622e-02,  3.5431e-02,  5.7277e-03,
        -7.2439e-03,  5.8821e-02,  2.2186e-02, -2.0039e-01, -3.1765e-03,
         2.5525e-02,  1.7704e-03, -2.7619e-03,  1.4951e-03, -2.4391e-04,
        -1.5397e-02, -1.2933e-01, -3.0717e-01, -2.9899e-02, -2.3925e-01,
        -6.7373e-01, -3.4321e-01, -1.3195e-02, -1.9509e-01, -3.8778e-02,
        -2.6285e-02,  4.3123e-02,  6.0599e-02,  4.1394e-02, -2.4119e-03,
        -4.2513e-01,  2.1950e-02, -2.1025e-02, -1.0827e-02, -1.2381e-01,
         3.0450e-03, -1.8156e-01, -5.5218e-02, -5.6420e-02,  7.6337e-02,
        -1.3112e-01, -1.3703e-02,  4.9899e-02, -7.7786e-02, -2.0435e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0090, -0.0285,  0.0141, -0.1090, -0.0755, -0.1859, -0.4667, -0.0990,
         0.1336,  0.0222, -0.0175,  0.0500, -0.0596, -0.0893, -0.2671, -0.0651,
        -0.1680, -0.1645, -0.0154, -0.1823, -0.1697, -0.1231, -0.1329,  0.0145,
         0.0570, -0.0293, -0.0240, -0.4228, -0.1152, -0.0280,  0.0939,  0.0088,
        -0.1085, -0.5574, -0.0173, -0.1190, -0.0490, -0.2001, -0.2074, -0.2315,
         0.0100, -0.0669, -0.1768, -0.3767, -0.0640,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5223, -1.7848, -0.4944, -0.4971,  0.1818, -0.1097, -0.0909,  0.0311,
        -0.4833,  0.0150,  0.1939, -0.3128,  0.0079, -0.3053, -0.4763, -0.0467,
        -0.1323, -0.2107, -0.0396, -0.2336, -0.2829, -0.0917, -0.1106,  0.1959,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0332,  0.0952,  0.0466, -0.0249, -0.0249, -0.0594, -0.3957, -0.2255,
         0.1569, -0.0050,  0.0117, -0.0452, -0.0570, -0.0847,  0.0730,  0.2454,
        -0.2966, -0.2084, -0.2745, -0.4169, -0.0367, -0.0575, -0.2934, -0.3614,
         0.0384, -0.1785, -0.2017, -0.0266, -0.1494, -0.4095, -0.1375,  0.2774,
        -0.3160,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9653,  4.0250,  0.1549,  1.0533,  0.0744,  0.1759, -0.1239,  0.0734,
         1.0069, -0.0337,  0.1801,  0.1009, -0.1887,  0.6977, -0.0094,  0.3897,
         0.0718, -0.0657,  0.3446,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0981,  0.0814,  0.1450, -0.0088, -0.4605, -0.0553, -0.2111, -0.5674,
        -0.7108, -0.0065,  0.0553, -0.0383,  0.0553,  0.0916, -0.0250,  0.0209,
        -0.0482, -0.2612, -0.1625, -0.0592, -0.1996, -0.3333, -0.0516,  0.0242,
        -0.0150,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0603, -1.2313, -0.7299,  0.1198, -0.1572, -0.5660, -1.0706,  0.2406,
         0.0418, -0.2206,  0.2386,  0.0964, -0.2358,  0.0502, -0.0254, -0.3530,
         0.4070,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.4522,  0.0042, -0.2093,  0.0303, -0.0130, -0.0095, -0.0247, -0.0244,
        -0.1649, -0.7717, -0.2020, -0.0423,  0.1221, -0.0610, -0.0103,  0.0063,
        -0.7746, -1.0429,  0.0246, -0.4070, -0.0434, -0.6119, -0.7165, -0.0392,
         0.0640,  0.0383, -0.0137,  0.0344,  0.0775, -0.1052, -0.0494,  0.1893,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1859, -0.6383, -0.8411, -0.1131, -0.0344, -0.1023, -0.2087, -0.7157,
        -0.0205,  0.0413,  0.0649,  0.0195, -0.0281,  0.0041, -0.1159,  0.1023,
        -0.1657, -0.0847, -0.4090, -0.0236, -0.4681, -0.0868, -0.4140, -0.8376,
        -0.0513, -0.0618, -0.5737, -0.1092,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2815, -0.6629, -0.0841, -0.2216, -0.2348, -0.6880, -1.0854, -0.4301,
         0.0135, -0.0501, -0.0865, -0.4870, -0.0802, -0.0031, -0.0834, -0.4423,
        -0.0690, -0.4171, -0.1395, -0.0116, -0.0390,  0.1162, -0.0426, -0.1404,
        -0.2555,  0.1524,  0.1246,  0.1531, -0.0495,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1037, -0.6999, -0.0704, -0.3480, -0.8889, -0.1535, -0.0132,  0.0229,
        -0.0575,  0.1509, -0.0139, -0.1256, -0.3241, -0.5164,  0.2044, -0.3394,
         0.0889, -0.6878,  0.0314, -0.1037, -0.0248, -0.0318, -0.0951, -0.0253,
        -0.1227, -0.2122, -0.2597, -0.0425,  0.0219,  0.0092, -0.0145, -0.0417,
         0.0881,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2935, -1.5155,  0.0385, -0.2459, -0.0202, -0.2582,  0.1219, -0.2308,
        -0.0803, -0.3797, -0.2427, -0.0363,  0.0252, -0.0508,  0.0283,  0.0448,
        -0.1140,  0.0709, -0.4378,  0.0628, -0.2184,  0.0144, -0.1378,  0.0036,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0255, -1.3050, -0.9546,  0.0171, -0.3345,  0.0297, -0.0833, -0.3875,
         0.1454, -0.2782, -0.0063, -0.1688, -0.3032, -0.0376,  0.0165, -0.0289,
         0.0736, -0.2615,  0.0437, -0.8000, -0.0753,  0.1135,  0.1741,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2440, -1.1058,  0.1382, -0.0243, -0.1015, -0.1827,  0.0406,  0.0313,
         0.0111, -0.0149,  0.0179, -0.0269, -0.0928,  0.0277,  0.0199, -0.0445,
        -0.4200, -0.2203, -0.0844,  0.0472,  0.0219, -0.1980, -0.0192, -0.1092,
        -0.2829, -0.0927, -0.0481,  0.0053, -0.0206, -0.2361, -0.1958, -0.0791,
        -0.1931, -0.1874, -0.1700, -0.2049, -0.0031,  0.0568, -0.0390,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1273, -0.4230,  0.0231, -0.2087, -0.8377, -0.0417, -0.0357,  0.0320,
         0.0288, -0.0720,  0.0604,  0.0347, -0.0310, -0.0245, -0.0014, -0.3655,
        -0.2530, -0.0333, -0.0336,  0.0891, -0.3839, -0.0136, -0.0502, -0.0632,
        -0.5856, -0.7822, -0.0489, -0.1548,  0.0272, -0.2826, -0.1193,  0.0204,
         0.0334, -0.0271, -0.0269,  0.0301, -0.0993, -0.3969,  0.0558, -0.0389,
        -0.1133,  0.1571, -0.0552,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0628e-01, -2.2625e+00, -1.8205e-01, -1.6086e-02,  5.3502e-02,
         4.0163e-03, -5.7959e-02, -8.4617e-02, -5.6747e-01, -2.0282e-01,
         8.2307e-02,  1.1004e-02, -1.3412e-01, -3.9557e-02,  5.3966e-02,
        -2.1396e-02, -2.0999e-01, -3.3865e-01, -8.2428e-02, -3.1487e-02,
         2.5353e-02,  3.5270e-03, -5.7790e-02, -2.3285e-02,  7.7354e-02,
        -2.5134e-02,  1.2802e-02, -1.3294e-02,  6.6030e-04, -6.0437e-03,
         4.8286e-02, -1.8300e-01, -2.5904e-01,  1.0163e-01, -1.6649e-02,
         4.3801e-02, -1.9819e-01, -2.4555e-01, -2.3111e-01,  5.0047e-03,
        -1.8303e-01, -3.2449e-01, -2.9658e-02,  8.5325e-02, -3.5759e-02,
        -4.5078e-03, -9.2642e-03,  1.9434e-02,  1.8280e-02, -3.4972e-02,
         8.5886e-02, -1.8740e-02,  3.6377e-02, -3.8848e-02,  6.2022e-02,
        -2.4653e-01,  1.0258e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7781e-01, -3.4329e+00,  4.6184e-01,  5.5350e-02, -4.4843e-02,
        -1.8777e-03, -2.8856e-01, -4.9377e-01, -1.5473e-01,  8.8916e-03,
        -5.9025e-01,  1.1449e-01, -3.8515e-03,  1.2798e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6136, -1.5208, -0.2785,  0.1849, -0.0902, -0.1973, -0.0613,  0.0727,
        -0.1814, -0.1463, -0.2825, -0.8316,  0.0772, -0.1265, -0.2565,  0.0511,
        -0.1635, -0.2403, -0.0180, -0.2450, -0.4029, -0.0255, -0.1342,  0.0269,
         0.1040,  0.1247,  0.0325,  0.0826, -0.0967,  0.0269,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1266, -1.4527, -0.1256, -1.2135, -1.0171,  0.4232,  0.1034, -0.3189,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.1196, -2.0151, -0.2612,  0.0080, -0.1378, -0.5120,  0.0626, -0.0299,
         0.0802,  0.0415,  0.0659, -0.0563, -0.3501, -0.3529, -0.0593,  0.0155,
        -0.0130,  0.0342, -0.0271,  0.0600, -0.0219,  0.0066,  0.0206, -0.1219,
        -0.3600, -0.0531, -0.0941,  0.0147, -0.2597, -0.5284,  0.0189,  0.0491,
        -0.0672,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4819e-01, -1.8795e+00, -8.6148e-01, -2.6499e-01,  4.9282e-03,
        -1.4260e-01, -1.8222e-01, -1.6448e-01,  7.6820e-02, -2.4935e-01,
         3.4239e-02, -4.0962e-02, -1.1341e-01,  8.3442e-02,  1.0496e-01,
        -3.0929e-02,  5.9812e-02,  1.0633e-02, -8.9189e-02,  2.0486e-02,
         1.7915e-02,  8.5938e-03, -3.9774e-03,  1.1998e-02,  1.1222e-01,
        -4.9352e-02, -1.0809e-01, -3.7894e-01,  2.4459e-02, -2.8875e-02,
         1.7024e-02, -4.9308e-02, -1.9840e-01, -3.4432e-01,  7.9127e-02,
         4.8386e-02, -7.9893e-02,  8.3706e-02, -4.2892e-05,  3.3278e-02,
        -6.8929e-05, -3.4927e-03, -7.7298e-02, -2.7838e-01, -3.0145e-02,
        -2.2936e-02,  3.5311e-02,  7.0128e-03,  2.9157e-02, -1.2420e-04,
         4.4120e-02, -1.7808e-01, -3.4622e-02, -8.0628e-02, -3.0978e-02,
         1.5405e-03,  8.9337e-03, -3.7161e-03,  3.2370e-02, -2.1255e-01,
        -2.0958e-02, -6.3386e-02, -4.0940e-01,  6.2542e-02,  1.5681e-02,
        -7.4395e-04, -5.2091e-02,  7.1108e-02,  3.8160e-02, -4.4012e-02,
         1.2626e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1836e-01,  1.0362e-01, -4.8687e-02, -3.0042e-02, -3.0031e-01,
        -9.8147e-02, -4.6206e-01, -8.8228e-02,  3.4757e-03, -3.8496e-03,
         8.4678e-04,  4.3543e-04,  3.5361e-02, -1.6542e-02,  8.4075e-02,
         3.5886e-02,  3.1384e-02,  5.9454e-02,  1.3167e-01, -5.9638e-01,
        -1.4201e-02,  5.5323e-02,  2.1881e-01, -5.2652e-01,  1.0124e-01,
        -3.7562e-01, -7.1234e-02,  1.2851e-01,  6.7712e-03, -6.5139e-03,
         8.9435e-02, -1.0764e-01, -4.1833e-01, -5.6057e-01, -1.3152e-01,
        -9.0555e-02, -5.0487e-01, -5.6501e-02,  3.9819e-02,  2.9969e-02,
        -4.1367e-02,  4.1484e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6909e-02,  4.1938e+00, -1.7071e-01,  2.9706e-01,  7.2433e-02,
        -1.2497e-01, -2.3179e-01, -9.2689e-02,  4.6324e-01,  9.9287e-01,
         1.8526e-01,  3.6136e-01,  4.9731e-01, -2.6926e-04, -1.0635e-01,
         1.1332e-01,  6.7014e-02,  9.1940e-01,  8.6716e-01,  7.1020e-02,
         2.5180e-01,  3.1100e-01, -4.8176e-02, -1.0568e-01,  4.3674e-01,
         1.0683e+00, -1.2639e-01,  3.6402e-02,  1.2087e+00,  1.1516e-01,
         5.3896e-03,  2.2279e-01, -2.3867e-02,  3.1572e-02,  2.6836e-01,
        -1.3266e+00,  6.3997e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1999,  5.0981,  0.2889,  1.2418, -0.3087, -0.2058, -0.1326, -0.3650,
        -0.1079, -0.1092,  0.3018, -0.2687,  0.2394,  0.0716,  0.2715,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3728, -0.5381,  0.0477, -0.1435, -0.6710, -1.1690,  0.0368, -0.5946,
        -1.4329, -0.2713, -0.1588, -0.1116, -0.3010, -0.0262,  0.3897,  0.2164,
        -0.2412, -0.2662, -0.2909,  0.5864,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1076,  0.0201,  0.0160,  0.0258, -0.5092, -0.0866, -0.0012, -0.0512,
        -0.0205, -0.0775, -0.2318, -0.2924, -0.0587, -0.0436, -0.1609, -0.1679,
        -0.0363, -0.0795,  0.0212, -0.1600, -0.0134,  0.0154, -0.0287, -0.0124,
         0.0124, -0.2690, -0.3040,  0.0518, -0.0119, -0.1283, -0.2923, -0.0591,
         0.0094, -0.0193, -0.1279, -0.1711,  0.0401, -0.1352,  0.0016, -0.0127,
        -0.0372,  0.0146, -0.1144,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5915e-01, -8.2274e-02, -2.5822e-01, -5.4973e-02, -4.0960e-01,
        -2.0181e-01, -3.0752e-01, -6.4279e-01, -1.5346e-01, -1.0887e-02,
         1.1086e-02,  2.2533e-02,  6.8884e-02, -3.7492e-01, -7.2628e-01,
        -6.0530e-03,  1.6813e-02,  1.1888e-01, -5.8015e-04, -1.8451e-01,
        -3.9328e-01, -6.3749e-02,  2.7934e-02,  1.4328e-02, -1.4618e-01,
        -9.3806e-02,  3.7809e-02,  2.4587e-02, -7.0002e-01, -2.4325e-02,
        -1.6120e-02, -3.3543e-01, -5.1878e-02, -3.3430e-01, -3.7513e-01,
        -2.9954e-02, -6.1456e-02,  6.4939e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2376, -2.2814, -0.5452, -0.7938,  0.1122, -0.4708, -0.1099,  0.1541,
        -0.2283, -0.3825, -0.1105,  0.0241, -0.2544,  0.0420,  0.1521, -0.1845,
         0.1837, -0.4781, -0.0271,  0.0793,  0.0678, -0.0506,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0296, -0.7874, -0.0168,  0.0953, -0.0577, -0.1710, -0.0796,  0.1235,
         0.0397, -0.4724, -0.0280,  0.0626, -0.0802,  0.0503, -0.1245, -0.2561,
        -0.4724, -0.1592, -0.0107, -0.2815, -0.0081,  0.0588, -0.2223, -0.0307,
        -0.0677, -0.0067, -0.0046,  0.0517, -0.0041, -0.1530,  0.0179, -0.0634,
        -0.0130, -0.0679, -0.1520, -0.3487,  0.1108, -0.0100, -0.0543, -0.1795,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0168, -0.0400,  0.0346,  0.0571, -0.0393, -0.2522, -0.0492,  0.0299,
        -0.2850,  0.0012,  0.0232, -0.0332, -0.1042, -0.6995, -0.6529,  0.0626,
        -0.0094,  0.0806, -0.5678,  0.0703,  0.0057, -0.0518, -0.0293, -0.0121,
        -0.0474, -0.3470, -0.0362, -0.3033, -0.4447,  0.0268, -0.1452, -0.0672,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3254, -2.5505, -0.2082, -0.0400, -0.0415, -0.0503, -0.1605, -0.4794,
        -0.5954,  0.0103, -0.0112, -0.1619, -0.1279,  0.0348, -0.0246, -0.1423,
        -0.1883, -0.1403,  0.0066,  0.0236, -0.1038,  0.0490, -0.0192, -0.3606,
        -0.0444,  0.2402, -0.2610, -0.0753,  0.0512, -0.0356,  0.0231, -0.0727,
        -0.0304,  0.1387, -0.2030,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.0461, -0.3385, -0.6881, -0.7264, -0.0451, -0.1073, -0.9002, -0.7186,
        -0.2362, -0.9099,  0.0168, -0.1429,  0.0553, -0.2176, -0.0933, -0.0667,
        -0.1472, -0.0465,  0.0368,  0.1371,  0.1299,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0211, -1.5435, -0.0591, -0.4216, -0.7479, -0.2317, -0.1238, -0.3398,
        -0.5187,  0.1025, -0.0658,  0.0663, -0.0482, -0.1241, -0.2301, -0.0131,
        -0.2973,  0.1661,  0.1221,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0817e-01, -1.2471e-01, -8.2261e-01, -1.0329e-01, -4.0510e-01,
        -7.5047e-02,  7.5347e-03,  1.1622e-01,  2.7259e-05, -4.8702e-01,
        -7.2376e-01, -2.0311e-01, -3.2059e-01, -7.0059e-01, -4.7476e-02,
        -2.2268e-01,  3.0258e-02, -3.5209e-01,  2.9798e-01, -2.7903e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0121, -2.1840,  0.0941, -0.4118, -0.0090, -0.0224,  0.0821, -0.2354,
        -0.0933, -0.1308,  0.0814, -0.0999,  0.1320, -0.5358, -0.1894, -0.2481,
        -0.4721, -0.0465, -0.1090, -0.2967,  0.0460,  0.2291,  0.2160,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1465, -1.4748, -0.6844, -0.2123,  0.0753, -0.3298, -0.0345, -0.2740,
        -0.5687, -0.6297, -0.1223, -0.3481, -0.0197, -0.3808,  0.0535, -0.0053,
         0.0233, -0.0711, -0.0747,  0.0607, -0.1390, -0.0594,  0.1075, -0.2715,
         0.1482,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4547e-01, -2.6853e+00, -3.5005e-01, -1.3298e-01, -6.2941e-02,
        -9.6247e-03,  4.2395e-04, -5.3753e-03,  2.2348e-01,  6.0184e-02,
        -5.7795e-01,  3.7240e-02, -8.3950e-02, -4.1883e-02, -9.7954e-02,
        -4.6386e-02,  7.6714e-03, -6.1096e-03,  7.3303e-02, -1.8937e-01,
        -8.5961e-01, -3.2173e-01, -3.9557e-01, -1.3474e-01, -1.3341e-01,
        -1.1791e-01, -5.9078e-02, -2.1367e-02, -1.0622e-01, -2.9308e-01,
        -1.6539e-01,  4.5965e-02, -1.1100e-01, -8.7933e-02,  3.3937e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2463, -0.0032, -0.0597, -0.5049, -0.1239,  0.0667, -0.4479,  0.1030,
        -0.6128, -0.0061, -0.0693, -0.1983, -0.0028, -0.5646, -0.6111, -0.8020,
        -0.1066, -0.5270,  0.0788,  0.0443, -0.0942,  0.1783,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4819, -0.1059, -0.0069,  0.1602,  0.0867,  0.3061, -0.1131,  0.1198,
         0.2260,  1.9691, -0.0997,  0.2304, -0.1790, -0.0803,  0.0495,  0.1179,
         0.2871,  0.0612, -0.0317,  0.1547,  0.3138,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4160, -2.7876, -0.2897, -0.1901, -0.1565,  0.0217, -0.3457, -0.6660,
         0.2131,  0.0575, -0.1486, -0.0329, -0.0436, -0.6554,  0.0444, -0.5422,
         0.2835,  0.1711, -0.2702,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2431e-01,  9.7302e-02,  9.1492e-04,  4.4914e-02, -8.7416e-03,
        -3.9897e-01,  3.7303e-02, -5.5128e-02, -7.1436e-03, -3.9073e-02,
        -3.2282e-02, -1.2799e-01, -5.6471e-02, -1.0551e-01, -8.9208e-03,
         3.5223e-03, -1.2484e-01, -2.9018e-01, -5.7498e-02, -1.2746e-01,
         3.9975e-04, -1.1028e-01, -1.5395e-01, -8.3846e-03, -3.0153e-01,
        -3.6695e-02, -1.5192e-02, -1.5924e-01, -6.7952e-02, -4.1979e-01,
         2.3557e-02, -3.9360e-02, -2.0222e-01, -3.6719e-01, -1.3056e-02,
        -2.7938e-02, -4.0886e-02,  4.2904e-02, -1.5977e-02, -1.4432e-01,
        -1.4777e-02,  4.1079e-02, -6.5743e-03, -1.9553e-02,  1.7214e-02,
        -1.2639e-02,  5.9322e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1582, -1.5548, -0.6045, -0.0343,  0.0497, -0.0104, -0.0282,  0.0941,
        -0.2719,  0.1489,  0.0552,  0.0395,  0.0185, -0.4495,  0.1184,  0.0742,
        -0.2298,  0.0976, -0.0180,  0.0181,  0.0111, -0.3456, -0.6200, -0.3934,
         0.0291,  0.0944, -0.1582, -0.0206, -0.0282,  0.0703,  0.0499,  0.0634,
        -0.2322,  0.2057,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4952e-01, -2.4134e+00, -1.0616e-03,  9.5803e-02, -1.5507e-01,
        -3.4643e-01, -1.4397e-01, -3.0280e-02, -4.7697e-03, -2.5712e-01,
         2.7376e-02,  1.6994e-02, -4.8354e-01, -7.1772e-01, -3.1567e-02,
        -5.2595e-03, -1.2033e-01, -1.7946e-01, -2.5405e-02, -5.2999e-01,
         6.8689e-03, -2.0591e-02, -9.7582e-03,  5.4486e-02,  1.1193e-02,
         3.6714e-02, -1.3024e-02, -1.1019e-01, -3.2791e-02, -1.5471e-01,
         2.0856e-02, -7.1071e-02,  9.5681e-02,  1.6663e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-2.0417e-01, -2.5060e+00, -3.7384e-02, -1.3867e-01, -2.4214e-01,
        -6.4532e-01,  5.3736e-02,  5.5814e-02,  2.1225e-03, -6.9365e-01,
         1.8691e-01, -2.7023e-01, -4.0018e-01, -9.6784e-01, -2.6172e-01,
         2.9912e-02,  1.7667e-01, -1.2721e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2031, -0.0764, -0.0649, -0.0735, -0.0648, -0.0437,  0.0320, -0.1011,
        -1.1708, -1.8746, -0.3566, -0.1358, -0.0869, -0.1025, -0.0217, -0.1882,
        -0.0202,  0.0805, -0.0382, -0.9384,  0.0257,  0.0832, -0.1627,  0.0333,
        -0.0345, -0.0231, -0.1253,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1527, -0.0339, -0.0802,  0.0438,  0.0465,  0.0394,  0.0123, -0.1595,
        -0.6869,  0.1822,  0.0832, -0.2158, -0.9127, -0.2527,  0.0124, -0.0435,
        -0.0028, -0.0833, -0.0647, -1.5332, -0.0875, -0.0113, -0.3835, -0.1169,
        -0.0890, -0.1893,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1443, -2.3089, -0.3845, -0.6204, -0.8245, -0.1923,  0.0203, -0.0228,
        -0.3723, -0.1850, -0.4104, -0.2732,  0.2517,  0.0534, -0.0934, -0.6108,
        -0.2933, -0.3082, -0.0615, -0.0779,  0.4825,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2468, -1.1319, -0.8877, -1.1033, -0.2736, -0.0476, -0.4287,  0.0088,
        -0.1258,  0.1242,  0.0847, -0.3765,  0.0518, -0.0220,  0.0469,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3603,  0.1851, -0.0247, -0.0879, -0.0955, -0.0224, -0.1326,  0.2772,
        -0.1824, -0.0786, -0.2713,  0.0733,  2.6948,  0.8248,  0.1064,  0.1584,
        -0.1046,  0.2442,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5429, -1.5125, -0.6403, -1.0333, -0.1351, -0.0471,  0.1433, -0.2593,
        -0.0942, -0.4348, -0.5764, -0.2519,  0.2092, -0.2536, -0.0540,  0.0784,
         0.0617,  0.0932,  0.4427,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0962e-01, -7.0644e-01, -6.6463e-01,  5.1633e-02, -3.1689e-01,
        -1.4449e-01,  1.7781e-01, -6.2785e-02, -9.6979e-02,  1.1167e-03,
         8.5744e-03, -3.6763e-02, -1.2804e-01,  3.4596e-02,  7.7954e-03,
         7.5185e-02, -3.5786e-01, -1.2490e-02,  4.4264e-02,  6.6131e-02,
        -2.8793e-01,  6.4429e-02, -2.1370e-01, -4.6629e-02, -3.2923e-01,
         2.8615e-02, -1.8396e-01,  7.7885e-02, -2.9466e-02, -9.2573e-02,
        -1.7279e-04, -1.1557e-02, -1.9671e-01,  1.6309e-02, -1.9166e-01,
         8.2557e-03, -3.1070e-01, -5.3434e-02, -5.2799e-02, -2.4977e-03,
         6.8163e-03,  1.3978e-03,  8.0000e-02,  3.2055e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1787, -1.7623, -0.2397, -0.5735, -0.7063,  0.0361, -0.1500, -0.0320,
        -0.6018, -0.0626, -0.1014,  0.0253,  0.1007,  0.0062,  0.0043,  0.0059,
         0.0885, -0.2104, -0.1803, -0.2616, -0.2016,  0.0157, -0.0051, -0.0256,
         0.0227, -0.0378,  0.0167, -0.3144, -0.0597,  0.0394, -0.0269, -0.1858,
        -0.0628, -0.0315, -0.0157,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6965e-01, -2.0496e+00, -1.1795e-01, -4.7436e-01, -8.2374e-02,
        -1.1681e-01, -6.0245e-01, -3.9928e-01, -2.1024e-02, -1.3426e-02,
        -2.9430e-03, -2.4026e-01, -8.0071e-01,  2.9347e-02,  1.5091e-03,
        -4.1733e-01,  4.2030e-02, -3.6254e-02,  7.5591e-03, -1.2960e-02,
         4.8883e-02,  3.8975e-02,  8.4589e-02,  3.4056e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4949, -1.6968, -0.0760, -0.3890, -0.2115,  0.0072,  0.1783, -0.0064,
        -0.6625, -0.1166, -0.2320,  0.1174, -0.0735, -0.2152, -0.4601, -0.0735,
         0.1038,  0.1304, -0.3974, -0.0464,  0.0073, -0.3169,  0.0703, -0.2264,
        -0.0565, -0.3320, -0.1765, -0.0871, -0.0499, -0.0378, -0.1555, -0.0064,
         0.1002,  0.0640,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3708e-01, -4.9841e-04, -1.6821e-01,  1.7894e-03,  4.2589e-02,
        -4.5813e-02, -8.4226e-01,  8.5103e-02,  1.0075e-01,  2.5375e-02,
         1.2621e-03,  8.2717e-02, -5.6451e-01, -1.0475e-01, -5.3199e-01,
        -7.7800e-01, -1.4727e-01, -2.5694e-02,  2.8305e-02,  5.7966e-02,
        -5.0827e-02, -4.3021e-02,  6.7819e-03, -2.1413e-01, -1.7345e-01,
        -8.2256e-02, -1.8569e-01, -2.5626e-01, -3.8282e-02, -6.3937e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-1.6999e-01, -1.9672e-01,  4.8364e-01,  1.9087e+00,  1.7223e-01,
        -1.7958e-01,  2.9780e-02,  3.6352e-01,  6.2816e-01,  1.2763e+00,
        -7.5851e-02, -4.2865e-04, -7.6687e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0825, -0.0128,  0.2310, -0.0576,  0.1001, -0.1669, -0.4102, -0.8638,
        -1.1375, -0.0788,  0.1012, -0.4941,  0.0439, -0.2515, -0.4239,  0.2118,
        -0.0482, -0.1615,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0137, -1.2303, -1.3822, -0.1316, -0.5658,  0.0593, -0.3432, -0.1350,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0946, -1.1013, -0.0459, -0.1764, -0.1151, -0.0312, -0.0864, -0.1022,
        -0.0217, -0.1516, -0.0177,  0.0986, -0.1387, -0.0244, -0.0153, -0.0318,
         0.0130,  0.0156,  0.0121, -0.0030,  0.0635, -0.0920, -0.3162, -0.1204,
        -0.0709, -0.0964,  0.0160, -0.0588, -0.4074, -0.1606,  0.0342, -0.3956,
        -0.0561, -0.6040, -0.0314, -0.1428, -0.2111, -0.0573,  0.1172,  0.0620,
        -0.1209, -0.1000, -0.0958,  0.0453,  0.1523, -0.2773,  0.2077],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3079, -0.7180, -0.0537, -2.6600, -0.9664, -0.2019,  0.3224, -0.1718,
         0.2551,  0.4778,  0.1027,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4486,  3.7474,  0.2479,  0.2626, -0.0664, -0.0053,  0.3514,  1.3610,
        -0.3019,  0.5477,  0.2111,  0.1382,  0.5123, -0.1681,  0.3135,  0.0434,
        -0.0327, -0.3508,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0442,  0.7693,  3.2674,  0.2387,  0.0221, -0.1880,  1.2571,  0.0538,
         0.4672,  0.3709,  0.1621, -0.1336, -0.0379, -0.0073, -0.0270,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8554, -2.8838, -0.4898, -0.0638,  0.1695, -0.3320, -1.2723, -0.2027,
         0.5208, -0.7418, -0.7498, -0.0546, -0.1360,  0.4027,  0.0868,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2158,  0.0225, -0.0681,  0.0283, -0.0635,  0.0012,  0.0678,  0.0357,
        -0.4874, -0.2767, -0.0786, -0.6492, -0.8360,  0.0081,  0.0169, -0.0444,
        -0.2768, -0.1471,  0.1301,  0.1274, -0.4549, -0.8318,  0.0127,  0.1418,
        -0.2165, -0.0238,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3542,  0.4685,  0.1639, -0.0025, -0.1960, -0.4423,  0.0319,  1.7425,
         1.8730, -0.2847,  0.1594, -0.1340,  0.1264,  0.1777,  0.4342,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3977,  2.7274,  0.0376,  0.1492,  0.2814,  0.2041,  0.3862, -0.0925,
        -0.0238,  0.7265,  0.0191, -0.0316,  0.2006,  0.0627,  0.0212, -0.0379,
         0.2052,  0.0336,  0.3089, -0.0402, -0.0245, -0.0929, -0.1742, -0.0352,
        -0.0823,  0.0813,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5182e-01, -6.5670e-02,  1.2732e-01,  1.6583e-01, -1.0470e-01,
        -1.3701e-01, -8.0889e-05,  1.3297e-02, -1.8063e-01, -3.0629e-01,
        -9.0036e-01, -1.8122e-01, -5.3469e-02, -1.1665e-01, -7.5338e-01,
        -6.8342e-01, -6.9630e-02, -1.7505e-01, -4.0312e-02, -5.3962e-01,
        -4.0239e-01, -2.6685e-01, -2.3866e-01, -2.2495e-01, -2.2626e-01,
         4.4125e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.9483e-01, -4.1808e+00,  7.6534e-02,  2.2110e-01, -3.2074e-01,
        -7.7154e-02, -1.8986e-01, -8.0995e-01, -7.7044e-02, -2.4080e-01,
        -4.7892e-02, -2.4921e-01, -5.5769e-01,  1.3044e-01, -1.5845e-01,
         1.6834e-03,  5.5267e-02,  1.6019e-01, -1.1777e-01, -2.3468e-02,
        -1.4129e-02, -3.5038e-01,  5.0067e-02, -3.1429e-01, -6.2076e-01,
         1.9692e-01, -2.9539e-01, -6.2841e-02, -1.0861e-02,  6.9575e-02,
         4.9262e-02, -3.8480e-01, -2.5848e-02,  5.0038e-02, -1.1313e-01,
         1.7322e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4783,  0.9214,  0.4715,  0.1621,  0.4625, -0.0013,  0.5155,  0.6186,
        -0.1294,  0.0052,  0.4873,  0.0643,  0.0718, -0.1442,  0.1187,  0.2469,
        -0.0260,  0.0095, -0.0420, -0.1311,  0.4033,  0.3356, -0.0345,  0.0445,
         0.2467,  0.4214, -0.0647,  0.3890,  0.3132,  0.0513,  0.0683,  0.2763,
         0.3943, -0.1179,  0.1698, -0.1179,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0678, -0.2562,  0.0203, -0.0188,  0.2601, -0.9384, -0.1260, -0.0516,
         0.0047, -0.0571, -0.1660, -0.4267, -1.0394,  0.2462,  0.0053, -0.1878,
        -0.0096,  0.0037, -0.0195,  0.3481,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6544, -1.2786, -0.1888, -0.2008,  0.1499, -0.0510, -0.2638, -0.7996,
        -0.1894, -0.2988,  0.0148, -0.0427, -0.0041, -0.0957, -0.5956, -0.4526,
        -0.1934, -0.2926, -0.0998, -0.0890, -0.0428,  0.0039,  0.0239,  0.1165,
        -0.0963,  0.0572,  0.0065,  0.0165, -0.0388, -0.3309, -0.5549, -0.3260,
         0.0831, -0.5095, -0.0546,  0.0462, -0.2103,  0.1756,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3292, -1.5528, -0.8976, -0.7695, -0.1340,  0.0156,  0.1018,  0.0658,
        -0.0086,  0.0628, -0.2057, -0.3597, -0.0505,  0.0794,  0.0206, -0.1858,
         0.0428, -0.2832,  0.0644,  0.0474,  0.0431,  0.0345,  0.1247,  0.0362,
        -0.0434, -0.0672, -0.1290, -0.2188, -0.5281,  0.0271, -0.1576, -0.1353,
         0.0578,  0.0260, -0.2616,  0.0733, -0.0981, -0.1236,  0.0025, -0.1222,
        -0.0628], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0538, -4.4575, -0.1381, -0.0801, -0.2612, -0.7380, -0.1715, -0.0822,
        -0.1941,  0.0818,  0.0066, -0.4515, -0.7004,  0.1153, -0.0062, -0.3483,
         0.1252, -0.2792, -0.0355, -0.0264,  0.0898, -0.0371,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2051,  0.0573,  0.0434, -0.0140,  0.0363, -0.0645, -0.0202,  0.0607,
        -0.1280,  0.0494, -0.0523, -0.3497, -0.0025, -0.1292,  0.1135, -0.3856,
         0.0875, -0.4014, -0.1385, -0.4860, -0.7278,  0.1316,  0.1845, -0.4289,
         0.0588,  0.1087, -0.0314, -0.1945, -0.6646, -0.1289, -0.2976, -0.4290,
        -0.0033,  0.2238,  0.0764,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0142, -0.6410, -1.2093, -0.1444, -0.8496,  0.0040,  0.0843, -0.0445,
         0.0456, -0.0211, -0.1420,  0.1335, -0.1133,  0.1620, -0.0682, -0.0417,
         0.1836, -0.2629, -0.5759,  0.0143, -0.4078, -0.0224, -0.3029, -0.3533,
        -0.0903, -0.0905, -0.5465, -0.3879,  0.2002,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1240, -1.8772, -0.3435, -0.4021, -0.0583, -0.3124,  0.0945, -0.1542,
        -0.1199, -0.0465, -0.3086, -0.2427,  0.0250, -0.0284, -0.0451, -0.3824,
        -0.2137, -0.1607, -0.3077,  0.0212, -0.0029, -0.1043, -0.1690,  0.0681,
         0.0157,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0485,  0.1513,  0.0378,  0.2580, -0.5941,  0.1104, -0.1350, -0.6189,
        -0.7697, -0.0637, -0.0053,  0.0801,  0.0465,  0.1610, -0.6229, -0.4807,
         0.2874, -0.4008, -0.0868, -0.0385, -0.0638, -0.1762,  0.1985,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0578, -0.0105,  0.0018, -0.0527, -0.0471, -0.0669, -0.0256,  0.0119,
         0.0263, -0.0315,  0.0447, -0.0505, -0.5382, -0.5294, -0.0628, -0.0185,
         0.1654, -0.2537,  0.0357,  0.0072, -0.0694, -0.0245, -0.0075, -0.1099,
         0.0188, -0.0352, -0.8379, -0.0748, -0.6835, -0.1595, -0.3601, -0.0437,
        -0.0644, -0.1532, -0.2420, -0.0987, -0.1093,  0.0439, -0.3129,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1273,  0.1229,  0.5923, -1.2851, -1.3172, -0.3772, -0.4569, -1.2311,
        -0.6595,  0.4104,  0.2115, -0.6645,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 6.7218e-01, -1.0413e+00, -1.6300e+00, -2.1475e-02, -9.8463e-02,
        -1.1545e-01, -2.7227e-01,  9.2335e-02,  2.8640e-03, -7.7005e-02,
         5.2927e-02,  1.1641e-01,  2.4344e-02, -1.5984e-02, -1.1193e-01,
         4.1527e-03, -1.3962e-01, -3.1054e-01, -1.7456e-02, -6.2664e-02,
        -1.0729e-01, -1.3145e-01,  8.2920e-02, -2.6227e-02, -4.0891e-02,
         1.4268e-02,  7.0582e-02, -5.9364e-02, -4.5989e-02,  1.6138e-02,
         1.4829e-01,  1.1643e-01, -8.4094e-02,  6.2561e-02, -5.7316e-03,
        -3.2790e-02, -2.3762e-01,  7.6803e-04,  1.1081e-02,  6.1563e-03,
         6.0395e-02,  2.2027e-02,  1.6455e-02,  8.3570e-03,  2.1154e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0816e-01,  3.8128e+00,  3.6230e-01,  5.0216e-01, -7.7418e-01,
         1.2628e-01,  3.8297e-02,  3.5148e-01,  9.0270e-01,  9.9881e-02,
         3.0885e-03, -2.4737e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3392,  3.9558, -0.4753,  0.5904,  0.1303, -0.1293, -0.1092, -0.3212,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6635e-02,  1.8617e+00,  1.8023e-01, -3.3471e-02,  3.6187e-01,
        -2.5023e-04,  4.6087e-02,  1.1085e-01,  5.2813e-01,  7.9565e-01,
         2.7921e-01,  1.0847e-01,  1.1102e-01,  2.8350e-02,  2.0016e-02,
        -5.3064e-03, -1.2466e-01, -1.1245e-01, -3.9999e-02, -7.5244e-02,
        -1.0227e-02, -3.7361e-02, -1.4359e-02, -5.5033e-02, -3.6534e-02,
         1.5847e-01, -6.1127e-02, -5.2895e-02,  8.8246e-02, -2.1310e-02,
         5.4049e-03, -2.5554e-02, -3.6601e-02, -3.8472e-03, -2.2489e-02,
         1.3718e-02,  9.6612e-03, -5.1037e-02, -3.9540e-02, -1.0568e-02,
        -1.1120e-01,  4.0045e-02,  7.3174e-02,  4.7263e-02, -3.3227e-01,
        -4.5589e-01,  5.7494e-01,  1.2779e-01,  3.7912e-01,  9.2341e-01,
         4.2383e-02, -3.6851e-02,  2.0381e-01, -1.3204e-01,  1.4686e-01,
        -8.0631e-02, -9.1340e-02, -6.9044e-02,  4.3766e-02, -3.6213e-01,
         6.8157e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0655, -0.0779,  0.1520, -0.1791, -0.6004, -0.0586, -0.1589,  0.0855,
         0.0547,  0.0333, -0.0731, -0.3736,  0.0299, -0.3061, -0.2186,  0.0358,
         0.0156,  0.0362, -0.1713,  0.0554, -0.6717, -0.5399, -0.0503, -0.1992,
        -0.2634, -0.2797,  0.1165, -0.0484, -0.1325, -0.1933, -0.0616,  0.1217,
        -0.1757,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2459,  0.1498,  0.9377,  0.2902,  0.0827, -0.0289,  0.3574, -0.1840,
        -0.0982, -0.0319,  0.0247,  0.0263,  0.1868,  0.0535,  0.0896, -0.3376,
        -0.1174,  0.3871,  1.1568,  0.0176, -0.1484,  0.0129, -0.2230,  0.3384,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1289, -0.4964, -0.8710, -0.3718, -0.0606, -0.5188,  0.0205, -0.3994,
        -0.0372, -0.4638, -0.4771, -0.4925, -0.7132,  0.0069,  0.0401, -0.0769,
         0.0954, -0.1115,  0.0444,  0.0813, -0.2213, -0.1200,  0.0292,  0.1529,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3221, -0.2355, -0.5955, -0.5279,  0.0558,  0.1344, -0.1022, -0.4329,
        -0.0410, -0.0394,  0.0240, -0.0835, -0.0332, -0.0033, -0.3140, -0.6176,
        -0.4906,  0.0012,  0.0504, -0.1620, -0.6394,  0.1095, -0.1931, -0.4264,
         0.0351,  0.0019, -0.2440,  0.0202,  0.1175, -0.0859, -0.1845, -0.0759,
         0.1010,  0.0776,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4428e-01,  1.5546e-01, -1.0494e-01,  2.0137e-02, -3.2629e-01,
         2.0149e-02, -2.6312e-02, -1.3239e-03, -6.1138e-04, -6.6362e-03,
         1.4737e-01,  2.5623e-01, -1.3895e-01, -8.3714e-01,  1.6674e-01,
        -2.0411e-02, -7.8962e-01, -1.3172e-01, -3.6956e-01, -2.1165e-01,
        -4.2734e-01, -5.7705e-01,  1.6569e-01, -3.8271e-01, -1.3946e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6878, -0.0393, -0.2291, -0.0051, -0.1099, -0.0057, -0.7537, -0.5717,
        -0.5945, -2.0021,  0.2527, -0.2051, -0.2523, -0.0121, -0.0705,  0.2423,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1634,  1.6462, -0.0191,  0.9143,  0.3530,  0.4335,  1.0368,  0.1097,
        -0.4208,  0.0507, -0.0448, -0.1817,  0.1543, -0.0185, -0.0951, -0.0908,
         0.1719,  0.4352,  0.1766,  0.0815, -0.0537,  0.0269,  0.0100,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.1107e-01,  1.1384e-01,  1.4761e-02,  8.1748e-01,  2.4318e+00,
         5.1890e-01, -4.4877e-01, -2.3299e-02,  1.0777e-01, -9.7705e-02,
        -4.7262e-01, -2.0630e-01,  1.3837e-01, -1.9367e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.4552,  1.3866,  0.7638,  1.0766,  0.2055,  0.1803,  0.3093, -0.2686,
         0.2341,  0.1058,  0.8868,  0.7508,  0.3735,  0.0215,  0.0423, -0.3207,
        -0.0511,  0.6382,  0.0986,  0.3934,  0.1251,  0.1790, -0.0894, -0.1217,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3732,  0.0234, -1.0934,  0.0865, -0.9332,  0.0440, -1.6482,  0.1007,
         0.0325,  0.0034, -0.0040,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1945, -0.0854,  0.1863, -0.0160,  0.1473, -0.4994, -0.8480,  0.0839,
        -0.4253, -0.0700, -0.1821,  0.0674, -0.2248, -0.1441, -0.3733, -0.1930,
        -0.2936, -0.2605,  0.1071,  0.0481,  0.0252,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2575, -0.0079,  0.1376, -0.0029,  0.0600,  0.0937,  0.2983, -1.4989,
        -1.4925, -1.3953,  0.0072,  0.3489, -0.0526, -0.3955,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3663,  0.0470,  0.1525, -0.1180,  0.0561,  0.1064,  0.0301, -0.1633,
         0.1034, -0.2449,  0.5297,  0.2720,  1.9167,  0.1109,  0.0629,  0.1446,
         0.0372, -0.0130, -0.0453, -0.0044,  0.1575,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4024, -1.8752, -0.0155, -0.8126,  0.4457, -0.5153, -0.0578, -0.6740,
         0.0772, -0.3236, -0.1215, -0.2578,  0.1279,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0214, -0.8619, -0.1352, -0.0330, -0.0695, -0.0149, -0.2326, -0.6801,
        -0.0488, -0.0553,  0.0402,  0.1253, -0.0162,  0.0255, -0.0051, -0.2140,
        -0.0436, -0.0689,  0.0118, -0.0162, -0.0409, -0.0414,  0.0168,  0.0272,
        -0.8768, -0.0603, -0.3689, -0.3444, -0.2489,  0.0046, -0.0461,  0.0338,
         0.0273,  0.0066,  0.0300,  0.0377, -0.1188, -0.0034,  0.0333, -0.0485,
        -0.1311,  0.0157,  0.0044,  0.1018, -0.0130], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9935e-01,  4.9863e-01,  6.2610e-01,  1.3583e-01,  8.4230e-03,
        -3.1941e-02,  1.3870e-02,  3.2263e-01,  9.7048e-05, -4.5683e-02,
         6.4025e-01,  1.3883e-02, -1.7682e-02, -5.7776e-02,  4.2924e-01,
         9.4736e-02,  1.0251e+00,  1.3197e-01,  5.6513e-03,  3.4515e-02,
         6.1372e-02,  7.9833e-01,  8.2119e-02,  5.1658e-01, -6.3679e-02,
        -8.4657e-02,  2.4631e-01,  1.3504e-01,  4.6214e-01,  1.3874e-02,
        -3.8753e-02,  9.0294e-03,  2.1070e-01,  2.3243e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2189, -0.4226,  0.1921,  0.6302, -0.0365,  0.0195, -0.0555, -0.1021,
        -0.5147,  0.1130,  0.1284, -0.0483, -0.1416, -0.1594,  0.0317, -0.3244,
         0.0250, -0.1516,  0.2353,  3.0083,  1.6701, -0.0595, -0.2419, -0.1430,
         0.2101,  0.4484,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3644, -3.5611,  0.0220, -0.3945, -0.0358, -0.1918,  0.1138, -0.3180,
        -0.2517, -0.0222, -0.0336,  0.0548, -0.0282, -0.0913, -0.3428, -0.3813,
        -1.0370, -0.0962, -0.2132, -0.1774, -0.4118, -0.0350,  0.1834,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1536, -0.7698, -0.0039, -0.3085,  0.0197, -0.0148, -0.1417, -0.3264,
        -0.4737, -0.0593, -0.0061, -0.1039, -0.0579, -0.0171, -0.7331,  0.0759,
        -0.0731, -0.1399, -0.3163, -0.0936, -0.0879, -0.0184, -0.0072,  0.1572,
         0.0945, -0.1036, -0.4332, -1.0037, -0.1936, -0.1341,  0.0827, -0.0609,
         0.1057, -0.0037, -0.1462, -0.0823, -0.1485,  0.1583, -0.1638,  0.0894,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1325,  0.1510,  0.0216, -0.1431, -0.5535, -0.0846, -0.0191, -0.3691,
         0.0744, -0.0937, -0.0360,  0.0125, -0.1448,  0.0480, -0.1671, -0.5541,
        -0.4273, -0.0987, -0.1210, -0.2237, -0.0616, -0.3790, -0.3974, -0.1025,
        -0.0245,  0.0651, -0.0792, -0.3128, -0.0495, -0.0953, -0.2933, -0.0271,
        -0.3296, -0.3793,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.2418, -0.3331, -0.3257, -0.1768, -0.6184, -0.0410,  0.1598, -0.0768,
         0.0130,  0.0268, -0.0633, -0.2089,  0.1813, -0.1290, -0.7895,  0.0279,
         0.0743, -0.0467, -0.3473, -0.0113,  0.0222,  0.0950, -0.0262,  0.1230,
        -0.2546, -0.6376, -0.1527, -0.1749, -0.0043,  0.0713,  0.1201,  0.0076,
         0.0442, -0.0088, -0.5867, -0.0407, -0.1651, -0.2327, -0.0442, -0.2340,
        -0.1921, -0.1015, -0.1421, -0.0340, -0.0212,  0.0018, -0.0116, -0.0010,
        -0.0850, -0.0653, -0.2130,  0.0257], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0449, -0.0918,  0.0120,  0.0285,  0.8982, -0.0251, -0.0534, -0.1191,
        -0.1400,  0.0291, -0.0200, -0.1133,  0.1069,  0.9876,  0.2000,  0.2434,
         1.2224,  0.0555,  0.6607,  0.1419,  0.1641, -0.0570,  0.0935,  0.6121,
         0.0385, -0.0032, -0.1046,  0.0722,  0.0393, -0.0022, -0.0304,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2376, -2.5599, -0.0867, -0.2687, -0.0294,  0.0326, -0.0548, -0.0650,
        -0.0998,  0.3007, -0.0402, -0.5185, -0.0832, -0.1749, -0.4488, -0.3825,
        -0.0117, -0.5890, -0.6700, -0.7108, -0.1905,  0.0295, -0.0936,  0.1907,
         0.3294,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1619,  0.1168,  0.0958, -0.0883,  0.0905, -1.2181, -2.0482, -0.2361,
         0.3454, -0.2322,  0.1297, -0.3132,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2987e-01, -3.0032e+00, -1.8483e-01, -2.8719e-01, -5.3977e-01,
        -8.3210e-02,  1.7149e-02, -2.2882e-02,  6.0486e-02,  9.0862e-02,
        -3.7384e-03, -3.8954e-02, -3.3487e-01, -4.4468e-01, -4.3514e-01,
        -3.0242e-01,  6.6580e-02, -4.8922e-02,  3.5780e-02,  1.0259e-01,
        -8.5115e-02, -3.6434e-02, -2.0977e-01,  7.4331e-02,  1.2053e-01,
        -3.4929e-01,  5.6907e-02, -2.9574e-03, -8.1455e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1946,  4.2972,  0.2175, -0.2308,  0.0293,  0.8953,  2.3177, -0.0321,
        -0.1582,  0.1793, -0.2484,  0.3055,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5329e-01, -2.0881e+00, -4.3951e-01, -5.7007e-01,  1.6740e-04,
        -7.4994e-01, -8.6050e-01, -1.0100e-01, -3.6216e-01, -5.0453e-02,
         2.5108e-02,  4.5551e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6675,  0.5700, -1.5681,  0.1271, -0.5296, -0.1112,  0.0803, -0.1041,
        -1.3747, -0.1292,  0.1217, -1.0445, -0.1768, -0.1447, -0.2067, -0.8165,
        -0.0082,  0.0205, -0.0228,  0.3120,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0647,  1.6948,  0.1592,  0.5651,  1.1169,  0.2086,  0.2021, -0.0923,
         0.2964,  0.1305,  0.0398,  0.1825,  0.0515,  0.0740,  0.0551,  0.0028,
         0.1881,  0.0117,  0.0790,  0.1554,  0.3851, -0.2157, -0.1075,  0.1345,
         0.1721, -0.0744,  0.1504,  0.4254,  0.3839, -0.0452, -0.0612,  0.0515,
         0.3419,  0.7613, -0.0753,  0.1452,  0.0261,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0404,  4.9305,  0.2858, -0.0071,  0.2704,  0.0799,  0.2848,  0.1892,
         0.0245,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0483,  0.0897,  0.2494, -0.6282, -0.0116,  0.0255, -0.4234, -0.4036,
         0.0508,  0.0384,  0.0851,  0.0853, -0.3433, -0.7503, -0.0947, -0.0542,
         0.0513, -0.0933, -0.1325, -0.4005,  0.0044,  0.0796, -0.0444, -0.0218,
         0.1033,  0.1940,  0.0189,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6251,  2.4379, -0.1097,  0.7007,  0.0792,  0.1223, -0.0847, -0.0616,
        -0.0033, -0.1589, -0.1583,  0.4693,  0.1468, -0.0744,  0.0197, -0.0663,
         0.2071, -0.1074, -0.0198, -0.0238, -0.0982,  0.3650,  0.4583, -0.0197,
        -0.0156,  0.1814,  0.0732,  0.1771,  1.0352,  0.0618, -0.0564,  0.0060,
        -0.0473, -0.1306,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.0812,  0.0334,  0.0155,  0.0069, -0.3399, -0.0886,  0.0574,  0.0650,
         0.0678,  0.1530,  0.0409, -0.4603,  0.0149,  0.0451,  0.0351, -0.2560,
        -0.4103,  0.0180, -0.2233, -0.0373,  0.0703,  0.0144, -0.1895, -0.0513,
         0.0215,  0.0434, -0.0125, -0.0184,  0.0329, -0.0441, -0.0145, -0.5519,
        -0.7015, -0.0178, -0.0931, -0.2720, -0.2822,  0.0369, -0.0523,  0.0089,
        -0.1030, -0.3413,  0.0385,  0.1765,  0.0358,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3437, -0.0259, -0.1624, -0.1136, -0.1333, -1.1774, -1.4525, -0.1477,
        -0.1723, -1.0887, -0.2801, -0.3155, -0.6718,  0.0774, -0.0258, -0.0178,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8236e-01,  5.2153e-02,  1.1445e-01, -6.6024e-04, -9.4461e-02,
        -1.6456e-01, -5.7579e-03, -2.1301e-01, -9.3640e-01, -8.9119e-01,
        -2.7712e-01, -6.2711e-01, -8.0590e-02, -6.9736e-01, -4.1347e-01,
         4.3701e-02, -9.7516e-02, -1.8572e-03,  4.7920e-02, -7.9918e-02,
         4.1274e-02,  1.2439e-01,  1.5321e-02, -1.6981e-01, -3.2134e-02,
         7.6488e-02,  2.1385e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1041,  0.0636, -0.2794, -0.0225, -0.1329, -0.0164, -0.1371, -0.0123,
        -0.0887, -0.0148,  0.0153, -0.0645, -0.1527,  0.0174, -0.0673, -0.0221,
        -0.0961,  0.0154,  0.0667, -0.1143,  0.0290, -0.0870, -0.1189, -0.1321,
        -0.0431,  0.0131, -0.2345, -0.3379,  0.0774, -0.2284,  0.0486, -0.2174,
        -0.1519, -0.0115, -0.0584, -0.1177, -0.0480, -0.3412, -0.0451,  0.0036,
        -0.0950, -0.1565, -0.0468, -0.0133,  0.0846, -0.0225], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5562e-01, -2.4745e-01,  1.4846e-01, -2.8271e-01,  9.2055e-02,
         4.3538e-02,  2.4754e-02,  5.3661e-01,  1.3860e-01,  4.2631e-02,
         5.4676e-02,  7.7516e-02,  7.6059e-03, -1.5464e-01,  7.7116e-02,
         7.3760e-03, -1.8591e-03, -2.1807e-04,  5.8219e-01,  1.2553e+00,
        -1.0066e-01,  3.1386e-01,  4.2600e-03,  1.6282e-01,  9.3147e-02,
        -2.5341e-02,  5.9118e-01,  6.4306e-02,  5.4481e-02, -1.4648e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4369,  0.0250, -0.1379,  0.0556, -0.1509, -0.1861,  0.3686,  1.2894,
         0.3802,  0.2185,  0.5602,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6102, -2.2521, -0.1591, -0.2051, -0.2902, -0.1134, -0.2121, -0.6994,
        -0.0268, -0.3119, -0.0563, -0.0513,  0.2919, -0.0272, -0.0905, -0.5077,
        -0.0929, -0.3736, -0.0571, -0.4209,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0934, -1.7520, -1.1286, -1.0286, -0.1699, -0.4439, -0.8196,  0.3404,
         0.0843,  0.2403,  0.0387,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3274, -1.9243, -0.9299, -0.9075, -0.2661, -0.0706, -0.2672, -0.4214,
        -0.6666,  0.0299, -0.0724, -0.1307, -0.2189, -0.1233, -0.0890, -0.2412,
        -0.1123, -0.0316, -0.0848, -0.0298,  0.3713,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7317, -1.5920, -1.0520,  0.2513, -0.2925, -1.4405,  0.0384,  0.1423,
        -0.4717, -0.1537, -0.1455, -0.2641,  0.0790, -0.0297,  0.0244, -0.1074,
        -0.1491,  0.2580,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0145, -0.6079, -1.0104, -0.1491,  0.0177,  0.0355,  0.0763,  0.0332,
         0.0127, -0.0308, -0.0187,  0.0452,  0.0185, -0.2654, -0.3053, -0.0088,
        -0.1766, -0.3073, -0.1405, -0.0367, -0.0859, -0.2500, -0.5598, -0.0718,
        -0.1311,  0.0624, -0.2629, -0.0811, -0.0812, -0.3145, -0.0126, -0.0116,
         0.0266,  0.0163, -0.0426,  0.0312,  0.0118,  0.1527,  0.0329,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2341e-01, -2.4647e+00,  3.9193e-02, -5.7881e-01, -7.3586e-02,
        -5.7466e-03,  2.6529e-02,  3.0747e-02, -1.1313e-01, -1.7502e-01,
        -2.6082e-01, -5.6237e-01,  2.1707e-02,  3.3829e-03,  2.7635e-02,
         1.3748e-02, -2.2320e-01,  7.1601e-02, -7.9457e-02,  1.9593e-02,
        -2.6081e-02,  1.5477e-02,  2.8178e-02,  4.8644e-02,  4.5197e-02,
         2.5186e-02, -1.3106e-01, -3.1336e-02, -4.0236e-01, -8.4700e-01,
        -1.5977e-03, -1.3838e-01, -8.0753e-02,  5.9472e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.4316,  0.0145, -0.9123, -1.2732, -0.1789, -0.3561, -0.0748,  0.1283,
        -0.4630,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0693, -1.1410,  0.0219, -0.5011, -0.1172, -0.8105,  0.1087, -0.2972,
        -0.3656,  0.1595, -0.3322, -0.2457, -0.1277, -0.0477, -0.0304, -0.7159,
        -0.0954, -0.7197, -0.0307,  0.0261,  0.0873,  0.0162,  0.0530,  0.0786,
        -0.2653, -0.0234, -0.0687,  0.0479, -0.0548,  0.0025,  0.0591, -0.2486,
         0.1023, -0.1898,  0.0432, -0.0084,  0.0797, -0.0026,  0.2319,  0.0943,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1140, -0.4754, -0.6094, -0.1387, -0.3810, -0.0995,  0.0315, -0.0425,
        -0.2809, -0.2086, -0.0172,  0.0320,  0.0021, -0.0425,  0.0298, -0.0074,
         0.0400, -0.0628, -0.1488,  0.0430, -0.0468, -0.0158,  0.0405, -0.1067,
        -0.0149, -0.1038, -0.2124, -0.0334, -0.2689, -0.4448,  0.0074, -0.0275,
        -0.0050,  0.0241,  0.0247,  0.0214, -0.1469, -0.3079,  0.0012,  0.0419,
         0.0258,  0.0149,  0.0364,  0.0309,  0.0219,  0.0458, -0.0410,  0.0136,
        -0.0070, -0.1108,  0.0220, -0.0292,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2107, -1.6964, -0.2364, -0.3644, -0.1065, -0.1928, -0.5541, -0.2443,
        -0.4774, -0.0103, -0.0863, -0.4622, -0.3787, -0.0743, -0.2795, -0.3583,
        -0.2913, -0.0422, -0.2661,  0.0877, -0.1390,  0.0254,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3334, -4.1958, -0.0394, -0.1267,  0.0285, -0.0421,  0.1827, -0.5002,
        -0.1901, -0.1015,  0.0708, -0.2462,  0.1277, -0.1574, -0.0600, -0.4788,
        -0.2003,  0.1932,  0.0781,  0.2171,  0.1267,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5688e-02, -9.9089e-01, -1.6755e-01, -2.2240e-01, -1.2646e-01,
        -8.4780e-02, -1.9283e-01,  2.2088e-02, -1.3570e-01, -4.5401e-02,
        -3.1512e-03, -2.7415e-02,  3.3322e-03,  8.3768e-03,  1.6823e-04,
        -8.4623e-03,  1.7476e-02, -6.6272e-03, -9.0697e-02,  3.3816e-02,
         1.5137e-02,  6.1074e-02, -1.1025e-02, -1.9932e-02, -1.5718e-02,
        -2.3462e-02, -1.1851e-01, -1.3735e-01, -1.0689e-02, -2.2569e-01,
        -3.4511e-01, -2.5394e-01, -1.3351e-02, -1.2540e-01, -7.2740e-02,
         1.4627e-02,  6.1623e-02, -1.0026e-02,  7.9692e-02,  2.2929e-02,
        -2.9562e-01, -2.4001e-02, -8.8765e-03,  5.6916e-02, -1.3083e-01,
         1.0975e-02, -2.1806e-01, -1.3783e-01, -6.3456e-03,  5.3838e-02,
        -1.1487e-01, -1.9709e-02, -2.1158e-02,  1.2289e-01, -2.9255e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0662, -0.0647,  0.0962, -0.0771, -0.0520, -0.1094, -0.3174, -0.0028,
         0.0174,  0.0162,  0.0458,  0.0164,  0.0383, -0.0833, -0.2655, -0.0392,
         0.0093, -0.0825, -0.0483, -0.3062, -0.0935, -0.0578, -0.1611,  0.0324,
         0.0088,  0.0535, -0.0212, -0.3341, -0.0979,  0.0174, -0.0212,  0.0210,
        -0.0440, -0.3576,  0.0013, -0.0938,  0.0233, -0.1969, -0.1825, -0.1513,
        -0.0407, -0.0360, -0.0935,  0.0817,  0.0133,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1389, -1.5877, -0.5546, -0.6468, -0.0344, -0.1228, -0.2188, -0.0934,
        -0.6629, -0.0049,  0.0656, -0.3444, -0.1274, -0.5410, -0.3178,  0.0123,
        -0.1124, -0.1194, -0.1807, -0.1948, -0.3154,  0.0756, -0.0939,  0.3738,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0908,  0.0460,  0.0169,  0.0623, -0.0397, -0.0395, -0.2555, -0.2320,
         0.0550,  0.0225,  0.0213,  0.0067, -0.0363, -0.3532, -0.0100,  0.1719,
        -0.2744, -0.0802, -0.1431, -0.3197, -0.0635, -0.0458, -0.2640, -0.3867,
         0.0187, -0.2312, -0.2427,  0.0402, -0.1148, -0.2947, -0.0293, -0.0837,
        -0.1550,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2059, -2.7377, -0.2474, -1.0231, -0.1793, -0.0981, -0.0630,  0.0056,
        -1.0440, -0.1132,  0.1068, -0.0636,  0.2664, -0.5818, -0.0162, -0.3510,
        -0.3677,  0.0089, -0.1132,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0788, -0.1149,  0.0889, -0.1725, -0.3987, -0.0459, -0.2000, -0.2947,
        -0.6725, -0.0291, -0.0383,  0.0462, -0.0849, -0.0835, -0.0084, -0.0097,
         0.0699, -0.4167, -0.2695,  0.0352, -0.2990, -0.3781, -0.0429,  0.0589,
        -0.0063,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3010, -1.5297, -0.6946, -0.1365,  0.1484, -0.7290, -1.0068,  0.5514,
        -0.1574, -0.1063,  0.0703,  0.0108, -0.3569,  0.0313, -0.0502, -0.2108,
        -0.5646,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.6273e-01,  1.2785e-01, -6.6077e-02, -1.2064e-03, -5.9075e-02,
        -3.9320e-02,  3.8417e-02, -4.3578e-02,  8.0598e-02, -6.5160e-01,
        -1.4347e-01, -7.1145e-02,  1.8203e-01, -3.2055e-02,  5.3749e-02,
         2.7179e-02, -7.7546e-01, -8.3658e-01, -1.3624e-01, -4.3796e-01,
        -2.5512e-01, -7.4743e-01, -7.7406e-01,  3.2331e-03, -1.5627e-01,
         2.6034e-04,  5.0566e-02,  1.1532e-01, -1.1659e-01, -1.3020e-02,
        -1.7280e-01,  3.4652e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0091, -0.3472, -0.7556, -0.0194, -0.0452,  0.0442, -0.2121, -0.5691,
        -0.0080,  0.0461,  0.0507, -0.0110, -0.1352,  0.0263, -0.0694,  0.0123,
        -0.1164, -0.0370, -0.2477, -0.0363, -0.2975, -0.0221, -0.2364, -0.4760,
        -0.0901, -0.1746, -0.0354, -0.0661,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5239,  0.0879, -0.0148, -0.0450,  0.0699,  0.9125,  1.4051,  0.1563,
         0.2388,  0.3993,  0.0170,  0.5816, -0.0729, -0.0961,  0.0660,  0.6627,
         0.0467,  0.5674,  0.1113, -0.0855, -0.0884,  0.0562,  0.3324,  0.1271,
         0.4855,  0.0766, -0.0921,  0.1128, -0.1763,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2690e-01, -9.3871e-01, -3.6586e-02, -4.0452e-01, -1.2707e+00,
        -5.1049e-02, -7.2311e-04, -5.9787e-02,  5.6476e-04,  1.1376e-01,
        -5.0678e-01,  1.7036e-03, -4.1572e-01, -1.0468e+00,  4.4486e-01,
        -2.6023e-01,  4.3995e-02, -8.9861e-01, -1.4390e-01, -2.3801e-01,
         5.8165e-02, -3.5803e-02, -2.0348e-01, -1.1840e-01, -2.0633e-01,
        -3.3557e-01, -3.9545e-01, -6.8072e-02,  1.6328e-02, -1.4040e-02,
         1.2864e-01, -1.0282e+00,  1.7210e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1311e-01, -1.8983e+00, -1.3909e-01, -4.9627e-01, -1.7766e-02,
        -4.1002e-01,  2.2414e-01, -1.8945e-02, -1.4295e-02, -3.8565e-01,
        -3.0636e-01, -1.6138e-01,  8.7013e-02,  1.6557e-02, -7.4379e-03,
         7.9521e-02, -3.8426e-01,  4.4566e-02, -6.8897e-01, -1.6319e-02,
        -5.0763e-01, -1.4323e-03, -9.3121e-02, -1.9890e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1868, -0.8472, -1.1482,  0.0861, -0.3768,  0.0274, -0.1789, -0.3112,
         0.1367, -0.2645, -0.0321, -0.1971, -0.2363, -0.0705, -0.0649,  0.0779,
         0.1015, -0.3328,  0.0089, -0.7507, -0.3337, -0.0073,  0.0332,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1697, -0.9284,  0.0973, -0.0369, -0.1787, -0.4039,  0.1046,  0.0606,
         0.0969, -0.0511, -0.0267,  0.0179, -0.0715,  0.0422, -0.0033, -0.0334,
        -0.3757, -0.2080, -0.2370, -0.0825, -0.0349, -0.1118, -0.0102, -0.1028,
        -0.3085, -0.1227, -0.0398,  0.0348, -0.0646, -0.4091, -0.3927, -0.1369,
        -0.1728, -0.1052, -0.2423, -0.3319, -0.0142, -0.1378,  0.1111,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0029, -0.1200, -0.0055, -0.1529, -0.4673, -0.0201,  0.0436,  0.0290,
        -0.1459,  0.0648,  0.0142,  0.0191,  0.0318,  0.0065,  0.0931, -0.1579,
         0.0350, -0.0136,  0.0227, -0.1007, -0.3457, -0.0511,  0.0336, -0.2615,
        -0.4898, -0.4690, -0.0949, -0.1586, -0.0259, -0.3365, -0.1074, -0.0973,
        -0.1445, -0.0774, -0.0030, -0.0217, -0.0267, -0.4429,  0.0795, -0.0175,
        -0.0610,  0.1041, -0.0340,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7933e-01, -1.4978e+00, -5.6674e-02,  3.5997e-02, -1.0946e-01,
         1.1132e-01, -9.5641e-02,  1.9719e-02, -4.2735e-01, -8.1769e-02,
         4.9208e-02,  4.3219e-02, -1.8224e-01, -9.1755e-04,  1.6693e-02,
        -5.8440e-02, -2.1506e-01, -4.2435e-01, -2.8299e-02, -1.3393e-02,
        -8.3452e-03,  6.0689e-02, -1.7423e-02,  1.3292e-02,  4.0963e-02,
         3.4765e-02, -2.8895e-02,  2.3699e-02,  7.0312e-02,  7.5665e-02,
         3.0240e-02, -2.6838e-01, -4.5951e-01, -9.6424e-02, -1.5341e-02,
         1.2292e-03, -1.9762e-01, -1.5019e-01, -2.8731e-01, -4.0259e-02,
        -2.8503e-01, -3.5163e-01, -7.4776e-02,  1.3709e-02, -1.7872e-02,
         2.0264e-01, -7.4977e-03,  2.4079e-02,  1.5837e-03, -3.4289e-02,
         1.1411e-01,  4.8278e-02, -1.5513e-01, -3.1263e-02, -1.2135e-01,
         8.6100e-02,  5.0208e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0057,  4.1805, -0.1931,  0.1878,  0.4437,  0.1197,  1.2064,  1.1037,
         0.0971,  0.4474,  1.0272,  0.3822, -0.2443, -0.1009,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0898, -1.6838, -0.3486,  0.0526,  0.0166, -0.1669,  0.0411, -0.0152,
        -0.3115,  0.0222, -0.3386, -0.8340,  0.1304, -0.1914, -0.3796, -0.0753,
        -0.2280, -0.1465,  0.1140, -0.3606, -0.6648, -0.1324, -0.2333,  0.0329,
         0.0976,  0.0109,  0.0035,  0.0665, -0.0217,  0.0699,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4233, -2.3388, -0.2509, -1.4907, -0.8150,  0.2032, -0.5498, -0.0769,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0794, -1.3281, -0.1900, -0.1832, -0.3712, -0.7252, -0.0228,  0.1144,
         0.0216, -0.0931,  0.0542, -0.0230, -0.2167, -0.5647, -0.0372, -0.0071,
         0.0647, -0.0394,  0.0016, -0.0137,  0.1405, -0.0461,  0.0235, -0.0056,
        -0.4810, -0.1717, -0.2234, -0.1338, -0.4505, -0.3038, -0.0189,  0.2389,
        -0.0422,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1418e-01, -1.5811e+00, -3.6799e-01, -1.3567e-01, -9.0121e-03,
        -9.0719e-02, -3.1736e-02, -2.8029e-04, -1.4290e-03, -1.0350e-01,
        -4.0924e-02, -8.8813e-02, -2.1248e-01,  5.6448e-02,  9.9192e-02,
         2.9413e-02, -8.8528e-04,  6.8993e-02, -1.0065e-01,  3.7210e-02,
         4.4165e-02,  8.9203e-02, -1.8846e-02,  5.0978e-02,  4.8312e-02,
        -7.3781e-02, -7.4638e-02, -4.2837e-01,  2.4757e-02, -4.8470e-02,
        -9.9053e-03, -6.9497e-02, -2.1486e-01, -4.4760e-01, -1.0151e-02,
        -2.7529e-02, -1.9976e-01, -7.7363e-03,  1.1681e-02,  3.9894e-02,
         7.9893e-04, -4.2868e-03, -2.2422e-02, -2.2244e-01, -2.7199e-02,
        -2.4634e-02,  1.5933e-02,  2.2683e-02, -4.6054e-03, -7.2268e-04,
         2.9956e-02, -1.7914e-01,  3.8716e-03, -1.8550e-01, -4.9693e-02,
         4.5322e-02,  1.1715e-02,  3.9136e-02, -5.6750e-03, -1.1241e-01,
        -3.0885e-02, -1.2376e-01, -3.7071e-01,  8.7025e-04,  1.9221e-02,
         1.5859e-02, -2.8599e-02,  7.4014e-02,  9.1239e-02,  3.4824e-02,
         2.3633e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4223, -0.2291, -0.1710,  0.0122, -0.2902,  0.0127, -0.6723, -0.0653,
         0.0170,  0.0628, -0.0042,  0.0189,  0.0606,  0.0780, -0.0419,  0.0073,
        -0.0119,  0.0802, -0.1098, -0.7836, -0.1734,  0.1200,  0.0187, -0.2870,
         0.0507, -0.2887, -0.0665,  0.0601,  0.0382,  0.0378, -0.0195, -0.2575,
        -0.4814, -0.5641, -0.0637, -0.0582, -0.3827, -0.1210,  0.0152, -0.0810,
         0.0133, -0.0173,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6632e-01, -1.4597e+00,  1.8059e-02, -1.5492e-01,  6.4312e-03,
        -5.2489e-02,  7.3427e-02,  1.2435e-01, -3.5633e-02, -2.4143e-01,
        -6.1871e-03, -2.2527e-01, -3.9267e-01,  1.2084e-02, -9.3806e-02,
        -4.0003e-02, -9.6412e-02, -4.0179e-01, -5.5516e-01, -5.6170e-02,
        -4.3857e-02, -1.6906e-01, -3.6396e-02,  4.0337e-02, -1.6240e-02,
        -3.2070e-01, -7.6256e-02, -3.3374e-02, -6.1798e-01, -6.4876e-02,
         3.2143e-02, -5.3942e-03, -4.8984e-02, -9.8732e-02, -9.9428e-02,
        -5.5189e-04, -1.5148e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6183, -3.4680, -0.3900, -0.8733, -0.1480,  0.2481,  0.0570, -0.1998,
        -0.2979, -0.0231, -0.4195,  0.1700, -0.2936, -0.1621,  0.1826,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0514, -0.5003,  0.0678, -0.0195, -0.8742, -1.0941, -0.0818, -0.6037,
        -1.2984, -0.1149,  0.1421,  0.2930, -0.2012,  0.0133,  0.1484, -0.0458,
        -0.1412, -0.0876,  0.3067,  0.0144,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1198, -0.0273,  0.0127, -0.0267, -0.5734, -0.0774,  0.0641, -0.0410,
         0.0768,  0.0333, -0.2500, -0.2573, -0.1327, -0.0430, -0.1974, -0.1663,
         0.0191, -0.1025,  0.0647, -0.1425, -0.0429, -0.0019, -0.0335,  0.0084,
         0.0640, -0.3065, -0.4233, -0.0409,  0.0304, -0.1232, -0.3078, -0.0131,
         0.0043,  0.0020, -0.1608, -0.2799,  0.0353, -0.1440, -0.0178, -0.0282,
        -0.0309,  0.0625, -0.0160,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1433, -0.0406, -0.1648,  0.0543, -0.2764, -0.0421, -0.1404, -0.6131,
        -0.2258,  0.0093,  0.0220,  0.1114,  0.0509, -0.6092, -0.7983,  0.0028,
         0.0613,  0.0046, -0.0108, -0.0189, -0.3487, -0.0664,  0.0322, -0.0109,
        -0.0168, -0.0505, -0.1360, -0.0566, -0.3984, -0.0702, -0.0126, -0.3575,
        -0.0728, -0.3242, -0.5061, -0.1455,  0.1085,  0.1799,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2283, -1.8137, -0.5130, -0.6778, -0.1187, -0.4196, -0.0874,  0.0122,
        -0.2723, -0.5950, -0.0427, -0.1525, -0.3862, -0.0991, -0.0390, -0.0045,
        -0.1066, -0.3298,  0.0154,  0.1081,  0.2008,  0.0354,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3455e-01, -1.0674e+00, -1.3058e-01,  2.0664e-02,  1.8557e-01,
        -3.9589e-02, -8.5263e-02,  4.1327e-02, -1.3431e-01, -5.0904e-01,
        -6.8488e-02,  6.6471e-02, -3.6674e-02,  3.9106e-02, -2.3106e-01,
        -4.8362e-01, -7.3730e-01, -1.1437e-01, -8.8613e-02, -3.5316e-01,
         5.1594e-02, -1.1464e-03, -1.5307e-01, -2.8289e-02, -7.6340e-02,
        -2.4637e-04,  8.8694e-03,  6.4853e-02, -5.8548e-02, -2.9178e-01,
        -1.4812e-02,  2.9819e-02,  8.7864e-02, -1.1520e-01, -1.6314e-01,
        -4.2422e-01,  8.8466e-02, -2.0932e-01,  2.2567e-01, -1.3353e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1478, -0.1395, -0.0042,  0.0210, -0.0199, -0.2349,  0.0691,  0.0852,
        -0.3251, -0.0358, -0.0013, -0.0234, -0.1699, -1.3178, -0.7234, -0.0326,
         0.0604, -0.0362, -0.4847, -0.0276, -0.0219,  0.0624,  0.1039, -0.1931,
        -0.0504, -0.3444, -0.0896, -0.4767, -0.4694, -0.0648,  0.1452,  0.0257,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2392, -2.2275, -0.1304, -0.0531, -0.1348,  0.0414, -0.0439, -0.6145,
        -1.0813, -0.1275,  0.2192, -0.2806, -0.0986, -0.0170,  0.0672, -0.0355,
        -0.5109, -0.0989,  0.0124,  0.0079, -0.0199,  0.0254, -0.0392, -0.7530,
        -0.0659,  0.1300, -0.3724, -0.1374,  0.0512, -0.1626, -0.0319, -0.0630,
        -0.0794,  0.1427,  0.0035,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.0645, -0.3470, -1.6126, -0.8306,  0.0968, -0.2865, -1.0448, -0.9175,
         0.0340, -0.6412,  0.0652, -0.2685, -0.0543, -0.2636, -0.1476, -0.0067,
        -0.1190, -0.0176,  0.0312, -0.0529,  0.2056,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0216, -1.8667, -0.0815, -0.3890, -0.5148, -0.2421, -0.0422, -0.1083,
        -0.2518,  0.0388, -0.0405,  0.0203,  0.0458,  0.0347, -0.3253,  0.0376,
        -0.2445,  0.0501, -0.0042,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2190, -0.6517, -0.5065, -0.1906, -0.2151,  0.0368,  0.0809,  0.0730,
        -0.0186, -0.7939, -0.9004, -0.1374, -0.2990, -0.7051, -0.0662, -0.4858,
         0.0242, -0.2711, -0.0377, -0.1720,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1264, -1.9901,  0.0716, -0.3622, -0.0469, -0.0875, -0.0369, -0.3504,
        -0.0705, -0.1191, -0.1056, -0.0660,  0.0063, -0.2272, -0.1043, -0.1059,
        -0.3728, -0.0551, -0.0825, -0.2689,  0.0742, -0.2552, -0.1121,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0101, -1.1407, -0.3823, -0.0151, -0.0903, -0.3437, -0.0957, -0.1073,
        -0.3985, -0.6729, -0.1358, -0.3868, -0.0818, -0.2979,  0.0816,  0.0523,
         0.0348,  0.0019, -0.1704, -0.0412, -0.2131,  0.0413,  0.1581, -0.1594,
         0.3780,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0365e-03, -3.2359e+00, -2.6521e-01, -5.4903e-01, -1.4222e-01,
        -9.0156e-02, -7.4363e-02, -2.6330e-01,  4.1953e-01, -2.8595e-01,
        -5.3738e-01,  1.0739e-02,  5.8211e-03,  5.1041e-02,  1.3664e-01,
        -1.5721e-01,  7.4861e-04,  1.8121e-02,  8.8846e-02, -1.2579e-01,
        -5.3815e-01, -6.8490e-01, -1.7447e-01, -4.1595e-01, -1.6084e-01,
        -7.7912e-02,  1.9684e-02, -6.4937e-02, -1.0123e-01, -1.9352e-01,
        -1.6747e-01,  5.9279e-02, -1.8178e-01,  1.4081e-02, -2.7040e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0588, -0.0912, -0.1093, -0.6716, -0.1101, -0.1785, -0.4946, -0.1364,
        -0.8623, -0.0142, -0.1547, -0.0991, -0.0050, -0.2932, -0.6889, -0.8699,
        -0.0340, -0.8204,  0.3392,  0.1117, -0.1911,  0.0816,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0860, -0.0674,  0.2838, -0.0248,  0.0093,  0.1845, -0.0433, -0.0047,
        -0.1388,  1.9186,  0.1533,  0.0983,  0.0162,  0.0541, -0.2290,  0.5255,
         0.0109,  0.0371,  0.0577,  0.0404, -0.0574,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0820, -4.0891, -0.1364, -0.4101, -0.2083,  0.0196, -0.9212, -0.9942,
        -0.0900,  0.2119,  0.1385,  0.2398, -0.0660, -0.5086, -0.0897, -0.3795,
         0.4284,  0.0746,  0.1548,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0872, -0.0736, -0.4202, -0.0082, -0.0281, -0.4029,  0.0398, -0.0249,
         0.0455, -0.0219, -0.0376, -0.1075, -0.0193, -0.0877,  0.0590,  0.0044,
        -0.2658, -0.3990,  0.0041, -0.1539, -0.0673, -0.1578, -0.1449,  0.0088,
        -0.2354,  0.0497, -0.0154, -0.1340, -0.0094, -0.4786, -0.0739, -0.1616,
        -0.1893, -0.5790,  0.0239, -0.0734,  0.0343,  0.0164,  0.0195, -0.2106,
         0.0557,  0.0450,  0.0094, -0.0423,  0.0323,  0.1069, -0.0522],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4225e-02, -1.7280e+00, -1.1161e+00, -1.2038e-01,  1.3536e-01,
        -7.7062e-02, -5.9936e-02,  4.0760e-02, -4.5751e-01,  7.4671e-02,
         6.9663e-02, -1.5136e-03,  7.1333e-02, -2.4806e-01,  9.5526e-02,
        -5.3545e-02, -8.6766e-01,  2.3620e-03,  2.1561e-02, -1.1946e-01,
        -1.6241e-01, -1.7110e-01, -2.1619e-01, -7.6984e-01,  1.0841e-01,
         6.4663e-02, -7.2132e-02, -4.4204e-02,  9.4520e-02,  1.0384e-01,
         1.5760e-01,  1.0241e-01, -1.7855e-02, -2.4282e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7041e-03, -3.0380e+00, -1.5352e-01,  1.7684e-01, -1.0432e-01,
        -2.9393e-01, -2.3773e-02,  5.3123e-02,  1.9831e-02, -1.5697e-01,
        -1.1906e-01, -6.1073e-02, -3.9794e-01, -7.6167e-01,  2.3841e-02,
        -2.5990e-03,  4.5927e-02,  3.2348e-02,  1.0861e-01, -5.5740e-01,
        -3.7348e-02, -5.0007e-02, -4.6891e-02,  4.5539e-02, -1.1308e-01,
         6.4320e-02, -6.7788e-02, -1.7062e-01, -4.4995e-02, -4.0444e-01,
        -8.7768e-02, -5.1725e-02,  1.3858e-04,  9.7299e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.9946, -3.4560,  0.0553,  0.2688, -0.1662, -0.7304,  0.1902, -0.2279,
         0.0250, -0.8483, -0.3369, -0.1095, -0.2671, -0.6134, -0.2335, -0.1108,
         0.0800, -0.1736,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3725,  0.0801,  0.0925, -0.1095,  0.0279,  0.0250,  0.0083, -0.0143,
        -0.8547, -1.5261,  0.0562, -0.0713,  0.0281,  0.0439,  0.0984,  0.0945,
         0.0292, -0.1164,  0.3436, -1.2895, -0.0645,  0.1709, -0.1578,  0.0165,
         0.0522, -0.1293,  0.1631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.8171e-02,  3.7566e-02, -3.3782e-02,  7.2868e-04, -1.6091e-01,
        -1.3312e-01, -1.1698e-02, -3.9427e-01, -9.9883e-01,  8.2205e-03,
         1.3388e-01, -7.2252e-02, -1.0705e+00, -1.7423e-01,  5.5599e-02,
        -7.7951e-03, -1.2692e-01, -7.7092e-02, -8.8644e-03, -1.1075e+00,
        -2.9253e-02,  6.8577e-02, -2.2878e-01,  6.7949e-03, -2.8579e-01,
        -1.3319e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0543, -2.0391,  0.0199, -0.5991, -0.8291, -0.1662, -0.1013, -0.1624,
        -0.5041, -0.3389, -0.4646, -0.2214,  0.0941, -0.1028, -0.2135, -0.5586,
        -0.3242, -0.3217, -0.0128, -0.0142,  0.6508,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0734, -1.9160, -0.8823, -1.2001, -0.1246, -0.0673, -0.6239, -0.0332,
        -0.1706, -0.3176, -0.0743, -0.4375,  0.1404,  0.0647, -0.3385,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7014,  0.1035,  0.0214,  0.0633,  0.3671,  0.2509,  0.0851,  0.0973,
         0.1161,  0.0745, -0.3934, -0.1035,  1.1754,  0.4488,  0.5592, -0.0541,
        -0.1050, -0.1723,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0464, -2.3158, -0.8881, -0.7925, -0.0627, -0.1365,  0.0632, -0.1361,
         0.0782, -0.2534, -0.6741, -0.4176,  0.3664, -0.2394, -0.0646, -0.1534,
         0.1655, -0.1001,  0.3350,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4239e-01, -1.0557e+00, -7.3427e-01,  1.1335e-01, -1.2078e-01,
        -1.9479e-01,  1.2545e-01,  7.8012e-02, -8.1250e-02, -1.5574e-02,
        -1.7353e-02, -3.0653e-02, -1.1124e-01,  4.1671e-02, -1.1933e-02,
         5.0137e-02, -3.6651e-01, -2.5053e-02,  3.3352e-02,  5.6904e-02,
        -4.2667e-01,  2.5901e-02, -2.2646e-01, -2.6615e-02, -3.0633e-01,
        -1.9635e-02, -1.4939e-01,  1.1302e-01,  2.1392e-02, -2.3930e-01,
        -6.6643e-05,  1.7341e-01, -2.6198e-01, -1.4195e-02, -2.0336e-01,
        -1.6477e-01, -3.0320e-01,  1.7855e-03,  2.2153e-02, -5.4969e-02,
         4.8418e-02, -2.2146e-02,  7.5777e-02, -2.0997e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3948e-01, -9.9910e-01,  7.6980e-03, -6.3521e-01, -8.0268e-01,
         2.8288e-02, -4.1622e-01, -4.1169e-02, -4.2124e-01,  1.2212e-02,
        -1.3444e-01,  1.9900e-01,  4.2269e-02, -1.5093e-02, -3.7729e-02,
         3.3699e-02,  2.0841e-01, -1.9009e-01, -1.7853e-02, -1.3913e-01,
        -2.6753e-01,  1.7187e-02, -2.4039e-02,  6.2211e-02,  9.3269e-02,
         5.7616e-03, -5.8053e-02, -5.1964e-01, -1.8459e-02,  9.3105e-02,
         9.9593e-04, -1.7036e-01,  1.6520e-02,  2.8434e-03, -6.5390e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6360e-01, -2.8859e+00, -1.1915e-01, -4.2278e-01, -1.1499e-01,
         1.1888e-01, -6.4919e-01, -3.7530e-01, -1.4845e-01, -2.8743e-02,
        -4.5064e-02, -3.3042e-01, -1.0374e+00, -1.1385e-03, -1.2303e-01,
        -6.1363e-01, -2.6272e-02, -1.7013e-02,  7.4646e-02,  1.6390e-02,
         7.4901e-02,  1.0303e-01, -7.7071e-03,  2.7752e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5353, -1.8504, -0.0544, -0.3219, -0.0687, -0.0528,  0.0942, -0.2192,
        -0.6388, -0.0883, -0.1957,  0.0607, -0.0056, -0.3189, -0.3547, -0.0134,
         0.1768,  0.0194, -0.0397, -0.0475,  0.0181, -0.2641,  0.0896, -0.3164,
         0.0405, -0.3927, -0.3614, -0.1402, -0.0873, -0.1283, -0.2007, -0.0701,
         0.0598, -0.0604,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2514e-02,  7.0732e-02, -4.9616e-02, -5.2369e-02,  9.0056e-02,
        -1.6900e-02, -8.6030e-01, -2.2242e-01,  5.5834e-02,  1.1656e-03,
         1.0630e-01,  1.0334e-01, -5.8750e-01, -5.0772e-03, -5.4367e-01,
        -6.5688e-01, -1.2690e-01,  1.0865e-01,  4.2098e-02,  3.6580e-02,
         5.6818e-02, -7.8048e-02, -1.4549e-02, -3.6997e-01, -9.6841e-02,
         6.5110e-04, -1.2722e-01, -1.5535e-01,  6.5745e-02, -4.6415e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.6451,  0.1645, -0.0060, -1.6990, -0.3730, -0.2020, -0.1456, -0.1100,
        -0.9703, -0.8142, -0.0692,  0.0695,  0.3946,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1165,  0.1529, -0.1795,  0.2252,  0.0064, -0.2594, -0.3785, -1.1907,
        -1.4440,  0.0931,  0.1156, -0.3680,  0.0680, -0.2591, -0.3318,  0.1243,
         0.2436, -0.1303,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3840, -0.8686, -1.5239, -0.1236, -1.0532,  0.0754, -0.2756, -0.0487,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2026, -1.6515, -0.1215, -0.1789, -0.3262, -0.1875, -0.0782,  0.0069,
         0.0479, -0.0970, -0.0402,  0.1525, -0.1500, -0.0704, -0.0192, -0.0748,
         0.0293, -0.0098,  0.0400, -0.0131,  0.0789,  0.0431, -0.3504, -0.3336,
        -0.0439, -0.1820, -0.0133,  0.0097, -0.4514, -0.0938,  0.1098, -0.2718,
        -0.0775, -0.5180,  0.0036, -0.1460, -0.2628, -0.0571,  0.0521,  0.0168,
        -0.1049, -0.0558, -0.1094,  0.0661,  0.0825,  0.3340,  0.0070],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1820, -2.0313,  0.6385, -1.7419, -0.2079, -0.1246, -0.5161, -0.2527,
        -0.0812, -0.6464,  0.2630,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0458, -2.2930, -0.4100, -0.2827, -0.3557, -0.0983, -0.3393, -0.9655,
         0.0870, -0.4454,  0.0866, -0.0518, -0.5880, -0.1718,  1.3080,  0.0610,
        -0.0166, -0.1719,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4488, -1.0860, -2.1497, -0.1252,  0.1500, -0.0149, -1.1985,  0.0732,
        -0.5770, -0.3864,  0.0273, -0.0110, -0.1554,  0.0387,  0.0161,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7581, -2.5883, -0.3232,  0.0260,  0.2293, -0.5778, -0.6988, -0.2227,
        -0.2018, -0.7266, -0.8209, -0.0497, -0.0599,  0.0494,  0.2485,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0326, -0.0523, -0.0396, -0.2739,  0.1054, -0.0484,  0.0127, -0.1146,
         0.6565,  0.0690, -0.1354,  0.7512,  1.0749,  0.2199, -0.0616,  0.3603,
         0.3570, -0.1708,  0.0522, -0.0817,  1.3173,  1.4780, -0.2947,  0.2089,
         0.0019, -0.1536,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9064e-01,  1.0420e-01,  3.6901e-01,  2.2582e-01, -1.3244e-01,
         1.0540e-01,  1.2955e-01, -1.2948e+00, -2.7227e+00, -1.9165e-01,
         2.4235e-01, -9.0982e-02,  2.0356e-01,  3.0259e-01,  3.0857e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2071, -1.6062,  0.0613, -0.6449, -0.6022, -0.0630, -0.4869, -0.0100,
        -0.0058, -0.7508, -0.0498, -0.0885, -0.2694, -0.0642, -0.0648,  0.0330,
        -0.1866, -0.0715, -0.5590, -0.1452, -0.1346,  0.0307,  0.1090,  0.0141,
         0.0847,  0.0699,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2983, -0.0319,  0.0852,  0.1004, -0.0431, -0.1336,  0.0780,  0.2125,
        -0.0857, -0.2671, -0.4402, -0.1236, -0.0909,  0.0956, -0.6233, -0.6285,
        -0.0464, -0.2146, -0.0328, -0.3338, -0.0929, -0.1304, -0.0824, -0.1710,
         0.1042, -0.0950,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-3.8164e-01, -2.5120e+00,  1.7744e-01,  4.0085e-02, -1.1914e-01,
        -1.0501e-01, -2.7873e-01, -5.6763e-01, -5.8863e-02, -3.2505e-01,
        -1.2692e-01, -1.5578e-01, -5.6922e-01,  5.1091e-02, -2.2305e-01,
         9.8236e-03,  6.8789e-02, -2.9525e-02, -1.9490e-01,  1.3097e-02,
        -6.4719e-02, -1.7105e-01,  1.5179e-03, -1.9945e-01, -5.1585e-01,
         6.6209e-02, -3.6575e-01, -7.4914e-02,  3.8126e-02,  4.4904e-02,
        -2.3630e-02, -2.4404e-01,  1.2082e-02, -1.6023e-02, -2.9736e-02,
         4.7435e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1977, -1.0100, -0.3540, -0.0613, -0.3415,  0.0088, -0.4308, -0.5253,
         0.0360,  0.1397, -0.2039, -0.0011, -0.0656,  0.0601, -0.0191, -0.0942,
         0.0041, -0.0207,  0.0764, -0.0646,  0.0055, -0.2521, -0.1030, -0.0615,
        -0.3099, -0.2684, -0.0470, -0.4460, -0.4253,  0.0491, -0.1247, -0.2828,
        -0.3738,  0.0813,  0.0822,  0.0380,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4385, -0.2149,  0.1076, -0.0871, -0.0502, -0.4763, -0.0202,  0.1619,
        -0.1163,  0.0131, -0.2807, -0.8872, -1.3174, -0.2415, -0.1383, -0.0811,
         0.0784, -0.2145,  0.0525, -0.3086,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9678e-02, -1.0341e+00, -1.7114e-01, -2.0806e-01,  6.1432e-02,
        -1.7943e-02,  3.4922e-02, -2.1816e-01, -2.9829e-02, -1.6067e-01,
         1.2539e-03,  1.3352e-02, -8.5286e-03,  2.2127e-02, -4.0759e-01,
        -3.4897e-01, -2.5644e-02, -3.2889e-01, -7.8619e-02, -3.8950e-02,
        -6.5656e-03, -2.5083e-02,  4.9087e-02,  1.6720e-01,  9.4019e-02,
         3.1715e-02, -1.2057e-02, -2.6545e-02,  5.0896e-02, -1.6022e-01,
        -3.7019e-01, -9.5887e-02, -7.3464e-02, -3.9528e-01,  4.6830e-02,
        -1.8526e-02, -2.1810e-04,  7.5534e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2472e-01, -1.1375e+00, -7.7722e-01, -2.7948e-01, -9.7366e-03,
        -5.6403e-02, -4.1069e-02,  9.3134e-02,  1.4075e-01,  4.8131e-02,
        -2.9767e-01, -3.8051e-01,  6.3571e-03,  6.1127e-02, -3.8475e-02,
        -2.5439e-01,  3.2995e-02, -5.9123e-01, -4.6056e-02,  5.0162e-03,
        -1.9582e-03,  5.5219e-02,  1.1886e-01, -3.6853e-02,  3.6379e-02,
        -2.9704e-02, -1.5554e-01, -2.1208e-01, -5.3952e-01,  5.9899e-02,
        -5.7741e-02, -7.0924e-02, -6.7549e-03,  5.7408e-02, -1.6462e-01,
        -7.9190e-05,  4.1004e-03,  6.0323e-02, -1.2770e-02,  1.6898e-01,
         4.2020e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2289, -2.5496, -0.1722,  0.0459,  0.0432, -0.7404, -0.0058, -0.0327,
        -0.2541, -0.0189,  0.0419, -0.4864, -0.6018,  0.1553,  0.2301, -0.3608,
        -0.0276, -0.6084, -0.0600,  0.2441,  0.1336, -0.1133,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2783,  0.0257,  0.0155,  0.0386,  0.0995, -0.1084,  0.0739,  0.0211,
        -0.0072,  0.0244, -0.0927, -0.3126, -0.1319, -0.2282, -0.0232, -0.3954,
         0.1537, -0.3453, -0.0367, -0.3680, -0.4774,  0.0745,  0.2122, -0.3054,
         0.0081,  0.0614, -0.0768, -0.2550, -0.4372, -0.1702, -0.2801, -0.1847,
        -0.1016,  0.0186,  0.0640,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1701e-01, -1.3522e-01, -1.3562e+00, -1.3276e-02, -4.4589e-01,
        -7.8172e-02,  4.5272e-02,  3.5265e-02,  2.7256e-02,  1.1397e-03,
        -1.8817e-01,  2.3437e-01,  5.4521e-02,  1.8098e-02,  4.4770e-02,
        -1.7080e-02,  3.4192e-01,  9.5694e-03, -4.3806e-01,  1.9849e-02,
        -3.1958e-01,  4.9219e-02, -1.5834e-01, -3.1837e-01,  3.2539e-02,
         8.1395e-02, -4.3755e-01,  2.2281e-02,  1.4618e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0030, -2.7509, -0.5553, -0.1663, -0.1661, -0.4398,  0.0231, -0.0709,
        -0.1832,  0.1111, -0.2290, -0.2976,  0.1397, -0.0587,  0.0563, -0.3885,
        -0.5045, -0.1143, -0.3167,  0.0309, -0.3484, -0.2459,  0.0155, -0.0149,
         0.2878,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2993, -0.1022,  0.0301,  0.1914, -1.4342,  0.0487,  0.0902, -0.6893,
        -0.9504, -0.1886,  0.3318,  0.3258,  0.0326,  0.1464, -0.5751, -0.8815,
         0.2406, -0.2020,  0.0124,  0.0920, -0.2804,  0.1059, -0.1808,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0076,  0.0690,  0.0564, -0.0588,  0.0751,  0.0633,  0.0463, -0.0219,
        -0.0536,  0.0067, -0.1088, -0.0407, -0.5121, -0.7350,  0.0298, -0.1503,
        -0.0853, -0.2218, -0.0066,  0.0064,  0.0817, -0.0730, -0.0480,  0.0514,
         0.0275, -0.0058, -0.5926, -0.0432, -0.3364,  0.0679, -0.2934, -0.0554,
        -0.0724, -0.0599, -0.3109,  0.1029, -0.1368,  0.0075, -0.0706,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2985,  0.1714,  0.0729, -1.1153, -2.0951, -0.0207, -0.5130, -0.9167,
        -1.3499, -0.3514, -0.2417, -0.3210,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.7995e-01, -5.5849e-01, -1.9744e+00, -1.4130e-01, -1.6115e-01,
        -7.1491e-02, -3.2881e-01,  1.2013e-01, -9.9414e-02, -2.4702e-01,
        -2.7888e-02,  5.2285e-02, -2.9553e-01, -2.0223e-01, -3.4468e-01,
         6.7418e-02, -4.3790e-02, -3.4540e-01, -5.9889e-02, -1.1280e-01,
        -3.2684e-02, -2.2166e-01,  5.4542e-02,  1.1929e-02,  2.5623e-02,
        -4.5918e-02,  1.9085e-01, -6.5490e-03,  3.7503e-02, -4.6744e-02,
         2.2021e-02,  1.0365e-01, -4.1683e-01,  4.8102e-02,  2.7933e-02,
        -8.0252e-04, -5.7294e-01, -5.4991e-02, -5.9976e-03,  3.6300e-02,
         3.3532e-02,  2.8795e-02,  9.6523e-02,  8.4466e-02, -5.2819e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0169, -3.0248, -0.2403, -0.7103,  0.1224, -0.0345,  0.0544, -0.1562,
        -0.7593, -0.0389, -0.2033,  0.2443,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0077, -3.6120, -0.1497, -1.4128, -0.1769,  0.1198, -0.6212,  0.4717,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0845e-01, -1.8798e+00, -5.2658e-02,  8.7415e-02, -3.3875e-01,
         1.0701e-02,  1.2470e-01, -1.0950e-01, -2.5444e-01, -6.5389e-01,
        -6.4591e-02, -1.2901e-01, -5.9591e-02, -2.5141e-03, -7.4955e-03,
        -1.0522e-02,  7.0039e-02, -9.7237e-02,  1.3631e-02, -3.3428e-02,
         4.8570e-02,  1.6716e-02, -1.2422e-01, -2.5262e-02, -6.0080e-02,
        -9.9575e-02, -1.1919e-02, -8.4651e-02, -2.0815e-01, -1.1268e-03,
        -5.0325e-02, -1.1325e-01,  3.7780e-02, -2.2883e-02, -1.2443e-01,
         1.7945e-04,  8.4502e-02, -2.8640e-02, -5.2683e-02,  1.3691e-02,
        -7.1743e-02, -3.0250e-02, -5.8632e-02, -3.6249e-02, -1.1612e-01,
        -4.6052e-02, -5.3937e-01, -6.5075e-02, -6.6303e-01, -8.8948e-01,
        -2.5901e-01, -6.6660e-02, -3.8987e-01,  4.4927e-02, -1.2517e-02,
        -6.8210e-02, -1.1495e-01, -7.0263e-02,  1.9137e-02,  1.8646e-01,
         5.4175e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1152, -0.0199,  0.0118, -0.2879, -0.5032,  0.0017, -0.3181, -0.0178,
        -0.0239,  0.0033, -0.0317, -0.3713, -0.0232, -0.4317, -0.4991,  0.0556,
        -0.0646,  0.0190, -0.2508, -0.1663, -0.2893, -0.3764, -0.1506, -0.0136,
        -0.1298, -0.1329, -0.0185, -0.0615, -0.1168, -0.0651, -0.0931,  0.0091,
        -0.1249,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2187, -0.1712, -1.4069, -0.8076,  0.0488,  0.1802, -0.4694, -0.0653,
         0.0642, -0.1371, -0.0488,  0.0203, -0.4390,  0.0054, -0.0135,  0.2844,
        -0.0807, -0.3688, -1.2421,  0.1310, -0.0772, -0.3077,  0.3042, -0.1797,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0361, -0.6532, -0.6177, -0.3599,  0.0368, -0.5087, -0.0230, -0.3953,
        -0.0327, -0.1304, -0.4306, -0.2131, -0.6086, -0.0354,  0.0400, -0.0245,
         0.0356, -0.0123,  0.2103,  0.1624, -0.2203, -0.0456, -0.0492,  0.0644,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1414, -0.0329, -0.4953, -0.8662, -0.0499, -0.0118, -0.0144, -0.2111,
        -0.0285,  0.1423,  0.0266, -0.0307,  0.0432,  0.0168, -0.2503, -0.4153,
        -0.3384,  0.0053, -0.0287, -0.1201, -0.4049,  0.0330, -0.3152, -0.2677,
         0.0611,  0.0023, -0.3727, -0.0812,  0.0646, -0.1721, -0.0163, -0.1079,
         0.1004,  0.2751,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4749e-02, -5.7284e-02, -4.7052e-04,  3.3632e-02, -4.3694e-01,
        -1.3909e-01, -3.1271e-02,  9.8632e-03, -5.3547e-02, -9.2332e-02,
        -1.6815e-01,  1.6014e-03,  2.0460e-02, -1.1102e+00,  1.2198e-01,
        -4.1525e-01, -1.0199e+00, -7.8088e-02, -4.0432e-01, -1.5191e-02,
        -3.7613e-01, -8.8363e-01,  1.7083e-01, -1.5509e-01, -9.4840e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0159, -0.0544, -0.3757, -0.1444, -0.3133, -0.0665, -1.3224, -0.7105,
        -0.6681, -1.7903, -0.1969, -0.2281, -0.1716, -0.0099, -0.2335, -0.2891,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1014, -1.8054, -0.1740, -0.7986, -0.2684, -0.5594, -1.1757, -0.2595,
         0.2646, -0.0840,  0.0221,  0.0888, -0.1449,  0.0604,  0.0706,  0.0302,
        -0.1522, -0.5368, -0.2442, -0.0687,  0.0647, -0.0486, -0.0690,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2932, -0.4379,  0.4519, -1.8564, -2.4335, -0.0609,  0.3779,  0.1758,
        -0.0385,  0.0174, -0.4810,  0.1039, -0.0106,  0.3192,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.1989, -0.7764, -0.6239, -0.9906, -0.1156,  0.0123, -0.4494,  0.2084,
        -0.2760, -0.2361, -0.6305, -0.9074, -0.0883, -0.0203,  0.0368,  0.1071,
        -0.1350, -0.4083, -0.0363, -0.4391, -0.0647,  0.0445, -0.4421,  0.1138,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1106, -0.4295, -1.1727, -0.1310, -0.5463, -0.0481, -1.3436, -0.1320,
        -0.2366, -0.3572, -0.3712,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0766,  0.0706, -0.0061,  0.1072,  0.1143, -0.3240, -0.7023, -0.0849,
        -0.5118,  0.0081, -0.2603, -0.0046, -0.1929,  0.0951, -0.1950, -0.0016,
        -0.1239, -0.3925,  0.0058, -0.0251, -0.0952,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0099, -0.0198,  0.0238,  0.4427, -0.0268,  0.1684, -0.0084, -2.0836,
        -0.3853, -1.3950, -0.2804, -0.3579,  0.4680, -0.7356,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7002e-01,  1.9486e-01,  1.7874e-01,  1.0958e-01,  1.0660e-01,
         1.3955e-01,  2.6893e-02, -1.0255e-01, -4.7435e-02,  9.5027e-02,
         3.1924e-01, -4.2088e-01,  1.5095e+00, -4.8562e-02,  4.3011e-02,
        -1.0962e-03,  1.9195e-01, -3.1492e-02, -1.2080e-02, -2.2155e-01,
         2.0598e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1441, -3.1215,  0.0927, -1.2098,  0.2508, -0.5994,  0.0263, -0.6884,
        -0.0356, -0.3105, -0.3548, -0.6883, -0.0123,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2750, -1.3395,  0.0418, -0.0104,  0.1139, -0.0478, -0.2571, -0.4589,
        -0.1135,  0.1082,  0.0214, -0.0988, -0.0920, -0.0208,  0.0381, -0.1271,
         0.0363, -0.0318, -0.0329, -0.0801,  0.0450,  0.1757,  0.0740, -0.1887,
        -0.7571,  0.2678, -0.2899, -0.5496, -0.4529, -0.0306, -0.0755,  0.0030,
         0.0478, -0.0491,  0.0071, -0.0070, -0.3222,  0.0176, -0.0216, -0.0997,
        -0.1176,  0.0015,  0.0552, -0.2797, -0.0991], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2091, -0.3281, -0.4758, -0.0049, -0.0517,  0.0496,  0.1150, -0.1977,
        -0.0675, -0.0041, -0.4995, -0.0160, -0.1011,  0.0697, -0.4422, -0.3204,
        -1.0431,  0.0218,  0.0528, -0.0668,  0.0571, -0.5493,  0.1367, -0.4681,
         0.0201, -0.0275, -0.1845, -0.1851, -1.0478, -0.1017, -0.0771, -0.1266,
        -0.2762,  0.0066,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3545,  0.2343,  0.0723, -0.1927, -0.0218, -0.0807,  0.0958,  0.1498,
         0.4456,  0.0517, -0.0801, -0.1189, -0.0951,  0.0189, -0.0052,  0.0651,
        -0.0428,  0.1670,  0.0063, -1.6404, -2.6703,  0.0050, -0.0187, -0.0045,
        -0.3996,  0.1391,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1713e-02, -2.6499e+00, -5.7592e-02, -9.0917e-01,  1.1432e-02,
        -1.1196e-01, -1.2679e-01, -5.1309e-01, -5.4993e-02, -1.3942e-01,
         2.0264e-01, -1.9146e-03, -4.1329e-02,  8.7971e-03, -2.9229e-01,
        -5.7098e-01, -6.5747e-01, -3.8428e-02, -3.0849e-01, -1.3250e-01,
        -1.9529e-01,  5.3305e-02,  1.4537e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0771, -1.3818, -0.1745, -0.1868,  0.0407,  0.0224, -0.0700, -0.6062,
        -0.6258, -0.1055,  0.0946,  0.0210,  0.1353, -0.0743, -1.0744,  0.1519,
        -0.0264, -0.2428, -0.4402,  0.0480,  0.0530, -0.0364,  0.0468,  0.0406,
         0.0866,  0.0070, -0.3296, -0.7420, -0.0169, -0.1100, -0.1116,  0.0790,
        -0.0053,  0.0604,  0.0260,  0.0071, -0.0808,  0.1545, -0.0399,  0.2092,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0073, -0.0881, -0.0571, -0.2287, -0.6699, -0.0899,  0.2624, -0.5089,
        -0.0353, -0.1890, -0.0248, -0.0136, -0.2092, -0.0877, -0.1562, -0.3322,
        -0.3019, -0.0253, -0.0077, -0.2063, -0.0471, -0.2233, -0.1503,  0.0159,
        -0.0686, -0.0538, -0.0558, -0.2330,  0.0010,  0.0235, -0.3066, -0.0196,
        -0.1527,  0.0210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 5.2645e-01,  6.1432e-02, -2.9656e-01, -1.4905e-01, -5.4462e-01,
        -3.7437e-03,  1.3008e-01,  4.7043e-02,  1.5632e-03,  4.6211e-02,
         5.7288e-02, -1.5814e-01,  1.7417e-02, -3.1478e-02, -1.0703e+00,
         1.5470e-01, -4.7168e-05,  3.4484e-02, -3.9059e-01, -7.9670e-02,
         1.4397e-02, -1.2562e-02,  3.2174e-02, -5.1619e-03, -3.5765e-01,
        -7.0287e-01,  4.5199e-02,  1.6027e-01,  5.8457e-02,  6.7070e-02,
         7.8225e-02,  3.7374e-03,  8.4744e-02, -2.1564e-02, -1.7299e-01,
        -6.4642e-02, -7.4140e-02, -2.8897e-01, -3.1597e-02, -5.4436e-02,
        -1.1623e-01, -3.4161e-02, -1.2756e-01, -5.2250e-03,  7.5046e-03,
        -8.1816e-03,  7.3648e-03, -7.4098e-03, -9.9215e-02, -1.0031e-01,
         4.2192e-02, -3.9437e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3005,  0.0773,  0.0728, -0.0049, -0.9761, -0.0995, -0.2162, -0.0214,
        -0.0252, -0.0085,  0.0633,  0.0840, -0.0449, -0.7611, -0.0660, -0.7047,
        -0.8237, -0.0768, -0.3594,  0.0825, -0.1398, -0.0903, -0.2471, -0.5862,
        -0.0426, -0.0553,  0.1940, -0.0815, -0.0565,  0.0987, -0.0064,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2821, -0.8052,  0.0278, -0.0587,  0.0929,  0.1570, -0.0543,  0.1072,
        -0.1062,  0.3212,  0.0319, -0.3811,  0.1356,  0.0963, -0.3007, -0.4055,
         0.0956, -0.4320, -1.2057, -1.1015, -0.4206, -0.3515, -0.1919, -0.2170,
         0.1614,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0636, -0.1942,  0.0891, -0.4053, -0.1105, -1.5433, -2.1639, -0.1282,
        -0.0053,  0.3356,  0.7240, -0.1415,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0157e-02, -2.1849e+00, -2.1243e-01, -7.0675e-01, -7.6266e-01,
        -5.4859e-03,  1.5649e-01, -8.8203e-03,  3.5066e-02,  9.6839e-03,
         3.4790e-02, -1.2376e-01, -4.9812e-01, -6.9434e-01, -1.9234e-01,
        -5.3695e-01, -1.8851e-01, -2.2286e-02, -3.4954e-02,  1.6039e-03,
        -1.9170e-01, -5.1747e-02, -2.0228e-01,  4.9844e-02,  9.1376e-02,
        -3.7552e-01, -5.2564e-03,  8.8270e-02, -4.2259e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4611, -2.4519,  0.1999,  0.2549,  0.1357, -0.8427, -0.8973,  0.2563,
        -0.0064,  0.4138, -0.4232, -0.0961,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4342, -3.2040, -0.5079, -0.8583,  0.3077, -1.0741, -0.8425,  0.1089,
        -0.5939, -0.0728, -0.1216,  0.1030,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7462,  0.4087, -0.9393,  0.0176, -0.7204,  0.1242,  0.0706, -0.1147,
        -0.7471, -0.0613,  0.0252, -0.7056,  0.0239, -0.0068, -0.3497, -0.7401,
        -0.1095,  0.0448,  0.1433, -0.1648,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0871, -1.7856, -0.0315, -0.5244, -1.0882, -0.1132, -0.1773, -0.0244,
        -0.2713, -0.0511, -0.1076, -0.3666, -0.2362, -0.1221, -0.1069, -0.0396,
        -0.3144,  0.1349,  0.0063,  0.0311, -0.1447,  0.1277,  0.0024, -0.0059,
        -0.0830,  0.0454, -0.2075, -0.4537, -0.2583, -0.0047, -0.0236, -0.0684,
        -0.3925, -0.4854,  0.0669, -0.2075,  0.1817,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7288, -6.6435, -0.5326,  1.8569, -0.4678, -0.0455, -0.4217,  0.4452,
         0.3754,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4773,  0.1233, -0.0879, -1.1712, -0.0471, -0.3800, -0.6410, -0.4016,
         0.0654, -0.0211,  0.0398, -0.3500, -0.3858, -0.7543,  0.2252, -0.0571,
        -0.0914,  0.1471, -0.2117, -0.2854,  0.0188,  0.0381, -0.1364,  0.0152,
         0.1000,  0.2933, -0.0858,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6469e-01, -1.9758e+00,  8.4349e-03, -5.6697e-01, -1.3294e-01,
        -2.5449e-01,  7.8356e-05,  7.4320e-03, -1.1860e-02,  4.8594e-02,
         3.0717e-01, -4.9460e-01, -4.1806e-02, -1.4383e-03, -4.5728e-02,
        -1.3575e-02, -6.4925e-01, -4.8851e-02, -1.0902e-01, -2.2832e-02,
         5.9376e-02, -3.5496e-01, -3.4244e-01,  3.0036e-02, -2.5333e-03,
        -7.5943e-02, -2.7764e-02, -3.3709e-01, -5.9287e-01, -9.8454e-03,
         9.9416e-02,  1.3069e-02, -3.2484e-01,  2.7908e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.5244, -0.0199, -0.0290,  0.0571, -0.3640, -0.1725, -0.0037,  0.0637,
         0.0576,  0.1358, -0.0232, -0.5111, -0.0162,  0.0077,  0.0222, -0.3146,
        -0.4558,  0.0732, -0.1634, -0.0660,  0.1003, -0.0188, -0.2452,  0.0028,
         0.0165, -0.0461, -0.0079, -0.0663,  0.0130, -0.0753,  0.0800, -0.4101,
        -0.6734, -0.0391, -0.1199, -0.2449, -0.3909,  0.0033,  0.0784, -0.0693,
        -0.1370, -0.4882,  0.0713, -0.0925, -0.0392,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0015,  0.1127, -0.0435, -0.0431, -0.1474, -0.9754, -1.1873, -0.0807,
        -0.0433, -0.5873,  0.0138, -0.2197, -0.6572, -0.2065, -0.0181, -0.0696,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8309,  0.0463,  0.1770, -0.1306,  0.0747, -0.0415, -0.1800,  0.0423,
         1.4857,  1.6847,  0.2935,  1.3606, -0.3095,  0.3712,  0.4926,  0.1266,
         0.1446,  0.0233, -0.1449, -0.0292,  0.0566,  0.0508,  0.0934,  0.0851,
         0.2433, -0.1303, -0.2412,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8755e-01,  5.6510e-02, -2.6411e-01,  1.2630e-02, -2.5762e-01,
        -6.5502e-02, -1.1642e-01, -3.8327e-02, -1.0424e-01, -3.2227e-02,
         5.9808e-05, -4.6378e-02, -1.4852e-01,  1.4833e-03, -9.4513e-02,
        -2.9787e-02, -1.3144e-01,  6.0114e-02, -8.0763e-03, -1.2338e-01,
         1.0804e-02, -7.5153e-02, -6.4771e-02, -2.6167e-01, -1.9623e-02,
        -3.7248e-02, -1.3404e-01, -3.3033e-01, -3.0147e-02, -1.6239e-01,
        -4.9512e-02, -1.8949e-01, -1.8955e-01,  8.9973e-03, -6.8228e-02,
        -3.0986e-01, -1.2962e-01, -1.8804e-01,  2.1575e-02,  4.4676e-02,
        -4.6979e-02, -1.1752e-01, -1.8957e-02,  3.6047e-03,  2.9794e-02,
        -3.7368e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2474e-01, -1.2112e-01, -1.5478e-01, -1.0288e-01, -1.4912e-01,
         1.2936e-02, -1.5723e-01,  5.5708e-01,  1.1809e-02, -3.5784e-02,
        -2.0168e-01,  1.0401e-01, -9.2421e-03, -5.5666e-02, -1.9834e-01,
         3.1896e-03,  2.0849e-02,  1.9439e-01,  5.5124e-01,  1.7068e+00,
        -4.3589e-03, -1.2975e-01, -2.2372e-04,  1.0624e-01, -1.6980e-02,
         1.1448e-01,  3.5574e-01, -9.2192e-02, -1.1778e-02, -3.8003e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1622, -0.3809, -0.4759, -0.0700,  0.1564,  0.0800,  0.3263,  2.2025,
         0.4051,  0.2463,  0.1827,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5470, -2.1889, -0.0640, -0.2765, -0.5994,  0.2005, -0.4971, -0.8543,
         0.0860, -0.2418,  0.0852,  0.0326, -0.4601, -0.1293, -0.1623, -0.8358,
        -0.2523, -0.1115,  0.2189,  0.5035,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7614, -2.1933, -1.4199, -0.4348,  0.3513, -0.3741, -0.6127,  0.0519,
        -0.1501, -0.2193,  0.0766,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0963,  1.7598,  1.0272,  1.2590,  0.1092, -0.1282, -0.0428,  0.4137,
         0.5717,  0.1304, -0.0149,  0.0836, -0.0147,  0.1604, -0.0805,  0.3109,
        -0.0507, -0.1215,  0.0985, -0.0239, -0.3247,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1765, -2.7492, -1.1823, -0.0492,  0.1113, -1.5054,  0.0703, -0.3165,
        -0.2930,  0.0442, -0.1804, -0.3932, -0.1330, -0.0765, -0.0981, -0.0384,
         0.2273,  0.0306,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2576e-01, -6.3110e-01, -1.1689e+00, -1.0297e-01,  1.7074e-02,
         6.5900e-02, -1.3131e-02,  9.9325e-03, -5.1456e-04, -1.8688e-02,
         1.8191e-02,  5.0555e-02,  3.6615e-02, -2.0708e-01, -2.4219e-01,
         1.4769e-01, -1.5637e-01, -2.3049e-01, -1.5412e-01, -1.8663e-01,
        -4.6122e-02, -1.9057e-01, -4.8500e-01, -1.3120e-02, -1.9368e-01,
        -1.7224e-01, -2.2749e-01, -7.6577e-02, -2.6943e-01, -3.6505e-01,
         1.4332e-02, -6.7554e-02,  4.9235e-02,  2.4186e-02, -7.8850e-02,
        -1.7872e-03, -1.1422e-02,  2.5724e-01,  1.0053e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0412, -2.7745, -0.2475, -0.4687, -0.0500, -0.0342,  0.0851,  0.1670,
        -0.0031, -0.1540, -0.1158, -0.8739,  0.0329,  0.0136, -0.0415, -0.0095,
        -0.5576, -0.0434,  0.0848, -0.0610, -0.0686, -0.0527,  0.0359,  0.0261,
        -0.0700, -0.0158, -0.3051, -0.3441, -0.5703, -0.7941, -0.1198, -0.1593,
        -0.1152, -0.2791,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.1676, -0.7332, -1.1996, -1.3913,  0.0476, -0.4622,  0.2738, -0.3616,
         0.2849,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1369e-01, -1.6342e+00, -2.2172e-01, -5.4049e-01, -2.3444e-01,
        -6.1650e-01, -1.2229e-02, -2.1320e-01, -2.9997e-01, -1.1947e-01,
        -1.3308e-01, -1.1604e-01, -6.2646e-02, -1.4093e-01, -1.1201e-01,
        -3.9167e-01, -2.2245e-01, -7.4481e-01, -1.8358e-01, -1.2710e-01,
        -2.1396e-02, -5.4995e-04, -1.8771e-02,  2.9380e-02, -2.7276e-01,
        -3.6015e-02,  2.6017e-02, -2.5310e-02, -6.7535e-02, -1.2069e-03,
        -1.6072e-01, -3.4669e-01,  3.1561e-02, -3.1645e-01,  3.9760e-03,
        -5.9174e-02, -2.1142e-02,  1.2439e-01, -9.0031e-03, -5.0813e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5317e-03, -2.0500e-01, -1.0490e+00, -5.9555e-02, -3.5961e-01,
        -6.3958e-02, -1.1688e-02,  6.6416e-03, -2.2569e-01, -4.8277e-01,
         6.9765e-02,  3.0285e-02,  1.5264e-02,  1.7346e-02, -5.2005e-02,
         2.1788e-02,  1.6871e-02,  2.3308e-02, -1.8616e-01,  6.1112e-02,
        -2.7173e-01,  1.8527e-02, -1.9442e-02, -8.7081e-02, -1.2176e-02,
        -1.3467e-01, -2.1574e-01,  1.3443e-02, -2.9733e-01, -4.1202e-01,
        -4.5770e-03, -1.0387e-01, -1.8478e-02,  4.7653e-02,  1.6348e-02,
         1.0235e-01, -4.0073e-01, -3.6984e-01, -1.0689e-02,  5.1891e-02,
         4.2083e-02,  2.8793e-02, -7.1148e-05,  1.2671e-02, -1.0983e-02,
         1.7766e-02, -1.8939e-02,  1.6001e-02,  2.3239e-02,  5.6078e-02,
         3.2948e-04, -1.5977e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0782, -1.5718, -0.0111, -0.2624, -0.0446, -0.0836, -0.5186, -0.0291,
        -0.1547, -0.1877, -0.0478, -0.1397, -0.4458,  0.0268, -0.2449, -0.2817,
        -0.2718, -0.0204, -0.3896,  0.0564,  0.1265,  0.2938,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4813,  3.8157, -0.2149,  0.2050, -0.0290,  0.0755, -0.2200,  0.7428,
         0.2825,  0.0393,  0.3108,  0.7373,  0.1622, -0.2236, -0.0203,  0.3130,
         0.2780, -0.0083, -0.2731, -0.1135,  0.4811,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1355, -1.8978, -0.4196, -0.2146, -0.0043, -0.0223, -0.1349,  0.0591,
        -0.2910, -0.0667,  0.0276, -0.0142, -0.0172,  0.0348,  0.0796, -0.0056,
         0.0606,  0.0240, -0.2540,  0.0342,  0.0764, -0.0498, -0.0210,  0.0398,
         0.0058, -0.0631, -0.0816, -0.2528, -0.0036, -0.2452, -0.5772, -0.4391,
         0.0098, -0.2243, -0.3559, -0.1470,  0.1064,  0.0789,  0.0713, -0.0290,
        -0.4427, -0.0895, -0.0961, -0.0078, -0.3165,  0.1253, -0.1537, -0.1352,
        -0.0282,  0.1019, -0.1796, -0.0928, -0.0390, -0.0458,  0.0515],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3501, -0.0146,  0.1357, -0.1055, -0.0132, -0.1431, -0.3723, -0.0447,
        -0.0088,  0.0405,  0.0006,  0.0708,  0.0140, -0.1153, -0.2772, -0.0937,
        -0.0350, -0.1583, -0.0076, -0.2800,  0.0028, -0.1749, -0.2656,  0.0386,
         0.1285,  0.0145, -0.0429, -0.5330,  0.0257, -0.0211,  0.0726, -0.0230,
        -0.0990, -0.3283, -0.0262, -0.0610, -0.0141, -0.1367, -0.1186, -0.1304,
        -0.0210,  0.0021,  0.0460,  0.0688, -0.0122,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2136, -1.5353, -0.5309, -0.5089, -0.1271, -0.1411, -0.1176, -0.0427,
        -0.3850, -0.0524,  0.0392, -0.2283,  0.0090, -0.4504, -0.3948, -0.0585,
        -0.1535, -0.1862, -0.1383, -0.2084, -0.2733,  0.1201, -0.0247,  0.2458,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0565,  0.1001,  0.0804,  0.0905,  0.0658,  0.0155, -0.0835, -0.2281,
         0.0189,  0.0215, -0.0104,  0.0157, -0.0376, -0.2716, -0.0137,  0.2087,
        -0.2245, -0.1862, -0.2347, -0.2844, -0.0775,  0.0154, -0.3376, -0.4654,
        -0.2244, -0.1491, -0.2836, -0.0726, -0.1301, -0.3150, -0.0304, -0.0949,
        -0.0593,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1509,  3.7708,  0.1452,  0.9120, -0.0427, -0.0381,  0.1588,  0.1012,
         1.1638, -0.0334, -0.0371,  0.0117, -0.4899,  0.6093,  0.1797,  0.6296,
         0.0877, -0.0340,  0.2667,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2986, -0.1650,  0.0878, -0.0375, -0.6886, -0.0741, -0.2772, -0.4809,
        -0.7899,  0.0178,  0.0327,  0.2606,  0.0468,  0.1361, -0.0227, -0.0051,
        -0.1269, -0.4441, -0.2368, -0.1341, -0.3531, -0.2281, -0.1442,  0.0846,
        -0.0794,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0142, -2.2279, -0.7231, -0.1125, -0.3748, -1.0200, -1.5223,  0.1956,
        -0.2393, -0.3648,  0.0588, -0.1207, -0.2676, -0.0365,  0.0356, -0.1326,
         0.1041,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 4.1699e-01,  1.3121e-01, -1.7963e-01, -6.9726e-04,  1.6542e-03,
        -7.9990e-02, -7.5458e-03, -1.3852e-01, -1.2621e-02, -5.3789e-01,
        -2.3866e-01,  1.3470e-02,  2.1444e-01,  2.0640e-02,  5.7792e-02,
        -2.3664e-01, -6.9775e-01, -1.4339e+00,  2.5350e-01, -7.1023e-01,
         1.1210e-01, -9.0325e-01, -1.2297e+00, -1.7928e-01, -7.1935e-02,
        -8.7477e-02, -5.4908e-02,  5.8278e-02,  1.2127e-01, -1.2297e-01,
        -3.1333e-01,  5.3184e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0580, -0.8762, -1.0800, -0.1585,  0.1077, -0.0371, -0.4025, -0.6567,
        -0.0676,  0.0407,  0.0027,  0.0758,  0.5173, -0.1146,  0.0270,  0.0573,
        -0.1732, -0.1593, -0.3560, -0.0813, -0.4017, -0.1089, -0.3530, -0.8892,
        -0.0129, -0.0525, -0.0719, -0.2531,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1128,  0.2644,  0.1147, -0.0954,  0.0893,  0.8294,  1.3191,  0.3540,
        -0.0791, -0.1018,  0.0048,  0.4839,  0.1013,  0.2399, -0.0666,  0.9940,
        -0.0939,  0.4686, -0.0635,  0.0194, -0.0046, -0.0513,  0.1051,  0.2417,
         0.3913, -0.1921, -0.2882, -0.3072, -0.1749,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4098e-02, -9.4772e-01, -9.2468e-02, -3.4263e-01, -7.9481e-01,
         1.8741e-01, -1.6125e-01,  5.6723e-02, -4.1980e-02,  5.2499e-02,
         2.0361e-01,  5.0984e-04, -1.3358e-01, -7.6768e-01,  3.5110e-01,
        -2.4502e-01,  6.1279e-02, -7.7708e-01, -9.0505e-02, -1.1026e-01,
        -5.2631e-02,  1.8366e-02, -2.1017e-01, -4.0304e-03, -1.2959e-01,
        -3.5626e-01, -3.0659e-01,  1.0706e-02, -2.9543e-02, -7.4632e-02,
        -6.3041e-03, -1.8313e-01,  1.5911e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5038, -2.1008, -0.1177, -0.6445, -0.0366, -0.4906,  0.0858, -0.1704,
        -0.0629, -0.2747, -0.4294, -0.0245, -0.0297,  0.0343, -0.0360,  0.0849,
        -0.3245,  0.0918, -0.4518,  0.0023, -0.7978, -0.0460,  0.2406,  0.1583,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1578, -1.1681, -0.6861,  0.0132, -0.2225,  0.0262, -0.1922, -0.3755,
         0.0559, -0.2531,  0.0389, -0.1885, -0.4641, -0.1073, -0.0816,  0.0189,
         0.1060, -0.1817, -0.0473, -0.1462, -0.1510,  0.1801, -0.1854,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4188, -0.9801, -0.0766, -0.1211, -0.1237, -0.2305,  0.0316,  0.0134,
         0.1067,  0.0041, -0.0036, -0.0720, -0.1356,  0.0167, -0.0077, -0.1320,
        -0.3722, -0.2721, -0.2282,  0.0560, -0.0026, -0.2503, -0.0162, -0.1063,
        -0.2667, -0.0690, -0.0425, -0.0578, -0.1200, -0.3448, -0.1802, -0.0825,
        -0.1734, -0.1050, -0.1873, -0.3532,  0.0039,  0.0590,  0.0163,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0101, -0.5233, -0.1376, -0.0405, -0.6607, -0.1267,  0.0510,  0.0171,
         0.0339,  0.0625, -0.0079,  0.0616,  0.0241,  0.0539,  0.0886, -0.3621,
        -0.0371, -0.0189, -0.0055, -0.0186, -0.3516, -0.0477, -0.0301, -0.0827,
        -0.4593, -0.6773, -0.1644, -0.1944, -0.0705, -0.2205,  0.0074,  0.0304,
         0.0770, -0.0835, -0.1121,  0.0083, -0.1482, -0.4493,  0.0489,  0.0262,
        -0.0166,  0.0618,  0.0388,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0557, -1.1961, -0.0193, -0.0105,  0.1146,  0.0042, -0.0879, -0.1203,
        -0.4487, -0.0590, -0.0505, -0.0132, -0.2201, -0.0426, -0.0478, -0.0231,
        -0.3315, -0.4569, -0.0761, -0.0484, -0.0138,  0.0442, -0.1593, -0.0950,
         0.0986, -0.0758,  0.0231,  0.0148, -0.0172,  0.0117, -0.0622, -0.1797,
        -0.3817,  0.1174, -0.0514, -0.0199, -0.1438, -0.1412, -0.3233, -0.0352,
        -0.2315, -0.3375, -0.0208,  0.0194, -0.0231, -0.1087, -0.0090,  0.0163,
         0.0249, -0.0085,  0.0684, -0.1331, -0.0031,  0.0142, -0.0295, -0.1332,
         0.1086], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3277, -4.0353, -0.0111, -0.3588, -0.4246, -0.2927, -0.8974, -1.0242,
         0.2520, -0.2697, -0.3726,  0.0666,  0.0399,  0.4920,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2331, -1.5584, -0.3703, -0.2509, -0.0868, -0.3735,  0.0107, -0.0637,
        -0.3310,  0.0039, -0.4599, -0.9901, -0.1434, -0.1405, -0.4282, -0.1285,
        -0.1018, -0.2871,  0.1503, -0.4392, -0.6398, -0.1045, -0.3469,  0.0481,
         0.1251,  0.1334, -0.0424,  0.0023, -0.3375, -0.0551,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7306, -0.8109,  0.3036, -1.9003, -0.5977,  0.2370, -0.3953, -0.0607,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-0.1283, -1.1292, -0.0558, -0.0774, -0.2709, -0.6298, -0.0100,  0.2036,
         0.0860, -0.0412,  0.0989,  0.0088, -0.3263, -0.5619, -0.0323, -0.0080,
         0.0020,  0.0169,  0.0128,  0.0187,  0.0458, -0.0035,  0.1315,  0.1121,
        -0.4025,  0.0382, -0.1515,  0.0060, -0.5324, -0.8133,  0.0174,  0.1869,
        -0.1923,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3980e-01, -1.6940e+00, -6.3281e-01, -2.2407e-02, -7.4748e-02,
        -1.2071e-01, -7.5124e-03, -1.0813e-01, -8.9054e-03, -1.8904e-01,
        -6.8699e-02, -1.3894e-01, -1.1175e-01, -5.0210e-02, -2.2850e-03,
        -4.8793e-03,  1.9962e-02, -1.0048e-02, -1.1563e-01,  3.5046e-02,
        -6.3812e-03,  3.7172e-02,  2.5272e-02,  5.7431e-03,  5.8708e-02,
        -1.8108e-02, -1.1943e-03, -3.1822e-01,  2.9780e-02, -3.9197e-03,
         3.1004e-02, -4.1567e-02, -1.6583e-01, -2.0772e-01,  1.2061e-01,
        -2.7942e-02, -1.5710e-01,  3.8239e-03, -9.6220e-03,  2.1090e-02,
        -3.2935e-02,  3.6534e-02, -2.8463e-02, -2.2668e-01, -4.6655e-02,
         7.8259e-03, -1.3851e-02, -1.5630e-03, -6.3084e-02, -5.1776e-03,
         2.5122e-02, -1.4580e-01, -8.6715e-02, -1.7342e-01, -5.1524e-02,
         5.6282e-02,  1.2658e-03, -3.6634e-02,  4.1223e-03, -1.2777e-01,
        -2.1744e-03, -1.1075e-01, -2.2006e-01,  8.6991e-03, -5.9429e-03,
        -2.5316e-02, -5.0140e-02,  2.1614e-03,  5.3134e-02, -3.9478e-02,
        -6.4697e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0916, -0.0533, -0.2174,  0.0119, -0.4473, -0.0599, -0.5939, -0.0777,
         0.0020, -0.0348, -0.0430, -0.0317,  0.0090,  0.0441,  0.1008,  0.0791,
         0.0098,  0.0264, -0.0190, -0.4277, -0.0671,  0.0511,  0.0828, -0.2431,
         0.0301, -0.2885, -0.1265,  0.0239,  0.0036, -0.0399,  0.0564,  0.0421,
        -0.4067, -0.4688, -0.1362, -0.0501, -0.5401, -0.2572,  0.0168, -0.0115,
        -0.0317, -0.1015,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8062e-02,  2.2401e+00,  1.3490e-01,  9.2551e-02,  2.6353e-02,
         3.0431e-02, -1.8356e-01,  8.9623e-02, -1.3070e-02,  5.2795e-01,
         1.1513e-01,  3.0074e-01,  8.2558e-01,  1.4177e-02,  2.3882e-02,
        -2.6356e-02,  1.5437e-01,  5.4908e-01,  1.0530e+00, -1.2905e-01,
         8.2486e-02,  2.0694e-01, -1.7615e-01,  3.3571e-02, -2.0881e-03,
         4.0848e-01,  3.4622e-02,  6.1765e-02,  6.6479e-01,  4.3964e-02,
         5.4876e-02, -3.0817e-03, -1.5024e-01, -5.0467e-02,  2.5763e-02,
         1.9948e-01, -1.3779e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2063, -2.7977, -0.2848, -0.6016,  0.1178,  0.0998, -0.1921, -0.0952,
        -0.0638,  0.0694, -0.2070,  0.2063, -0.4981, -0.1648, -0.0208,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1335, -0.1059,  0.1002,  0.2243, -0.6786, -1.3996,  0.0467, -0.9702,
        -1.3094, -0.1774, -0.1039,  0.2651, -0.3193,  0.0652,  0.0249,  0.0522,
        -0.0618, -0.1422, -0.0811,  0.0082,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0485, -0.0197, -0.0042, -0.0307, -0.5851, -0.0397, -0.0236, -0.0856,
         0.0293,  0.0032, -0.1899, -0.3553, -0.0379, -0.0233, -0.2203, -0.1008,
         0.0025, -0.0968,  0.0640, -0.1435, -0.0203, -0.0067,  0.0281, -0.0156,
         0.0270, -0.2362, -0.2927,  0.0090,  0.0190, -0.2322, -0.3542, -0.0690,
        -0.0038, -0.0125, -0.2078, -0.2600,  0.0275, -0.1931,  0.0142, -0.0607,
         0.0540,  0.0756, -0.0022,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4841e-02, -6.5121e-02, -4.6231e-02,  2.8603e-02, -2.0817e-01,
        -8.0196e-02, -2.5758e-01, -5.9532e-01, -2.6048e-02, -4.1448e-04,
         1.3876e-01,  1.8071e-02,  3.0914e-02, -6.1101e-01, -7.2664e-01,
        -4.0877e-02,  3.5618e-02,  3.1635e-02,  3.5494e-02, -1.1488e-01,
        -3.7753e-01, -3.7426e-02,  2.8517e-01,  4.4858e-02, -2.4710e-02,
        -8.5203e-02, -3.5007e-02,  6.6586e-03, -7.9909e-01, -8.1360e-02,
         1.6406e-01, -7.5005e-01, -1.0983e-01, -7.4205e-01, -8.2001e-01,
        -1.6124e-01,  1.1818e-01, -6.7380e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5614, -2.3175, -1.0516, -0.8183, -0.0701, -0.4642, -0.0499,  0.0744,
        -0.2127, -0.5635, -0.0193,  0.0828, -0.2139, -0.0915,  0.0588, -0.1763,
        -0.1000, -0.4305,  0.0087,  0.1192, -0.2719,  0.2487,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6386, -0.7025,  0.0806,  0.1243,  0.1471, -0.0915,  0.0494,  0.1722,
        -0.1960, -0.8150, -0.0906,  0.1490, -0.0326,  0.0351, -0.3932, -0.4104,
        -1.0106, -0.1037, -0.0914, -0.7529, -0.0274,  0.0014, -0.6634, -0.1298,
        -0.0117, -0.0185, -0.1108,  0.0940,  0.0342, -0.1987, -0.0708, -0.0529,
         0.0072, -0.0462, -0.2442, -0.6805,  0.1643,  0.0824, -0.2299,  0.0159,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0527,  0.0478, -0.0579,  0.0023, -0.1160, -0.2322, -0.1530, -0.1303,
        -0.3926, -0.2711, -0.0693, -0.0812, -0.0586, -0.6993, -0.4714,  0.0985,
         0.0340,  0.1091, -0.3012, -0.0702, -0.0120, -0.0205,  0.0038, -0.0720,
        -0.0103, -0.8712,  0.0802, -0.2975, -0.4839, -0.1308, -0.1176, -0.4139,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8203e-01, -2.5664e+00, -1.1310e-01,  2.6713e-02, -3.9127e-02,
         1.0490e-02, -9.9357e-02, -4.3039e-01, -7.0578e-01,  4.5148e-02,
        -1.8403e-02, -2.0897e-01,  6.6830e-03,  8.8928e-02,  7.6496e-02,
        -1.0883e-01, -3.3385e-01, -4.5233e-02, -1.3453e-03,  4.2667e-02,
        -5.3238e-02, -1.5579e-01, -7.7688e-02, -8.3622e-01, -1.2888e-01,
         1.8448e-02, -4.1606e-01, -9.4968e-02, -6.9982e-02, -1.6726e-01,
        -1.3418e-03,  5.6238e-02, -1.0196e-01,  4.8019e-02,  5.9945e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.7188, -0.2124, -0.7532, -0.5597, -0.2082, -0.3252, -1.4328, -0.7925,
        -0.1079, -0.7037,  0.0376, -0.2803,  0.1725, -0.4245, -0.0815,  0.0407,
        -0.1460, -0.0580,  0.1396, -0.3626,  0.1085,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1036, -1.9607,  0.0593, -0.4017, -0.8914, -0.4199, -0.2167, -0.1591,
        -0.0557,  0.1725,  0.0495,  0.0922, -0.0240,  0.1411, -0.1269,  0.1007,
        -0.2066,  0.2506, -0.1067,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1120, -0.2448, -0.5213,  0.1226, -0.3212, -0.0305,  0.0524,  0.0049,
         0.0486, -0.2238, -0.6774, -0.1321, -0.4137, -0.9751, -0.0335, -0.3033,
        -0.0122, -0.3076,  0.0479,  0.1111,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2985, -2.1850, -0.1307, -0.5011,  0.0788, -0.0104, -0.1538, -0.3594,
         0.0075, -0.0877, -0.0584, -0.0926,  0.0256, -0.3475, -0.0432, -0.2981,
        -0.4192, -0.0596, -0.2060, -0.3681, -0.0077, -0.0076, -0.3174,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2641, -1.3451, -0.8214, -0.5779, -0.0375, -0.3080,  0.0288, -0.3952,
        -0.4402, -0.8367, -0.2277, -0.4883, -0.1572, -0.6123,  0.0888, -0.0438,
         0.1208, -0.0250, -0.1828, -0.2793, -0.3236, -0.1397,  0.0976, -0.0441,
         0.3367,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5548, -2.5456, -0.1340, -0.2219,  0.0888,  0.0223, -0.1993,  0.0108,
         0.1068, -0.0597, -0.2993,  0.0087,  0.0700,  0.1454,  0.1230, -0.2868,
         0.0170, -0.2119,  0.0929, -0.0263, -1.4111, -1.0663, -0.6172, -0.4650,
        -0.1226, -0.1926, -0.0919, -0.1349, -0.0045, -0.1483, -0.2056,  0.0094,
        -0.1076,  0.0371,  0.2728,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2566, -0.0139, -0.0976, -0.6467, -0.0848,  0.1215, -0.3831,  0.0415,
        -0.8605, -0.1020,  0.4047, -0.0255, -0.1363, -0.4269, -0.3845, -0.6903,
        -0.1876, -0.4640, -0.1732, -0.0646, -0.0177,  0.1137,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5279,  0.0379, -0.0779, -0.0361, -0.1791,  0.2164,  0.0468, -0.1891,
         0.0923,  1.3735,  0.0238, -0.0278, -0.0412,  0.3565, -0.2017,  2.1281,
         0.1416, -0.0570,  0.5479,  0.4350,  0.5779,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0023, -2.2520, -0.0832, -0.0881, -0.0705,  0.0041, -0.9020, -0.8046,
         0.1760, -0.0274, -0.2375,  0.0360, -0.0357, -0.4688, -0.0811, -0.2470,
         0.0419, -0.0080,  0.0681,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0702e-01,  4.5588e-02, -1.1626e-01,  6.8137e-02,  2.5662e-02,
        -4.9570e-01, -4.0616e-03, -1.0085e-02, -1.5303e-02, -6.1878e-03,
         2.5390e-02, -5.6147e-02, -1.7389e-02, -1.9998e-01,  5.9486e-02,
         8.1784e-02, -1.3654e-01, -2.5381e-01,  3.9247e-02, -1.5676e-01,
         3.7724e-02, -1.1163e-01, -2.3477e-01, -4.1045e-02, -2.2647e-01,
         1.9486e-02, -1.2147e-01, -2.6672e-01, -4.7352e-04, -5.2403e-01,
         1.7225e-02, -2.4168e-02, -5.0401e-01, -5.3239e-01,  9.4108e-03,
        -5.3714e-02,  5.5412e-02,  5.5662e-02, -1.0331e-01, -2.4511e-01,
         3.3231e-02,  5.0920e-02,  2.8241e-02, -2.4033e-02,  6.8220e-03,
         6.9981e-02, -6.1502e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2700, -1.7903, -1.1875, -0.1152,  0.0477,  0.0170, -0.0819,  0.0354,
        -0.3583,  0.1154,  0.0237, -0.0728,  0.0401,  0.0084, -0.0470, -0.0170,
        -0.5721,  0.0307,  0.0048, -0.1600,  0.0540, -0.1935, -0.5758, -1.2465,
         0.1894,  0.1543, -0.2829, -0.0426, -0.0201,  0.0129,  0.1013,  0.0730,
        -0.0630, -0.0830,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1484, -3.1301, -0.0178,  0.1940, -0.0059, -0.3981, -0.0968, -0.0446,
        -0.0586, -0.2582, -0.0880,  0.0599, -0.7520, -0.8151,  0.0178, -0.0051,
        -0.2210, -0.0148, -0.1436, -0.1355,  0.0402,  0.0485, -0.0657, -0.0174,
        -0.0270,  0.0986,  0.0372, -0.1190, -0.1084, -0.3433, -0.1026,  0.1517,
        -0.3034,  0.2648,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.0336,  4.1993, -0.3010,  0.2947,  0.4049,  0.5561, -0.1977,  0.1347,
         0.1237,  1.0304,  0.2223, -0.1223,  0.5391,  1.0496,  0.1260,  0.2195,
         0.1346,  0.0832,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3493,  0.0286,  0.1699, -0.3799,  0.1139,  0.0349, -0.1331, -0.1348,
         1.0183,  2.5450, -0.3441,  0.0726,  0.1083,  0.0135, -0.0891,  0.3959,
         0.2081, -0.0409, -0.3559,  1.4940,  0.1862, -0.0141,  0.1078, -0.0695,
         0.0438, -0.2820, -1.0514,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6896e-01, -1.4443e-01, -1.0877e-02,  1.1801e-01, -5.8907e-02,
         2.3418e-01, -8.7767e-03, -6.2773e-02, -9.3512e-01, -2.4586e-01,
        -1.2671e-01, -5.4896e-02, -1.2972e+00, -3.4223e-01,  3.0804e-02,
        -8.5493e-03, -5.9637e-02,  3.4379e-02,  7.9100e-02, -1.1640e+00,
         8.2356e-02, -1.2623e-03, -3.4610e-01,  8.5865e-02, -1.1663e-01,
        -3.0987e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3681, -1.4068, -0.0375, -0.6056, -1.0091, -0.2389, -0.1262, -0.0247,
        -0.4364, -0.2545, -0.5161, -0.2533,  0.0148, -0.0026, -0.0931, -0.4321,
        -0.2087, -0.3125, -0.0339, -0.0120,  0.5726,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2396, -2.2595, -1.6674, -1.3654, -0.0651,  0.2209, -0.3890, -0.0238,
        -0.2654, -0.0254, -0.0594, -0.3076,  0.3431,  0.3987,  0.0564,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6374,  0.1001, -0.1524,  0.1516,  0.1875,  0.2017,  0.0303, -0.1282,
         0.2643, -0.4682, -0.1041, -0.4781, -3.0635, -0.5470,  0.2751,  0.0731,
        -0.1905, -0.1978,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6936, -2.7233, -1.0178, -1.1301, -0.0595, -0.0615,  0.1407, -0.6410,
        -0.2349, -0.5128, -0.7309, -0.3125,  0.1774, -0.3412, -0.2156,  0.2115,
        -0.0243,  0.0299,  0.0211,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1002, -1.1641, -0.7426, -0.1597, -0.0727, -0.2420, -0.0324,  0.0629,
        -0.1238, -0.0172,  0.0069,  0.0141, -0.0710,  0.0197,  0.0185,  0.1656,
        -0.3106,  0.0338,  0.1386,  0.1615, -0.1757,  0.0361, -0.2380,  0.0438,
        -0.3597, -0.0774, -0.2994,  0.0402,  0.0158, -0.2609, -0.0835, -0.1038,
        -0.2208, -0.0028, -0.2580, -0.1200, -0.2338, -0.0115, -0.2024, -0.0118,
        -0.0626,  0.0070, -0.1128, -0.0624], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2961, -1.9452, -0.1171, -0.6280, -0.6269, -0.1751, -0.3401, -0.1068,
        -0.4311, -0.0470, -0.3154, -0.0178,  0.0119,  0.0475,  0.1030, -0.0344,
         0.2373, -0.7198, -0.0882, -0.2391, -0.3617,  0.0083,  0.0402,  0.0501,
         0.0242,  0.0545, -0.0206, -0.4139,  0.0797,  0.0092, -0.0029, -0.1129,
        -0.0405, -0.0702, -0.0581,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9599e-01, -4.5596e+00,  6.8818e-03, -5.8315e-01, -1.4623e-01,
         1.2683e-01, -7.5413e-01, -6.7720e-01, -1.2447e-01,  5.7582e-02,
        -1.5676e-02, -5.0504e-01, -7.7167e-01,  2.0281e-02,  2.6700e-02,
        -5.7872e-01, -7.4622e-02, -6.4121e-02, -7.2116e-02, -1.0057e-03,
         1.8257e-01, -1.3093e-01, -1.3429e-02,  3.4020e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2634, -2.2717, -0.2750, -0.3920, -0.0683, -0.1101,  0.1826, -0.1196,
        -0.8384, -0.1291, -0.1841, -0.1018,  0.0412, -0.1547, -0.4913, -0.0196,
         0.1980, -0.0949, -0.5226, -0.0628,  0.0087, -0.3758,  0.0938, -0.2429,
         0.0157, -0.2096, -0.1155, -0.0890, -0.0287, -0.1289, -0.2305,  0.0339,
        -0.1259,  0.0371,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0724e-02,  2.1967e-01, -3.7866e-02, -1.0580e-01,  1.0125e-01,
         2.5230e-02, -8.6714e-01, -5.0491e-02,  1.9600e-01,  5.7444e-02,
         2.5267e-01, -4.2049e-02, -7.5392e-01, -1.5079e-01, -8.1512e-01,
        -1.2144e+00, -1.0232e-01, -4.2411e-02, -1.3921e-02, -1.0117e-01,
         1.3739e-02,  1.9118e-02, -1.5305e-01, -3.3572e-01, -1.6826e-01,
        -1.0129e-03, -4.7669e-01,  1.1164e-02, -7.9376e-02, -1.4663e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.2967,  0.1627, -0.1397, -1.1107,  0.2864,  0.0570, -0.0541, -0.1205,
        -0.5423, -1.5245, -0.2084, -0.0269, -0.1727,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0144,  0.2052,  0.3933,  0.1145, -0.0121, -0.4251, -0.4416, -0.7719,
        -0.9946,  0.0514, -0.0434, -0.7806,  0.0482, -0.4094, -0.8749, -0.0507,
         0.0917,  0.0792,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2180, -1.1277, -1.7244, -0.0305, -0.5398,  0.2186, -0.0647,  0.0976,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3770e-03, -1.8159e+00,  1.2099e-02, -4.1363e-01, -2.8642e-01,
        -6.7233e-02, -1.7835e-01, -2.0631e-02,  3.1565e-02, -1.2083e-01,
         7.5775e-02,  4.2387e-02, -6.2665e-02,  6.6016e-02,  5.2508e-02,
        -7.0250e-02, -2.1493e-02,  1.0600e-02,  3.6858e-02, -2.3563e-02,
         2.4507e-02,  8.7732e-02, -3.0302e-01, -2.9964e-01,  2.7327e-02,
        -1.4840e-01,  2.4980e-02, -8.4502e-02, -6.5046e-01, -1.3608e-01,
         7.1123e-02, -4.8950e-01,  4.6002e-03, -6.7807e-01, -9.4446e-02,
        -2.8808e-01, -3.9841e-01, -1.1951e-01,  5.3478e-02,  1.2300e-03,
        -2.1856e-01, -1.3712e-01, -1.7689e-01, -6.1327e-03,  1.2103e-01,
         1.2774e-01,  2.0185e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5096, -0.1453,  0.5714, -1.8959, -0.1353, -0.1731, -0.4030, -0.2713,
         0.2389, -0.2174, -0.0594,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3173,  2.4630, -0.0073,  0.3831,  0.1542,  0.0259,  0.5859,  1.3914,
         0.2526,  0.9290,  0.1826, -0.2343,  0.6801, -0.0365, -0.1579,  0.1053,
        -0.3061,  0.2042,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5486, -1.8610, -2.0868, -0.3175,  0.0787,  0.0150, -1.6122, -0.1257,
        -0.7139, -0.2892, -0.0154, -0.2746, -0.2429,  0.1282,  0.4259,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0735, -2.1027,  0.1921, -0.1976,  0.1140, -0.5109, -1.2057, -0.5041,
         0.1235, -0.5173, -0.4552, -0.1095, -0.1095, -0.0730,  0.0280,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3189,  0.0787,  0.2037,  0.2232,  0.0795, -0.0071, -0.1206, -0.0279,
        -0.6527, -0.3891, -0.1728, -0.6681, -0.7308, -0.1884, -0.0331,  0.0406,
        -0.3114,  0.1582, -0.0666,  0.0431, -0.7887, -1.4387,  0.1169,  0.5432,
        -0.1092, -0.0546,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0872,  0.1968,  0.2081,  0.6973,  0.2431, -0.2776, -0.0875, -1.4550,
        -2.8488, -0.0215,  0.2276, -0.2255,  0.3473,  0.2696,  0.0156,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4853, -1.9135, -0.0169, -0.4891, -0.7016, -0.1499, -0.6918, -0.0181,
        -0.0637, -1.3730,  0.0651,  0.1709, -0.2849, -0.0872,  0.0636,  0.0309,
        -0.0816, -0.0332, -0.3764,  0.0346, -0.1981,  0.0024,  0.2341,  0.2110,
         0.1530, -0.0784,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2762, -0.0047,  0.0124,  0.0353, -0.0321, -0.1557, -0.0130,  0.0936,
        -0.2810, -0.3522, -0.5229,  0.0231, -0.0629,  0.1275, -0.6820, -0.3331,
         0.2658, -0.2275, -0.0070, -0.3978, -0.1915, -0.2099,  0.0399, -0.2322,
        -0.0572,  0.0206,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.5411, -4.3315,  0.0991,  0.0743, -0.1264, -0.0122, -0.3964, -0.6011,
        -0.0234, -0.3970, -0.1570, -0.3108, -0.8019,  0.1118, -0.2484, -0.3395,
        -0.0536,  0.0313, -0.2406,  0.0560, -0.1179, -0.1343, -0.1254, -0.1965,
        -0.6863,  0.2340, -0.3151, -0.1460, -0.1931,  0.0334, -0.1878, -0.4119,
        -0.0462, -0.1214, -0.1230,  0.8386,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1221, -0.8692, -0.3558, -0.0570, -0.4149,  0.0084, -0.3516, -0.5678,
         0.0272, -0.0276, -0.2436, -0.0634, -0.0613,  0.0604, -0.0076, -0.1519,
         0.0058, -0.0435,  0.1923, -0.0671, -0.0847, -0.3913, -0.0072, -0.0598,
        -0.2302, -0.5443, -0.0398, -0.4079, -0.3552, -0.0331, -0.0350, -0.2616,
        -0.4090,  0.0728,  0.0259,  0.2135,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2237,  0.3081, -0.1163, -0.1069,  0.1790, -1.2430, -0.2643, -0.0390,
         0.2271,  0.0032, -0.0060, -0.8863, -0.7812,  0.0843,  0.0537, -0.0894,
         0.1351, -0.1589, -0.0810,  0.1848,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4038, -0.4810, -0.1965, -0.5960, -0.1492, -0.1279, -0.0958, -0.2396,
        -0.1501, -0.1370,  0.0294, -0.0118, -0.0518, -0.2767, -0.6752, -0.4426,
        -0.3206, -0.5005, -0.0787,  0.0365, -0.1983, -0.0814, -0.0756,  0.0066,
        -0.1072,  0.0090, -0.0304, -0.0128, -0.0566, -0.2907, -0.6727, -0.1591,
        -0.0739, -0.5769, -0.3092,  0.0704, -0.4666, -0.0250,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4807e-01, -1.6611e+00, -1.5055e+00, -5.9693e-01, -1.5941e-01,
         1.1599e-02, -5.6972e-02, -5.2272e-02, -1.6098e-01,  1.1296e-01,
        -3.2412e-01, -5.2652e-01,  1.2642e-02, -9.2249e-02, -2.7097e-02,
        -3.6459e-01, -5.9964e-04, -3.1921e-01, -8.0691e-02,  6.6387e-03,
         1.2314e-01,  3.1757e-02,  1.3327e-01,  8.6986e-02, -8.4934e-02,
         1.6114e-03, -1.1811e-01, -3.4735e-01, -3.9105e-01,  4.0695e-02,
        -1.0782e-01, -7.5322e-02,  3.8197e-02,  1.5108e-02, -8.3280e-02,
        -4.2734e-04, -6.4734e-02, -9.0615e-03, -2.4003e-02,  1.6971e-02,
        -2.6673e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5957e-04, -3.3911e+00, -2.5073e-01,  5.0935e-01, -3.1274e-02,
        -4.5683e-01,  2.9466e-01, -3.4267e-03, -2.3183e-01, -2.9175e-02,
         5.6599e-02, -4.2702e-01, -6.7572e-01, -1.1468e-01,  1.8107e-01,
        -3.3906e-01, -2.3663e-02, -1.8742e-01, -2.8097e-02,  8.9219e-02,
         4.8493e-01, -3.9258e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0242,  0.0277,  0.0064, -0.0588,  0.0382, -0.0911, -0.0044,  0.0375,
        -0.0657,  0.0090, -0.0157, -0.1410, -0.0064, -0.2666, -0.0372, -0.4756,
         0.0562, -0.3638, -0.1020, -0.5296, -0.4907, -0.0280,  0.1929, -0.1872,
         0.0159,  0.0805,  0.0047, -0.1043, -0.6462, -0.0438, -0.2826, -0.4031,
        -0.0696,  0.3306, -0.0951,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2612, -0.4326, -1.0637, -0.1490, -0.7845,  0.0876, -0.0840,  0.1118,
        -0.1215, -0.0622, -0.4053, -0.0300, -0.0229, -0.1459, -0.0412,  0.0312,
         0.3191, -0.0743, -0.6741, -0.2037, -0.5665, -0.0746,  0.1031, -0.3006,
        -0.0148, -0.2963, -0.4769,  0.5944,  0.7096,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3837, -1.9542, -0.1554, -0.2838,  0.0029, -0.3187,  0.0522, -0.1783,
        -0.2149, -0.0328, -0.4661, -0.3557,  0.0423, -0.0261,  0.0384, -0.5566,
        -0.2008, -0.1516, -0.1975,  0.1634, -0.0845, -0.2228, -0.0316,  0.1084,
        -0.0191,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2559,  0.0598, -0.1088,  0.2059, -1.3502,  0.0948, -0.1253, -0.7825,
        -1.0557, -0.2640,  0.0104, -0.0314,  0.0152,  0.0563, -0.5959, -0.6833,
         0.2300, -0.2144, -0.0490, -0.0048, -0.0678,  0.1284,  0.2080,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3833,  0.0230,  0.0166,  0.0387,  0.1206,  0.0500, -0.0602, -0.0088,
         0.0385, -0.0199, -0.0110, -0.1725, -0.3373, -1.0283, -0.1715, -0.2441,
        -0.0056, -0.3279, -0.0829, -0.0127,  0.0353, -0.0970,  0.0513,  0.1585,
         0.0336,  0.0975, -0.8311, -0.1674, -0.4278,  0.0743, -0.2597, -0.0518,
        -0.0455,  0.0210, -0.1563,  0.4030, -0.0956,  0.3777,  0.0058,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6124,  0.0891, -0.1064, -1.2796, -1.8716, -0.0897, -0.2258, -1.1323,
        -1.1917,  0.3498,  0.1973, -0.2072,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-6.8060e-02, -7.1435e-01, -2.1727e+00,  4.6989e-03, -4.1193e-02,
        -3.0732e-01, -2.5181e-01,  1.3610e-01, -7.2474e-02, -9.3007e-02,
         3.6708e-02, -5.7485e-02, -5.7422e-02, -2.0247e-03, -4.5461e-01,
        -1.1828e-02, -9.9671e-02, -1.7185e-01,  5.3498e-02,  2.5727e-02,
        -3.6001e-02, -5.1325e-02,  1.2145e-01, -2.7582e-02,  1.2677e-02,
         1.4013e-02, -1.1059e-01, -7.6954e-02, -2.9220e-02,  2.4978e-02,
         1.4322e-02,  7.3962e-02, -1.6024e-01, -4.1085e-02,  1.9733e-02,
         3.5135e-02, -4.5332e-01, -4.0696e-02,  2.2052e-03,  1.3204e-02,
         2.2061e-02,  9.5927e-02,  2.1722e-02,  5.5061e-02, -2.3370e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2880, -6.1242,  0.3469, -0.6982, -0.0982,  0.3095, -1.0630, -0.0654,
        -0.8104,  0.0358, -0.2349,  0.1120,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1966, -5.3621,  0.4503, -0.1791, -0.4106, -0.2393,  0.5477,  0.0094,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6149e-01, -2.2572e+00, -2.4741e-01, -5.4198e-02, -4.8312e-01,
        -4.4189e-02,  9.8332e-02, -8.5662e-02, -4.3013e-01, -5.2330e-01,
        -1.9394e-01, -1.3640e-01, -3.9889e-02,  2.8708e-03, -9.6715e-03,
        -6.6942e-03,  9.5423e-02, -5.4116e-02,  1.5229e-02, -7.1187e-02,
        -1.1098e-02,  1.6818e-02, -1.5852e-02, -3.6351e-02, -8.6913e-02,
         1.6871e-02, -1.3583e-02, -2.3156e-02, -1.9709e-01, -4.5657e-02,
         6.2075e-04,  1.0315e-02,  1.3046e-02,  4.1589e-02, -7.9277e-02,
        -1.5995e-03,  7.5063e-02,  1.7965e-02,  1.9479e-02,  1.1368e-02,
        -2.8226e-02,  1.0145e-02, -2.4934e-02, -2.4875e-02, -2.9821e-02,
         1.9468e-01, -3.8451e-01, -1.0291e-02, -4.2741e-01, -1.1444e+00,
        -1.7774e-01, -6.6997e-02, -2.9490e-01, -9.5984e-02, -1.4962e-01,
        -2.3010e-02, -1.0675e-01,  1.4263e-02,  1.6383e-02,  2.8390e-02,
        -2.2629e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0329, -0.0212, -0.0288, -0.1769, -0.5696, -0.0846, -0.2231,  0.0526,
         0.0680, -0.0174, -0.0452, -0.3864,  0.0791, -0.2938, -0.5385, -0.0502,
        -0.0396, -0.0213, -0.2358, -0.1769, -0.3217, -0.4264, -0.1365,  0.0554,
        -0.2178, -0.1424, -0.0745, -0.0806, -0.1618, -0.1101,  0.0366, -0.0194,
        -0.1878,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4023, -0.3711, -1.6241, -0.2402,  0.2194, -0.0662, -0.6624,  0.0058,
         0.1584,  0.0436, -0.2095, -0.0183, -0.2569, -0.0901, -0.1323,  0.0521,
         0.1397, -0.5356, -1.6797,  0.1767,  0.2158, -0.1074, -0.0145, -0.6262,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3617, -0.7055, -1.0551, -0.2983, -0.0032, -0.4749, -0.1125, -0.2307,
        -0.1249, -0.5548, -0.4392, -0.3329, -0.9770, -0.0337,  0.1949, -0.0716,
        -0.0079,  0.0915,  0.1009,  0.0393, -0.2813, -0.0296, -0.0851,  0.0735,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6113, -0.0320, -0.5867, -0.7936, -0.0237,  0.0251, -0.0147, -0.2321,
         0.0517,  0.0743, -0.0211,  0.0080,  0.0384,  0.0010, -0.2733, -0.4981,
        -0.4509,  0.0155,  0.0500, -0.2364, -0.4375,  0.0050, -0.2048, -0.4664,
        -0.0493,  0.0950, -0.4810,  0.0664, -0.0328, -0.1615, -0.1463, -0.1014,
         0.2186,  0.1263,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1188, -0.0125,  0.0549,  0.0404, -0.7287, -0.1480, -0.0804, -0.0232,
        -0.0776,  0.0213, -0.0571, -0.1105, -0.0369, -0.8602,  0.0038,  0.1879,
        -1.6045, -0.1175, -0.4519, -0.0938, -0.4124, -0.5779,  0.1348, -0.0031,
         0.3330,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2340,  0.0381, -0.0083, -0.0145, -0.0621, -0.0249, -1.0214, -0.5895,
        -0.8483, -2.2002,  0.0604, -0.1195, -0.1503, -0.0601,  0.2361, -0.3055,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0608, -2.0548,  0.1220, -0.8145, -0.4390, -0.8161, -1.4677, -0.1171,
         0.3941,  0.0392, -0.0475,  0.1246, -0.4377,  0.0386,  0.1721, -0.1099,
        -0.3009, -0.6964, -0.2677, -0.0443,  0.0087, -0.0806, -0.0787,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0490,  0.3500, -0.2121,  0.9351,  3.3977, -0.0387, -0.4054,  0.0996,
         0.1109, -0.0949, -0.1408, -0.1758,  0.1016,  0.1556,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.6143, -1.1233, -0.4897, -1.2719, -0.1157,  0.0284, -0.3881, -0.1569,
        -0.7058, -0.1518, -0.7498, -0.6722,  0.1085,  0.0029, -0.0324,  0.2866,
         0.0255, -1.0056, -0.0186, -0.5788, -0.0270,  0.1153, -0.0383,  0.5786,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3284,  0.0263, -0.7998,  0.0389, -1.2922, -0.0315, -1.2507,  0.0363,
        -0.0494,  0.0399, -0.2472,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0318,  0.1450,  0.0307,  0.0328,  0.1599, -0.4740, -0.5880, -0.0382,
        -0.7929, -0.0190, -0.3259, -0.0252, -0.1868, -0.0729, -0.2958, -0.0852,
        -0.1257, -0.5843, -0.0565,  0.3238,  0.0220,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0118, -0.0340, -0.0501, -0.1857, -0.0648,  0.0417, -0.0388, -1.8783,
        -0.8008, -1.3810, -0.1156,  0.2511, -0.1091, -0.5323,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8826,  0.1189,  0.0580,  0.0465,  0.0579, -0.1280, -0.0834, -0.4839,
        -0.0800, -0.0671,  0.1164,  0.5414,  2.3194, -0.0089,  0.0812,  0.2045,
        -0.4126,  0.0186, -0.0256,  0.0211, -0.0781,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2220, -2.5457,  0.4081, -1.0361,  0.1403, -1.1012,  0.0165, -1.0515,
         0.0104, -0.2962, -0.2150,  0.0156,  0.0468,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4731e-02, -7.7396e-01,  2.7956e-02, -3.3889e-02, -8.5726e-04,
        -7.4313e-02, -2.8943e-01, -5.5237e-01, -1.3304e-01, -1.1289e-02,
         3.6868e-02,  7.0637e-02,  4.4146e-02, -9.7525e-03,  2.0626e-03,
        -3.7823e-02,  1.6277e-02, -1.8626e-05,  1.8529e-02,  9.3901e-02,
        -5.1854e-02,  1.0941e-01,  7.4940e-03, -8.4885e-02, -1.2180e+00,
         2.1331e-01, -6.8567e-01, -5.5562e-01, -9.2143e-01, -1.2273e-01,
        -9.3441e-02,  4.3850e-02, -5.6237e-02, -4.3728e-03, -6.3358e-02,
         7.8922e-03, -2.3397e-01,  2.3360e-02,  3.2649e-02, -7.9469e-02,
        -1.5081e-01,  2.1658e-03, -4.4080e-04,  3.8666e-02,  1.0700e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4490, -0.5057, -0.6122, -0.1853, -0.0361,  0.1967, -0.1690, -0.2147,
        -0.1658,  0.0681, -0.7045, -0.0546, -0.0782,  0.0304, -0.5081, -0.1678,
        -1.0604, -0.0307,  0.0013, -0.0128, -0.0715, -0.6133, -0.1394, -0.8802,
        -0.0676,  0.0172, -0.1305, -0.2689, -0.7276, -0.1764,  0.1275,  0.0353,
        -0.1908,  0.0728,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1006, -0.2077,  0.0425,  0.6288,  0.1873,  0.1837, -0.0238,  0.1594,
        -0.1274,  0.0916,  0.0959,  0.0383,  0.0191,  0.0431, -0.1497, -0.1191,
         0.0372, -0.2909,  0.3489,  2.2833,  2.4310, -0.1448, -0.1792,  0.2727,
        -0.1392,  0.2969,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0200, -2.5654,  0.1377, -0.4600,  0.1316,  0.0485,  0.1282, -0.5927,
        -0.1193,  0.0318,  0.0750,  0.0359,  0.0397,  0.0430, -0.1785, -0.5775,
        -1.0249,  0.0469, -0.2660,  0.0788, -0.1670,  0.2038,  0.1291,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4123e-01, -1.4191e+00, -6.2026e-02, -2.5141e-01,  6.9500e-02,
        -8.8970e-02, -1.0914e-01, -1.8924e-01, -8.6170e-01, -2.6598e-01,
        -6.5759e-03, -1.7535e-01,  7.4630e-02, -1.0244e-01, -1.1178e+00,
         4.8377e-02, -6.2695e-04, -2.9038e-01, -5.2740e-01, -1.4537e-01,
        -3.9986e-02, -3.3941e-02,  1.8169e-02, -5.4157e-02,  4.3824e-02,
         5.8420e-03, -2.4380e-01, -7.8111e-01, -1.0714e-02, -1.2209e-01,
         4.0374e-02, -1.1921e-02,  1.0549e-01,  5.3536e-02, -6.5373e-02,
         6.2484e-02, -2.1171e-01,  1.0664e-01, -1.3101e-01,  1.1097e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0710,  0.1911,  0.0014, -0.1434, -0.4844, -0.0388,  0.0492, -0.4404,
         0.0235, -0.1902, -0.0175,  0.0417, -0.2268,  0.0147, -0.2326, -0.4199,
        -0.3156,  0.0332,  0.1241, -0.2120, -0.0301, -0.2762, -0.4240, -0.0116,
        -0.0298,  0.0461, -0.0147, -0.1150, -0.0244, -0.0866, -0.4614,  0.0327,
         0.1100,  0.2162,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.4728e-01, -2.5723e-01, -2.8100e-01,  3.8077e-02, -6.0444e-01,
        -2.7711e-02,  5.0439e-02, -7.6255e-02, -2.5397e-02,  5.5977e-02,
        -4.1003e-02, -9.4893e-02,  1.8958e-01, -1.6430e-01, -9.9157e-01,
         6.0485e-02, -8.4055e-02, -9.0257e-03, -3.7997e-01,  1.9154e-02,
         1.2637e-01,  5.2488e-02, -5.0824e-02, -6.8207e-03, -3.3462e-01,
        -7.3318e-01,  4.3947e-02, -1.6043e-01, -4.1931e-02,  6.3760e-02,
         1.7227e-01,  4.5740e-02,  7.5272e-02,  5.7083e-02, -3.0581e-01,
        -3.1305e-02, -1.2321e-01, -1.8385e-01, -7.5427e-02, -1.7163e-01,
        -9.1395e-02, -1.4692e-01, -1.7508e-01, -7.5812e-02,  3.1406e-02,
         1.6680e-02, -3.0873e-02,  1.4210e-05, -6.7534e-02, -3.7155e-02,
        -2.2537e-01, -3.4078e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8698e-02,  3.9173e-02, -7.9395e-02, -2.3056e-01,  1.1635e+00,
        -1.3760e-01,  9.2100e-04, -2.7095e-01, -1.0155e-01,  2.2736e-01,
        -2.2719e-02,  5.2144e-02,  1.3190e-01,  1.4384e+00,  1.1122e-01,
         6.9747e-01,  1.0266e+00,  3.9788e-01,  3.0814e-01, -2.0616e-01,
         1.1959e-01,  9.2012e-02,  8.8541e-02,  7.3079e-01,  8.3057e-02,
         7.7327e-02, -1.8085e-02,  7.3275e-02,  2.1424e-02, -1.4971e-02,
        -9.7862e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7034e-02, -1.0607e+00,  4.9320e-02, -2.0839e-01,  7.2743e-04,
         1.0543e-01, -1.1978e-01, -4.2500e-03,  6.5207e-02,  2.8920e-01,
        -8.3461e-03, -7.1342e-01,  4.5071e-02, -6.0140e-02, -4.9192e-01,
        -4.6736e-01,  9.6719e-02, -5.2224e-01, -4.4885e-01, -7.2151e-01,
        -3.6429e-02,  3.6108e-02, -6.0193e-02,  4.1444e-02, -6.5656e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6296, -0.1942, -0.0522,  0.0414,  0.4301,  1.3129,  2.0287,  0.3040,
         0.0518, -0.1252, -0.2916,  0.1832,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8734e-02, -2.3018e+00, -1.2163e-01, -4.5847e-01, -1.1865e+00,
        -2.3199e-01,  1.3858e-01, -2.4630e-02, -1.1526e-02, -8.1413e-03,
        -7.1092e-02,  2.6910e-02, -2.3958e-01, -7.8411e-01,  1.5321e-01,
        -3.2021e-01, -2.1194e-02, -3.2249e-02, -3.4524e-02, -1.9426e-02,
        -9.3772e-02,  7.9452e-04, -1.9595e-01,  8.1814e-02,  1.2552e-01,
        -5.2756e-01, -4.1571e-02,  1.1312e-01,  1.0597e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6153, -3.4686, -0.0678, -0.2351, -0.0709, -1.0900, -1.8782,  0.1374,
        -0.0451,  0.2021, -0.1057, -0.2121,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1079, -3.3844,  0.1926, -0.4258,  0.3077, -0.9529, -1.0166,  0.0581,
        -0.6747, -0.1094, -0.2171, -0.0451,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3949,  0.5799, -0.9557, -0.1928, -1.2159,  0.3114,  0.0133, -0.2474,
        -1.0255, -0.0986,  0.0645, -0.7580, -0.0304, -0.0590, -0.1113, -0.4878,
        -0.0753, -0.0084,  0.0684, -0.1278,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2908, -1.6016, -0.0466, -0.5540, -0.5548,  0.0198, -0.2251, -0.0137,
        -0.1497,  0.0292, -0.1413, -0.2664, -0.2985, -0.0917,  0.0716,  0.0387,
        -0.1868, -0.0068, -0.0279,  0.0370, -0.1469,  0.0232,  0.0093,  0.0518,
         0.0203,  0.1691, -0.0164, -0.3391, -0.2768,  0.1497, -0.0517,  0.0533,
        -0.3218, -0.8206, -0.2697, -0.0341,  0.0225,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8207,  4.2790,  0.2672,  1.2217, -0.0330,  0.0283, -0.1178,  0.7876,
        -0.0683,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3351, -0.0175,  0.1220, -0.5921, -0.0116, -0.1779, -0.3007, -0.4777,
         0.1112,  0.0042, -0.0060, -0.1392, -0.3828, -0.7375,  0.1316,  0.0572,
        -0.0421,  0.0685, -0.3485, -0.5908,  0.0075, -0.0346, -0.0894,  0.0289,
         0.0267,  0.1514, -0.0360,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0648, -2.4159,  0.1091, -0.4384, -0.0212, -0.1104, -0.0203,  0.0046,
        -0.0446,  0.0420,  0.4722, -1.0929, -0.1507, -0.0402, -0.0131,  0.0380,
        -0.2601,  0.0484, -0.1609,  0.0828, -0.1648, -0.5243, -0.7096, -0.1194,
         0.0851,  0.0201, -0.0398, -0.2050, -0.3993, -0.1506,  0.1618, -0.0276,
         1.1084,  0.2987,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 7.2612e-03, -2.2314e-02,  3.0202e-02, -2.0970e-02, -2.7499e-01,
        -1.1580e-01, -1.3262e-03, -1.0045e-02,  6.9802e-02,  2.2092e-01,
         6.0844e-02, -5.3205e-01,  2.5797e-02, -6.8965e-02,  3.0787e-02,
        -2.2497e-01, -5.2365e-01, -1.6763e-01, -1.8234e-01, -1.1476e-02,
         7.3653e-02,  5.9219e-02, -2.5707e-01, -4.7266e-02,  5.2580e-02,
        -5.0325e-04, -3.0399e-02, -3.8332e-02,  1.3934e-02, -5.0567e-02,
        -1.8572e-02, -4.3760e-01, -5.8270e-01, -7.5901e-02, -1.1142e-01,
        -1.1288e-01, -3.8778e-01,  1.8843e-02,  3.1868e-02, -3.7928e-02,
        -1.1367e-01, -2.5799e-01,  2.6395e-02,  3.0356e-02,  1.4638e-01,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4073e-01, -9.2508e-06,  8.8872e-02,  1.3474e-03, -1.4570e-01,
        -9.4799e-01, -1.2379e+00, -9.2650e-02, -1.0791e-01, -9.1751e-01,
        -6.3327e-02, -3.9593e-02, -6.9459e-01,  8.5982e-02,  1.2841e-01,
        -1.9590e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1062e-02,  4.0271e-03, -5.6232e-02, -1.4907e-01, -5.2130e-03,
        -1.5081e-01, -4.2549e-02, -2.5590e-01, -1.0026e+00, -1.4996e+00,
        -4.8169e-01, -7.6784e-01,  1.4238e-02, -6.2196e-01, -6.3888e-01,
         2.2888e-02, -5.8972e-02,  2.6614e-02,  3.5086e-02,  5.2744e-02,
        -1.2996e-04, -5.9846e-02,  5.4519e-02,  8.9757e-03, -4.4729e-02,
        -4.9775e-02,  2.8003e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0095, -0.0757, -0.5104, -0.0338, -0.3287,  0.0167, -0.1670, -0.0626,
        -0.1765, -0.0831,  0.0497, -0.0391, -0.1575,  0.0217, -0.1749, -0.0463,
        -0.0923,  0.0412, -0.0797, -0.2715,  0.0614, -0.1455, -0.1733, -0.6305,
        -0.0477,  0.0320, -0.1171, -0.3847,  0.0007, -0.1148,  0.0446, -0.1813,
        -0.3083, -0.0567, -0.1055, -0.3087, -0.0997, -0.2682, -0.0204,  0.0459,
        -0.1455, -0.0857, -0.0174, -0.0152,  0.3698,  0.0613], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4343,  0.1119, -0.2309,  0.2183, -0.0653, -0.0313, -0.0759, -0.7939,
        -0.0608, -0.1699, -0.2440, -0.1289,  0.0761,  0.0320, -0.2522,  0.0032,
         0.0267,  0.1475, -0.6105, -1.8025, -0.2405, -0.1571, -0.0171, -0.1571,
         0.0361,  0.0563, -0.5285,  0.1134, -0.4431,  0.3605,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6039,  0.0283,  0.0056, -0.1819, -0.0581, -0.2939,  0.2570,  1.7054,
         0.1405,  0.1985,  0.4502,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4458,  3.1208, -0.0250, -0.2244,  0.8269,  0.1419,  0.4757,  0.6195,
        -0.0662,  0.1427,  0.0711,  0.0651,  0.0973,  0.3654,  0.1787,  0.5014,
         0.1111,  0.3193, -0.1033,  0.4716,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5078, -2.6070, -1.2307, -0.4513,  0.1152, -0.3597, -0.7849,  0.0511,
         0.3655,  0.0497,  0.2916,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.9780, -2.0981, -0.5952, -0.9552, -0.2675, -0.2073, -0.1358, -0.7193,
        -0.6923, -0.1422, -0.0038, -0.0531,  0.0355, -0.2305, -0.1048, -0.3759,
         0.1086, -0.1931, -0.1243, -0.2604,  0.0111,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0383e-02, -2.0392e+00, -1.1962e+00, -7.1290e-02,  1.3855e-01,
        -1.2998e+00, -2.0607e-01,  4.1747e-01, -1.1856e-01, -1.9177e-02,
        -7.0618e-02, -3.6637e-01,  9.0187e-04,  4.3386e-02,  4.0715e-02,
        -3.5429e-02,  4.3664e-02,  3.9745e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0469e-01, -7.1246e-01, -1.1796e+00,  1.4665e-01, -1.4197e-01,
         2.3101e-02,  5.0115e-02,  1.7698e-02, -5.1288e-03, -2.0689e-02,
         6.9466e-02,  7.7206e-03,  2.9914e-02, -2.7647e-01, -4.9262e-01,
        -4.4445e-02, -3.5744e-01, -5.0809e-01, -1.5601e-01,  4.0775e-02,
         2.5346e-02, -2.7298e-01, -5.9551e-01, -1.3773e-02, -1.5367e-01,
        -3.9307e-03, -1.3759e-01, -1.0210e-01, -1.5964e-01, -3.1056e-01,
        -6.9626e-03, -1.1624e-03,  1.2663e-02,  5.5341e-02, -9.1783e-02,
         2.3338e-02,  3.6410e-02,  6.4168e-03,  1.2099e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4526e-02, -3.1558e+00, -3.4103e-04, -4.0292e-01, -6.5235e-02,
         5.6516e-02,  8.3130e-02,  8.6545e-02, -4.9132e-03, -1.1226e-01,
        -4.0923e-02, -5.4739e-01, -8.1909e-02,  1.8675e-02, -3.2263e-02,
         1.3385e-02, -3.8788e-01, -5.5418e-02,  1.1468e-01, -8.2693e-02,
        -6.3999e-02,  6.0935e-02,  2.2387e-02,  7.1357e-02,  6.4774e-02,
         1.7110e-02, -9.2746e-02, -1.6515e-02, -2.0481e-01, -2.8970e-01,
        -6.7805e-02, -9.6877e-03,  3.8360e-02, -1.7379e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.8204, -0.0902, -1.7886, -1.8429,  0.0192, -0.8127,  0.3890, -0.4344,
         0.1741,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0224e-01, -1.6241e+00,  3.1455e-02, -7.3683e-01, -3.7188e-01,
        -1.1712e+00, -5.1084e-02, -1.5233e-01, -3.0341e-01, -5.5090e-02,
        -3.7761e-01, -1.5525e-01, -4.2557e-02, -1.2938e-01, -5.1325e-02,
        -6.2582e-01, -1.9187e-01, -8.7420e-01, -6.2403e-02, -9.1394e-03,
        -8.6529e-02,  1.0598e-02,  9.3344e-02, -2.3979e-02, -3.0644e-01,
         7.7921e-02, -5.0929e-02, -4.8121e-02,  7.4670e-02,  5.2024e-04,
        -5.4149e-02, -3.2199e-01,  3.2505e-02, -3.9221e-01,  1.1264e-01,
         3.0543e-02,  1.3382e-02, -3.1193e-02,  1.6096e-01,  3.9741e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0229, -0.7656, -1.7082, -0.2422, -0.4293, -0.2298, -0.0206, -0.0815,
        -0.4353, -0.3857,  0.0061,  0.0269,  0.0126, -0.0099, -0.0862,  0.0785,
         0.0124,  0.0254, -0.2443,  0.0967, -0.2970, -0.0732,  0.0494,  0.0409,
         0.0138, -0.1007, -0.2017, -0.0246, -0.3413, -0.6218, -0.0421, -0.0501,
        -0.0293,  0.0068,  0.0349,  0.0424, -0.1360, -0.3164,  0.0439,  0.0020,
         0.1141,  0.0681,  0.0072,  0.0094, -0.0062,  0.0023, -0.0594, -0.0432,
         0.0193, -0.0176,  0.2020, -0.0381,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3196, -2.7105, -0.3560, -0.3908, -0.0630, -0.0740, -0.5271,  0.0236,
        -0.3209, -0.0528,  0.1288, -0.2257, -0.5032, -0.2061, -0.2541, -0.5982,
        -0.5343, -0.0767, -0.4997, -0.2110,  0.0627,  0.0510,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5362, -4.6098, -0.2446, -0.1832, -0.1271,  0.1879, -0.0525, -0.3678,
        -0.0130,  0.0424, -0.1244, -0.2753,  0.0981, -0.0643,  0.1467, -0.8185,
        -0.2195,  0.0245,  0.1167,  0.1077, -0.2773,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0480, -1.0802, -0.0815, -0.0519, -0.0267,  0.0126,  0.0074,  0.0765,
        -0.1558,  0.0471,  0.0129, -0.0293,  0.0298, -0.0093,  0.0202,  0.0046,
         0.0183,  0.0049, -0.1467,  0.0201,  0.0845, -0.0430, -0.0228,  0.0716,
         0.0216,  0.0390, -0.0735, -0.2348,  0.0023, -0.3194, -0.4096, -0.2796,
        -0.0066, -0.2992, -0.2305, -0.0491,  0.0054,  0.0335,  0.0481,  0.0235,
        -0.5328, -0.0476,  0.0307,  0.0292, -0.2089,  0.0475, -0.2487, -0.1330,
         0.0380,  0.0681, -0.0573, -0.0287, -0.1325,  0.1518, -0.0983],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0306,  0.0406,  0.1598, -0.0105,  0.0305, -0.1347, -0.3291,  0.0071,
         0.0441,  0.0308, -0.0810,  0.0011, -0.0168, -0.1171, -0.1964, -0.0396,
         0.0323, -0.1146, -0.0130, -0.3443, -0.0994, -0.0791, -0.3440,  0.0079,
        -0.0246,  0.0404,  0.0439, -0.4904,  0.0320,  0.0147, -0.0246,  0.0101,
        -0.1374, -0.4659, -0.0181, -0.1407,  0.0267, -0.1737, -0.2897, -0.1886,
        -0.0543, -0.0154, -0.0344,  0.0671,  0.1728,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1648, -1.6477, -0.5702, -0.6583,  0.0886, -0.1060,  0.0174, -0.1880,
        -0.2186, -0.0494,  0.0029, -0.2773, -0.0154, -0.4397, -0.7522,  0.0163,
        -0.1498, -0.3682, -0.1228, -0.1207, -0.2079,  0.1631, -0.0875, -0.0170,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0116,  0.1205, -0.0044, -0.0437, -0.1127,  0.0027, -0.2723, -0.3167,
         0.0470,  0.0046, -0.1186,  0.0037, -0.0192, -0.2333,  0.0244,  0.1029,
        -0.2034, -0.1387, -0.1693, -0.3849, -0.0752,  0.0266, -0.1887, -0.3809,
        -0.0169, -0.2111, -0.3428, -0.1309, -0.1747, -0.3856, -0.0041, -0.0418,
        -0.1913,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1451, -3.1560, -0.0721, -1.0213, -0.1589, -0.1267, -0.0188, -0.1641,
        -0.8253, -0.0551, -0.0053, -0.0596, -0.0390, -0.6284,  0.0724, -0.1449,
        -0.3102, -0.0344, -0.3938,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3821, -0.1388,  0.2594, -0.0152, -0.2806, -0.0020, -0.4196, -0.6202,
        -0.9462,  0.0109,  0.0087,  0.1199,  0.0859,  0.0528, -0.0363,  0.0151,
        -0.0639, -0.1718, -0.1828, -0.0562, -0.3603, -0.5883,  0.0488,  0.1071,
        -0.0960,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2196, -2.6823, -0.8564,  0.0406, -0.0134, -0.5988, -1.3771, -0.0742,
        -0.1454, -0.2087, -0.1790, -0.0551, -0.3976, -0.0946, -0.0552, -0.0546,
         0.3924,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.2098,  0.0380,  0.0223,  0.0514,  0.0710, -0.0856,  0.0304, -0.0592,
         0.0597, -0.9254, -0.3830, -0.1879, -0.0525,  0.0359, -0.1403, -0.2760,
        -0.8208, -1.2906, -0.2460, -0.8491, -0.0685, -1.0994, -1.3220, -0.0111,
         0.2286, -0.0749,  0.0782, -0.0856,  0.0549, -0.0344, -0.0597,  0.1061,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1226, -0.8863, -0.8786, -0.0087,  0.1765, -0.0770, -0.5182, -0.7734,
        -0.0035,  0.0308,  0.0505,  0.0243, -0.0456,  0.0046, -0.1056,  0.1691,
        -0.1039, -0.0736, -0.3040, -0.2172, -0.3253, -0.0379, -0.3128, -0.5278,
        -0.0384, -0.1379, -0.0676, -0.0237,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5620, -0.1338, -0.0862,  0.0450, -0.2367, -0.9806, -1.2444, -0.2154,
        -0.1763,  0.0354, -0.0262, -0.4592,  0.0650, -0.0083, -0.1064, -0.5793,
        -0.1381, -0.3003, -0.0823,  0.1606, -0.0164,  0.0628, -0.1319, -0.0028,
        -0.6205, -0.0534,  0.1764, -0.0870,  0.4138,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0464, -1.0635, -0.0096, -0.4052, -0.7193, -0.0977, -0.0847, -0.0607,
        -0.0085,  0.1900,  0.0996, -0.0883, -0.3606, -0.5914,  0.2478, -0.3418,
         0.1731, -0.7579,  0.0596, -0.0444,  0.0994,  0.0993, -0.4052, -0.1200,
        -0.4104, -0.6211, -0.4268, -0.0204, -0.0420, -0.0062, -0.0632,  0.0603,
         0.1206,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2059, -1.8843,  0.0498, -0.4215,  0.0918, -0.3190,  0.0730, -0.2311,
        -0.1567, -0.7445, -0.3745, -0.0933, -0.0336,  0.0044, -0.0617,  0.0723,
        -0.2679, -0.0104, -0.7782,  0.1339, -0.5527, -0.0723,  0.0075,  0.0644,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0832, -1.6256, -0.5540,  0.0305, -0.3256,  0.0495, -0.0680, -0.2295,
         0.1799, -0.3323, -0.0656, -0.2167, -0.3136,  0.0617,  0.0071,  0.0351,
         0.0078, -0.3231,  0.1158, -0.5855, -0.1130, -0.1490,  0.1694,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2064, -0.5942,  0.0734, -0.0318, -0.1166, -0.1560,  0.1229,  0.0161,
         0.0071,  0.0860,  0.0453, -0.0151, -0.0536,  0.0591, -0.0012, -0.0403,
        -0.4039, -0.2747, -0.1968, -0.0774,  0.0023, -0.1358, -0.0166,  0.0099,
        -0.2008, -0.0892, -0.0232, -0.0608, -0.0605, -0.2212, -0.1762, -0.0614,
        -0.1619, -0.0215, -0.1712, -0.3466, -0.0370, -0.0431, -0.0766,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1150e-01, -4.1734e-01, -8.5543e-02, -2.7545e-01, -7.4983e-01,
        -4.5569e-02,  7.4617e-02,  6.4919e-02, -4.4144e-02,  1.6911e-01,
         7.3756e-03,  4.7571e-02, -1.5871e-02, -6.1151e-02,  2.8993e-04,
        -3.9378e-01, -1.0448e-01, -3.2242e-02,  1.1936e-02, -4.3737e-02,
        -3.8559e-01,  2.6886e-02, -4.1382e-02, -2.6032e-01, -5.4054e-01,
        -9.4059e-01, -5.8549e-02, -5.3780e-02, -2.1209e-02, -2.3874e-01,
        -3.0338e-02,  3.7308e-02,  6.2171e-03, -4.3520e-03, -1.9562e-02,
        -4.7472e-02, -8.3336e-02, -5.2231e-01,  4.6626e-03, -6.4000e-02,
        -2.7448e-02,  1.0658e-01, -4.9202e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8462e-01,  3.7450e+00,  2.4160e-01, -1.0391e-01, -1.0575e-01,
         5.1439e-02, -6.0600e-04,  7.0825e-02,  8.6058e-01,  6.0742e-02,
        -1.7686e-03, -1.3038e-01,  3.8292e-01, -3.4206e-03, -4.0957e-02,
         1.4886e-01,  4.8071e-01,  8.5588e-01,  5.6247e-02,  1.0726e-01,
        -5.0240e-02,  1.3795e-01,  2.9087e-02,  7.7931e-03, -3.3062e-02,
         5.0561e-02, -1.0869e-01,  6.5734e-02,  2.4172e-02,  6.7936e-03,
        -1.1942e-01,  3.7623e-01,  1.0142e+00, -8.7255e-03,  3.7923e-02,
        -1.2828e-01, -1.9972e-01,  7.3412e-02,  5.7952e-01,  1.4084e-01,
         4.3007e-01,  6.8751e-01,  1.2493e-01, -1.2304e-01,  4.2517e-03,
         8.5353e-02,  2.5641e-02,  5.3892e-02, -4.3808e-02,  5.1323e-02,
        -2.9975e-02,  1.7940e-02, -1.0829e-01, -4.7074e-02,  5.5023e-02,
         2.7968e-01,  8.8102e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0310,  5.7778,  0.6274,  0.0991,  0.5449, -0.1575,  0.9298,  1.0717,
         0.2116,  0.6557,  0.8473,  0.1273,  0.0718, -0.4763,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2661, -1.0851, -0.2008,  0.1507, -0.0042, -0.0408, -0.0338,  0.0036,
        -0.1206, -0.0689, -0.2409, -0.5722, -0.0775, -0.0315, -0.2237,  0.0504,
        -0.2043, -0.1867,  0.1165, -0.1850, -0.3839, -0.0327, -0.4190,  0.0128,
         0.0554, -0.0709,  0.0301,  0.1216,  0.0163,  0.0906,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0568,  2.6371, -0.1239,  2.0688,  1.2723, -0.6518,  0.0181,  0.5802,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.2317, -0.8260, -0.0286, -0.1561, -0.3146, -0.9219, -0.0174,  0.0071,
        -0.0337, -0.0739, -0.0407,  0.0567, -0.4163, -0.8881, -0.0296,  0.0322,
         0.0018, -0.0182, -0.0408, -0.0189,  0.0481,  0.0273,  0.0111,  0.2193,
        -0.5786, -0.1397, -0.3416,  0.0378, -0.5368, -0.6212, -0.0825,  0.1522,
        -0.0157,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3491, -2.1774, -0.7674, -0.2390, -0.0770, -0.1574, -0.0726, -0.0540,
         0.0963, -0.3492, -0.0486, -0.0809, -0.0944,  0.0903,  0.0410,  0.0985,
         0.0306,  0.0152, -0.1376,  0.0237,  0.0189,  0.0210,  0.0249,  0.0209,
         0.0847,  0.0797, -0.0524, -0.5940,  0.0136, -0.0531,  0.0210, -0.0350,
        -0.2753, -0.3209,  0.0393, -0.0533, -0.0980,  0.0595,  0.0089,  0.0418,
        -0.0078,  0.0249, -0.0885, -0.3328, -0.0426,  0.0305,  0.0449, -0.0094,
         0.0431,  0.0159, -0.0142, -0.2207, -0.0151, -0.1452, -0.0496,  0.1264,
        -0.0086,  0.1974,  0.0111, -0.2550, -0.0385, -0.1016, -0.3387, -0.0274,
         0.0180, -0.0251, -0.1000,  0.0244,  0.0512,  0.0658,  0.0126],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1635, -0.2371, -0.3564, -0.0254, -0.6210, -0.2472, -1.0842, -0.2761,
        -0.0464,  0.0777,  0.0043, -0.0489,  0.1256,  0.1321,  0.1523,  0.1329,
         0.0133,  0.0574, -0.1261, -0.7117, -0.1073,  0.0574,  0.0637, -0.3251,
         0.0436, -0.3046, -0.1644,  0.0526,  0.0484, -0.0782,  0.0304,  0.1002,
        -0.5154, -0.4575, -0.1606, -0.1139, -0.6009, -0.3127, -0.0112,  0.0391,
         0.0667, -0.0582,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0703, -1.6073, -0.1847, -0.0551,  0.0449,  0.0017,  0.0765, -0.0266,
         0.0196, -0.5500, -0.2147, -0.2531, -0.2967,  0.0820,  0.0092, -0.0806,
        -0.0759, -0.4380, -0.6023,  0.0526, -0.1162, -0.1973,  0.0537,  0.0220,
        -0.0336, -0.5794, -0.1352, -0.0523, -0.4650, -0.0331,  0.0221, -0.0910,
        -0.0491, -0.1328, -0.0516,  0.0642, -0.3743,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6066, -4.1561, -0.1427, -0.8938, -0.0200,  0.2081,  0.1979,  0.1637,
        -0.1297, -0.0121, -0.3266,  0.2438, -0.6252, -0.1751,  0.4184,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2614,  0.0450,  0.1289,  0.0758, -0.7886, -1.3688, -0.1102, -0.6230,
        -0.6333, -0.0501, -0.0487,  0.0980, -0.4989, -0.0698,  0.0068, -0.0305,
         0.1245, -0.1724,  0.0532,  0.0813,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6790e-01,  5.1494e-02,  2.8811e-02,  3.3043e-02, -6.9940e-01,
        -1.9767e-02, -4.0202e-02, -6.3523e-02,  6.2167e-04,  8.8102e-03,
        -1.9233e-01, -3.2437e-01, -1.2297e-01, -4.0429e-02, -2.5988e-01,
        -1.2102e-01,  1.5344e-02, -1.6302e-01,  7.5026e-02, -2.1555e-01,
        -1.8862e-02, -3.9013e-02, -2.5943e-02, -2.7328e-02,  2.1338e-02,
        -2.9916e-01, -4.4005e-01,  4.5752e-02, -3.6331e-02, -1.5651e-01,
        -3.1515e-01, -9.1204e-02,  2.6661e-02,  6.4370e-03, -1.7607e-01,
        -3.0403e-01,  3.0425e-02, -1.1039e-01, -6.3884e-02, -2.1673e-02,
        -6.8981e-05,  1.1363e-01,  6.4145e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1205e-01,  5.6072e-02, -1.4694e-01,  6.6848e-02, -3.4412e-01,
        -1.1901e-01, -2.2992e-01, -8.8365e-01, -2.3057e-01,  2.7605e-02,
         1.7257e-01,  1.1801e-01,  5.2631e-02, -6.7710e-01, -5.8261e-01,
         9.3403e-03,  1.6749e-02, -1.8350e-02,  5.3641e-02, -5.4032e-02,
        -2.2864e-01, -7.7296e-02,  2.6563e-02,  1.3291e-01, -7.1041e-02,
         2.2858e-03,  2.5839e-02, -1.4329e-01, -7.5601e-01, -2.7295e-04,
        -1.1589e-01, -3.1851e-01, -6.6894e-02, -3.4165e-01, -4.1054e-01,
        -8.1328e-02, -1.1260e-01,  4.9814e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8579e-03, -2.4394e+00, -7.7898e-01, -9.2916e-01,  3.7251e-03,
        -5.9125e-01,  5.1344e-02,  4.8563e-02, -4.2191e-01, -7.2056e-01,
        -1.0434e-01, -2.0274e-01, -5.8568e-01, -6.3595e-02, -1.1698e-02,
        -8.0310e-02, -2.9662e-02, -4.7264e-01, -1.9868e-01,  1.5378e-01,
         9.2968e-02,  1.6718e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2657e-01, -6.5730e-01, -5.8859e-02,  2.2463e-02, -2.4337e-02,
        -1.1905e-01, -1.0957e-01,  2.3123e-01,  1.5366e-02, -6.5552e-01,
        -5.1611e-02, -1.3165e-02, -1.0250e-01,  9.6773e-03, -3.4574e-01,
        -3.9138e-01, -6.5648e-01, -5.2790e-02, -6.8066e-02, -3.9069e-01,
        -6.6972e-03, -4.2871e-03, -3.4143e-01, -3.5680e-02, -1.3702e-02,
         3.9770e-02, -3.4009e-02,  1.8951e-02,  2.6632e-02, -3.1565e-01,
        -5.8179e-02, -9.5511e-02,  7.0746e-02, -1.5985e-02, -1.2893e-01,
        -2.8290e-01,  7.5740e-02, -6.7199e-02,  4.4451e-04,  3.6517e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0018,  0.0172,  0.0188, -0.0646,  0.0649, -0.5270,  0.0460,  0.0667,
        -0.3861, -0.0257,  0.0111, -0.0350, -0.1702, -0.8286, -0.4042, -0.0328,
         0.1592, -0.0501, -0.5826, -0.0194, -0.0993,  0.0414,  0.0404, -0.0774,
        -0.2075, -0.9198,  0.0188, -0.4426, -0.4913, -0.0767, -0.2167,  0.2677,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4029e-01, -2.8615e+00, -6.0071e-02,  1.7417e-01, -1.5602e-01,
         1.1127e-01,  2.8236e-02, -4.4892e-01, -1.2404e+00,  1.2297e-02,
         4.0754e-02, -3.9391e-01,  1.5603e-03, -3.7094e-02,  6.1845e-04,
        -4.3362e-02, -4.1318e-01, -7.9873e-02,  5.3393e-02,  1.1087e-01,
        -8.2159e-03, -4.3049e-03, -1.1705e-01, -4.8878e-01,  5.4144e-02,
         6.8544e-03, -3.9833e-01, -3.1088e-02,  1.2672e-02, -1.5837e-01,
         8.9144e-02, -1.1498e-02, -7.9583e-02,  2.6649e-01, -1.0091e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-0.5199,  0.0265, -0.9265, -1.0570, -0.2243, -0.1411, -1.2844, -0.8311,
        -0.4086, -0.6203,  0.0397, -0.5403, -0.0411, -0.2522, -0.1056, -0.1944,
        -0.3404, -0.1189,  0.1056,  0.1283, -0.0451,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2304, -2.7100,  0.2623, -0.5233, -0.7741, -0.3382,  0.1002, -0.4506,
        -0.5723,  0.0487, -0.0937,  0.1554,  0.0959, -0.1175, -0.4516, -0.0141,
        -0.2535,  0.1526, -0.1450,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2551, -0.2120, -0.7924,  0.1076, -0.3832, -0.1158,  0.0447, -0.0979,
        -0.0311, -0.4885, -1.3673, -0.0850, -0.3738, -1.5114, -0.1351, -0.2673,
         0.0291, -0.1704,  0.2901, -0.0083,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2324, -2.7284,  0.0074, -0.6822,  0.1169, -0.0832,  0.0635, -0.3374,
        -0.0378, -0.0284, -0.0316, -0.1494, -0.0493, -0.3920, -0.1579, -0.3543,
        -0.3712,  0.0243, -0.3164, -0.4015,  0.0606,  0.0768, -0.1392,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0888, -1.1802, -0.4508, -0.2427, -0.1206, -0.6061, -0.0013, -0.3924,
        -0.5746, -0.7590, -0.1454, -0.4834, -0.0902, -0.7348, -0.0790, -0.0502,
        -0.0562,  0.0712, -0.0561,  0.0760, -0.2710, -0.0760, -0.1097, -0.0271,
         0.1257,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2609e-01, -2.9651e+00,  9.6323e-02, -1.9305e-01, -1.0647e-01,
        -3.5619e-02,  1.5106e-01, -1.0377e-01,  1.2453e-01,  1.0056e-01,
        -7.0430e-01, -1.8357e-01, -1.1962e-02, -2.1187e-03,  4.4325e-03,
         2.2024e-03, -1.6913e-01, -7.3557e-03, -1.5879e-01, -1.3812e-01,
        -5.1277e-01, -8.9688e-01, -5.2078e-01, -2.7562e-01, -3.4153e-01,
        -1.4118e-02, -1.0968e-01, -5.4208e-02, -3.8057e-03, -4.2074e-01,
        -3.4713e-01, -8.5687e-02, -8.5471e-02,  4.1876e-02, -4.5397e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4964,  0.0253,  0.2041, -0.6670,  0.0092,  0.2931, -0.5460, -0.1353,
        -0.7409, -0.2441, -0.1405, -0.1699,  0.0299, -0.2849, -0.4995, -1.0560,
        -0.0412, -0.8997,  0.0030,  0.1556, -0.2462,  0.2450,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8470,  0.1112, -0.2075,  0.0921,  0.1985,  0.3359,  0.2337, -0.0617,
         0.4540,  2.2802, -0.0163, -0.0226, -0.2099,  0.1484,  0.4305,  0.5048,
         0.2083, -0.0332,  0.1136, -0.0152, -0.3928,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2223,  3.8978,  0.3090,  0.0171,  0.2235, -0.1416,  0.9574,  0.5792,
        -0.1049, -0.1044, -0.2438, -0.0344, -0.1929,  1.2589,  0.1682,  0.5726,
        -0.1501, -0.5316,  0.2164,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1404,  0.0220, -0.1661, -0.0159,  0.0084, -0.2288, -0.0265,  0.0136,
         0.0255, -0.0386, -0.0273, -0.1435, -0.0427, -0.1881,  0.0300,  0.0176,
        -0.2214, -0.3103, -0.0857, -0.2199, -0.0451, -0.0470, -0.1775,  0.0182,
        -0.3059, -0.0974, -0.0428, -0.2144,  0.0657, -0.5263, -0.0575, -0.0489,
        -0.2157, -0.4148,  0.0534, -0.0083, -0.0069, -0.0060,  0.0397, -0.0763,
         0.0266,  0.0475,  0.0252, -0.0090, -0.0273,  0.1629,  0.0181],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4915, -1.5789, -0.8444, -0.0208,  0.1180,  0.1005, -0.0411,  0.0846,
        -0.2997,  0.0487, -0.0035, -0.0737,  0.0714, -0.1017,  0.0839,  0.0788,
        -0.5476, -0.0212, -0.0206, -0.1418,  0.0675, -0.3412, -0.7601, -1.0629,
         0.0259,  0.0501, -0.2615,  0.0490,  0.0062,  0.0299,  0.0017,  0.0933,
        -0.2042,  0.2643,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1662,  2.2579,  0.0773,  0.0275,  0.0944,  0.4116, -0.0091,  0.0419,
        -0.0147,  0.2273, -0.1427, -0.0448,  0.3020,  0.6306,  0.0170, -0.1108,
         0.0133,  0.0545,  0.0597,  0.8532,  0.1177,  0.1196,  0.0860,  0.0023,
         0.0874, -0.0098, -0.0577,  0.0877,  0.0552,  0.2160,  0.0256, -0.0970,
         0.0485,  0.1654,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.4435, -4.0172, -0.0320, -0.1535, -0.1333, -0.5696,  0.1987,  0.2873,
        -0.2131, -0.6592, -0.1137, -0.0332, -0.2554, -0.8824, -0.0712, -0.1319,
        -0.2732, -0.1505,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1870, -0.0588, -0.0832,  0.1948,  0.0553,  0.0363,  0.0031, -0.0599,
        -1.1743, -1.9472, -0.2231, -0.0175,  0.0470, -0.3664, -0.0607,  0.1656,
        -0.2135, -0.0805,  0.2254, -1.3449, -0.0919, -0.1567,  0.0274,  0.0507,
         0.0780,  0.1964,  0.0057,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5016, -0.0803,  0.0080, -0.0817,  0.3140,  0.0323, -0.0082, -0.1164,
        -0.8014, -0.0839,  0.0558, -0.2341, -1.1030, -0.3086,  0.1354, -0.0021,
        -0.1687, -0.2337, -0.0585, -1.9035, -0.2314,  0.0497, -0.4752, -0.0398,
        -0.1305, -0.0724,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7430e-01, -2.2344e+00, -1.7539e-03, -9.5791e-01, -6.9539e-01,
        -2.0605e-02, -8.9945e-02,  4.2059e-02, -5.5434e-01, -2.8552e-01,
        -7.1982e-01, -4.8709e-01,  6.1576e-02,  9.4166e-02, -7.8122e-02,
        -4.1538e-01, -1.5471e-01, -1.8028e-01, -1.3616e-02,  1.9185e-01,
         2.9336e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0972, -2.3309, -1.2694, -1.1157, -0.0680,  0.2066, -0.6026,  0.0742,
        -0.0245,  0.1046,  0.1860, -0.3055,  0.1067,  0.1335,  0.2055,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6202,  0.0993,  0.0215,  0.0334,  0.1299,  0.1708,  0.4052, -0.3041,
         0.1259, -0.0925, -0.0327, -0.3049, -2.3034, -0.2230,  0.5608,  0.0310,
         0.2209, -0.1425,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2719e-01,  3.2996e+00,  1.0064e+00,  1.1555e+00,  1.7046e-01,
         1.1059e-01,  6.2659e-02,  3.2398e-01, -1.8986e-03,  9.3581e-01,
         6.7863e-01,  2.9980e-01, -5.9903e-02,  4.7755e-01,  3.3155e-02,
        -2.7294e-01, -1.5972e-01, -2.1921e-01, -4.0918e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1787, -0.6438, -0.7920, -0.1952, -0.0853, -0.1411, -0.0193, -0.0096,
        -0.0626, -0.0138, -0.0266,  0.0102, -0.1620, -0.0035, -0.0409,  0.1153,
        -0.3100, -0.0316,  0.0316,  0.0035, -0.4861,  0.0357, -0.3809, -0.0195,
        -0.5122, -0.0458, -0.4350,  0.0866, -0.0031, -0.1230, -0.0655,  0.1368,
        -0.4383, -0.0520, -0.3487, -0.1443, -0.6883, -0.0571, -0.1113, -0.2071,
        -0.0553, -0.0031,  0.0834, -0.0664], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0062, -1.6887, -0.0688, -0.7918, -0.4166, -0.1575, -0.2758, -0.0289,
        -0.8299, -0.0821, -0.2464,  0.0719, -0.0600, -0.0172,  0.1025,  0.0349,
         0.2834, -0.7528,  0.0474, -0.1852, -0.2698,  0.0082,  0.0038,  0.0233,
         0.0019,  0.0155,  0.0283, -0.2785,  0.0667, -0.1388, -0.0675, -0.1827,
         0.0202,  0.0221,  0.1143,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0378, -3.6439, -0.0092, -0.6543, -0.1418,  0.0717, -0.9076, -0.5961,
        -0.0203,  0.0323,  0.0403, -0.3821, -0.8449, -0.0141,  0.1324, -0.6146,
         0.0480,  0.0267, -0.0588,  0.0375,  0.0734, -0.0497,  0.1322, -0.0697,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6088, -1.7281,  0.0044, -0.4846, -0.1469, -0.0821,  0.1685,  0.0032,
        -0.5224,  0.0316, -0.1086,  0.1365,  0.0943, -0.1844, -0.1498, -0.0717,
         0.0361, -0.0339, -0.3279, -0.1057, -0.0568, -0.4264,  0.0664, -0.2677,
         0.0122, -0.2395, -0.3210, -0.1871,  0.0190, -0.1141, -0.1209, -0.0315,
         0.4447, -0.3673,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8573e-01,  1.2286e-01,  7.9510e-02, -1.4382e-01,  6.4547e-02,
         6.8817e-02, -5.5432e-01,  5.1095e-04,  2.8362e-02,  2.6287e-03,
         1.1951e-01,  1.3044e-01, -4.4242e-01, -3.9067e-02, -6.6251e-01,
        -1.4984e+00, -3.3979e-02, -1.5456e-02,  1.1624e-02,  8.7168e-02,
         6.3069e-02, -4.1821e-02,  1.5911e-01, -6.5476e-01, -2.4664e-01,
         2.1816e-02, -3.0719e-01, -6.6114e-02, -1.3820e-01, -4.5253e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.0535,  0.3580, -0.2915, -1.6680, -0.0767, -0.1402, -0.0388, -0.1421,
        -0.7134, -0.9049, -0.1688, -0.2312,  0.0989,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3864,  0.0601, -0.0307, -0.0466,  0.0410, -0.3626, -0.3210, -1.1530,
        -1.1580,  0.0646,  0.0535, -0.5263,  0.1481, -0.1761, -0.8359,  0.1234,
         0.1831, -0.2880,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0676, -1.6873, -1.5227, -0.0129, -1.0348, -0.2495, -0.5328, -0.0104,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0699, -1.3229, -0.1863, -0.2924, -0.1227, -0.0980, -0.0889, -0.0778,
         0.0623, -0.0723,  0.0138,  0.0247, -0.1007,  0.0157, -0.0545, -0.0902,
        -0.0093,  0.0059, -0.0228, -0.0183,  0.0681, -0.0439, -0.2928, -0.2386,
        -0.0497, -0.2243, -0.0234, -0.1133, -0.4017, -0.4592,  0.0407, -0.4724,
        -0.1033, -0.4521, -0.1157, -0.1911, -0.2600, -0.0215,  0.0396,  0.0172,
        -0.1168, -0.0186, -0.1033,  0.0103,  0.0337,  0.0297,  0.3290],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2701, -2.7353,  1.7239, -2.0700, -0.8560,  0.4540, -0.2435, -0.0657,
        -0.5514,  0.0730,  0.2766,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5764, -2.7772, -0.7350, -0.9240, -0.1330,  0.0120, -0.4970, -1.1469,
        -0.0896, -0.5884, -0.1324, -0.0711, -0.2979,  0.0704,  0.0796, -0.0506,
         0.1760, -0.1411,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0953, -0.7817, -2.4226,  0.0561,  0.0308, -0.0903, -1.1191, -0.0664,
        -0.4351, -0.0290,  0.0111,  0.0487, -0.0402,  0.2371,  0.0366,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7076, -3.0813, -0.0702, -0.0789, -0.1979, -0.8003, -2.1112, -0.0314,
         0.2411, -0.6668, -0.9947,  0.1326,  0.1014, -0.1197,  0.1423,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0360,  0.0402,  0.0637,  0.0608, -0.1027,  0.0490,  0.0294,  0.0714,
        -0.6073,  0.0546,  0.1296, -0.5313, -0.8373,  0.0946, -0.0451,  0.0056,
        -0.3874, -0.0223, -0.0171, -0.0782, -0.7127, -0.9157,  0.0452,  0.0408,
        -0.0483, -0.1096,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8279, -0.2898,  0.1614, -0.0678,  0.1659,  0.0847, -0.3603, -2.1749,
        -2.1070, -0.4215, -0.0114,  0.3155,  0.4064,  0.2707, -0.2365,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1347e-01, -3.0162e+00, -2.1349e-01, -5.0091e-01, -4.4791e-01,
        -1.8123e-01, -4.8517e-01,  2.7975e-03, -4.6157e-03, -8.6080e-01,
         1.8830e-01,  1.5256e-01, -7.0422e-01, -6.4989e-02,  4.2431e-02,
         1.4107e-02, -1.8618e-01,  1.1183e-01, -2.9165e-01,  1.6483e-01,
        -1.0262e-01,  1.0756e-01,  2.6837e-01,  1.7312e-01, -1.4619e-01,
         4.6280e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1601, -0.0265,  0.0841,  0.1281, -0.0291, -0.0849,  0.0065, -0.0215,
        -0.1395, -0.4892, -0.5848, -0.0397, -0.1510,  0.0498, -0.4931, -0.4422,
         0.0972, -0.1475,  0.1195, -0.2375,  0.0739, -0.0650, -0.0619, -0.2353,
         0.4303, -0.0633,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-6.2032e-01, -2.4966e+00, -1.9815e-02, -7.8724e-03, -1.8657e-01,
         7.6100e-02, -4.1128e-01, -4.3815e-01, -1.0389e-01, -2.1528e-01,
        -9.6935e-02, -2.6275e-01, -3.8054e-01, -5.5414e-02, -2.0584e-01,
        -7.0550e-02,  2.2410e-03, -3.6727e-02, -1.4947e-01, -8.4402e-02,
         8.8479e-02, -1.6018e-01,  9.6522e-02, -2.3434e-01, -5.0769e-01,
         1.5235e-01, -3.1383e-01, -6.9907e-02,  3.2098e-01, -1.2377e-02,
        -6.8971e-02, -2.4102e-01, -1.1349e-01, -5.3182e-02,  5.4376e-02,
         5.7097e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3224, -0.9767, -0.2277, -0.1777, -0.3560, -0.0927, -0.6523, -0.5612,
         0.0643, -0.1166, -0.2434,  0.0317, -0.0349,  0.1351, -0.0078, -0.0735,
         0.0314, -0.0230,  0.0852,  0.0470,  0.0558, -0.6161, -0.0364,  0.0965,
        -0.3184, -0.6380,  0.0698, -0.5108, -0.4872,  0.0059, -0.1785, -0.4565,
        -0.6316,  0.2009, -0.2672,  0.4926,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4415,  0.0449,  0.1376, -0.1440,  0.0649, -1.2135, -0.0249, -0.1143,
         0.0250,  0.0519, -0.2511, -0.6697, -1.6988, -0.0025, -0.0629, -0.0493,
        -0.1933, -0.1664, -0.2496,  0.6072,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0275, -0.7216, -0.1963, -0.2555, -0.0238, -0.0325, -0.1868, -0.4689,
        -0.0118, -0.1485,  0.0634, -0.0385,  0.0036, -0.0085, -0.5820, -0.5118,
        -0.1621, -0.3400, -0.0462, -0.0200, -0.0915, -0.0221, -0.0117,  0.0307,
        -0.0307,  0.0348,  0.0063, -0.0298,  0.0998, -0.3336, -0.5707, -0.2147,
        -0.0700, -0.4719,  0.0065,  0.0726, -0.0228,  0.0781,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0225e-02, -1.8704e+00, -8.1197e-01, -5.2016e-01, -8.3486e-02,
         1.4188e-02, -2.8288e-03,  9.6796e-03, -8.6279e-02,  6.4579e-02,
        -3.0059e-01, -4.6533e-01,  5.0589e-02, -2.3004e-02,  4.1305e-02,
        -3.0819e-01,  5.2917e-02, -3.2756e-01,  3.2941e-03, -9.6493e-04,
         6.8694e-02,  9.4076e-02,  1.0385e-01,  2.4732e-02, -6.7294e-02,
         4.8086e-02, -5.2369e-02, -1.6256e-01, -3.3039e-01,  5.4777e-02,
        -1.2603e-02, -8.5434e-02,  3.5999e-02, -6.5007e-04, -8.3650e-02,
        -3.9224e-02,  3.3079e-02,  1.4496e-01, -2.7952e-02, -1.8869e-01,
        -1.1847e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0582, -3.5011, -0.4150,  0.0523,  0.0167, -0.6226, -0.2779, -0.1120,
        -0.2352,  0.0717,  0.0220, -1.2545, -0.7930, -0.2209,  0.0194, -0.6736,
        -0.1233, -0.4728, -0.0709,  0.1540,  0.1663, -0.2808,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1471,  0.0554,  0.0394,  0.0115,  0.0501, -0.2245, -0.0228,  0.0301,
        -0.0282,  0.0193,  0.0764, -0.4500, -0.0110, -0.1020, -0.0885, -0.4901,
         0.1393, -0.4045, -0.1162, -0.4557, -0.9150,  0.0845,  0.0566, -0.4230,
         0.1074, -0.0506, -0.0581, -0.1412, -0.4322,  0.0067, -0.1450, -0.2668,
        -0.0710, -0.0240,  0.0370,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4044e-01, -2.8029e-01, -1.4126e+00, -4.2448e-02, -6.2356e-01,
        -8.1410e-02,  2.0291e-01,  1.2180e-03,  4.8401e-02,  1.0060e-02,
        -3.9767e-01, -1.9391e-03, -2.4909e-01,  9.0507e-02, -2.7441e-02,
         2.0573e-02,  2.5995e-02,  3.5237e-02, -6.6517e-01,  2.2399e-02,
        -2.7953e-01,  2.4250e-03, -1.7348e-01, -4.6587e-01,  4.5780e-02,
        -3.7556e-02, -2.3074e-01, -1.0537e-01,  3.9683e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6391, -2.5784, -0.5497, -0.5864, -0.1232, -0.5972, -0.0816,  0.0595,
        -0.2514,  0.0233, -0.2626, -0.1423,  0.1484,  0.1002, -0.0045, -0.3543,
        -0.2377,  0.1310, -0.2807, -0.0262,  0.0355, -0.0674, -0.3770,  0.0610,
         0.1605,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1869,  0.0317,  0.0740, -0.0167, -1.5935,  0.2657, -0.0709, -0.8639,
        -1.0102, -0.2395,  0.2363,  0.0124,  0.0144, -0.0176, -0.5731, -0.4819,
         0.1323, -0.2279,  0.0382, -0.0762,  0.0504,  0.2582, -0.0082,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6306, -0.0604,  0.0809, -0.0137, -0.0122,  0.1595,  0.0643, -0.0031,
         0.0113,  0.0363,  0.0486,  0.0319, -0.4203, -0.9606,  0.0941, -0.0620,
        -0.0557, -0.2916,  0.0568, -0.0043, -0.1056,  0.0595,  0.0485, -0.0506,
        -0.0835, -0.0935, -1.2366, -0.3491, -0.8957, -0.1314, -0.6225, -0.0951,
        -0.0322, -0.3212, -0.2451, -0.2511, -0.0498, -0.3149, -0.0483,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5294,  0.2723, -0.2716,  0.9469,  2.2675,  0.0749,  0.3025,  1.2998,
         1.6499, -0.0751,  0.4993, -0.4588,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.0210, -1.0027, -1.9258, -0.0553, -0.0833, -0.1656, -0.5584,  0.1295,
        -0.0930, -0.3087, -0.0391,  0.0415, -0.1751, -0.0381, -0.5288, -0.0589,
        -0.1619, -0.2157,  0.0056,  0.0225, -0.0059, -0.2033,  0.1063, -0.0181,
        -0.0024, -0.0297,  0.0611,  0.0163,  0.0171, -0.0043,  0.0899,  0.1112,
        -0.3872,  0.0160,  0.0500,  0.0453, -0.3783, -0.0789,  0.0083, -0.0305,
         0.0585,  0.0035, -0.0267,  0.1146,  0.1341,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2529, -4.5104, -0.0521, -0.8237,  0.4833, -0.0864,  0.2637, -0.2551,
        -1.0530,  0.3218,  0.2000, -0.1013,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5339, -5.2435,  0.1435, -0.6417, -0.0791, -0.3657, -0.3531,  0.5842,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5105e-01, -1.3085e+00, -2.1008e-01, -1.7594e-02, -2.8968e-01,
        -1.2180e-02,  6.1785e-02, -7.4075e-02, -3.0271e-01, -7.3644e-01,
        -1.3507e-01, -7.6138e-02,  2.7680e-02, -1.2603e-02,  4.6893e-03,
         3.1917e-02, -3.2458e-02,  1.1724e-01, -4.6773e-02, -2.4679e-02,
         2.9065e-02,  4.2761e-02,  7.7254e-04, -1.0216e-01, -4.3222e-03,
        -2.7284e-02,  2.8685e-02, -2.9721e-02, -7.3159e-02,  1.9221e-02,
         6.3188e-04,  6.9968e-03,  4.7602e-03, -1.3430e-02, -2.0371e-02,
        -2.5127e-02,  1.0565e-02,  2.3784e-02, -2.6513e-02, -6.1146e-03,
         1.1835e-02,  3.1483e-02, -6.1272e-02, -4.0702e-02,  4.3495e-02,
         8.8929e-02, -6.0838e-01,  6.2415e-02, -4.7901e-01, -8.7442e-01,
        -1.6998e-01,  5.6659e-03, -1.3540e-01, -1.0726e-02, -4.8510e-02,
         1.7853e-02, -1.3564e-02,  3.2869e-02,  4.8750e-02,  2.5747e-01,
        -3.4483e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0595, -0.1122,  0.0152, -0.3207, -0.5606, -0.2294, -0.2553, -0.0086,
         0.0239,  0.0382, -0.0759, -0.3467,  0.0042, -0.3344, -0.3438, -0.0943,
        -0.0020, -0.0611, -0.1902, -0.0806, -0.1540, -0.4969, -0.0851, -0.0333,
        -0.2280, -0.2924, -0.2153, -0.1013, -0.1552, -0.2238, -0.0883, -0.0509,
         0.0232,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5672,  0.3310,  1.0233,  0.7057,  0.2138, -0.0310,  0.4830, -0.0850,
        -0.1213,  0.0608, -0.0465,  0.0062,  0.5095,  0.0819,  0.1469, -0.2577,
         0.0502,  0.6937,  1.3827, -0.0634, -0.2116,  0.1509, -0.2241,  0.2681,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2433, -0.8242, -0.8986, -0.1852, -0.0851, -0.6780,  0.1992, -0.3249,
        -0.1292, -0.6675, -0.3956, -0.3372, -0.9672, -0.0578, -0.0262,  0.0464,
         0.1046,  0.0424,  0.1148,  0.1265, -0.1170, -0.0223, -0.0540, -0.3510,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6194e-01, -8.0370e-02, -3.5529e-01, -3.9402e-01, -2.6663e-02,
        -3.6104e-02, -6.0264e-03, -3.7238e-01,  6.4012e-02,  7.7994e-02,
         4.1855e-02, -5.6491e-02,  7.6529e-02,  2.1818e-02, -3.1330e-01,
        -4.6712e-01, -3.7251e-01,  4.7856e-02,  8.8642e-05, -9.9747e-02,
        -4.4677e-01, -2.3519e-02, -2.2536e-01, -6.8259e-01, -9.6445e-02,
        -5.3522e-02, -2.3971e-01, -3.6118e-02,  6.3150e-02, -1.5823e-01,
        -1.1241e-01, -2.6785e-02,  1.9836e-02, -1.7184e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2398, -0.1031,  0.0566,  0.0354, -0.5254, -0.0368, -0.0606, -0.0495,
        -0.0166,  0.0355,  0.1759,  0.2079,  0.0591, -1.1063, -0.1065, -0.1515,
        -0.8320,  0.0362, -0.5337, -0.0653, -0.4731, -1.2177,  0.1425,  0.0780,
        -0.3300,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0021,  0.1228, -0.0284, -0.0380, -0.1033,  0.1693, -0.8650, -0.6394,
        -0.4370, -0.8498, -0.0446, -0.1480, -0.1936,  0.0332, -0.0663, -0.1346,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0742, -1.4318, -0.0397, -0.8817, -0.2971, -0.5677, -0.3549, -0.0844,
         0.2960, -0.0379,  0.0355,  0.0638, -0.3361, -0.0337,  0.0263, -0.0342,
        -0.3633, -0.4973, -0.2178, -0.1452,  0.0712,  0.0079,  0.0111,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1710, -0.0634, -0.0243, -1.5173, -2.6114, -0.4517,  0.2144,  0.1146,
        -0.1720,  0.3659, -0.0994, -0.2814,  0.2413, -0.2219,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.0698, -0.7967, -0.8161, -1.0526, -0.1245,  0.1119, -0.3486,  0.1895,
        -0.3438, -0.1520, -0.2868, -0.5127, -0.1862,  0.0037, -0.0324,  0.0307,
        -0.1035, -0.3873, -0.0163, -0.3645, -0.0748,  0.0444,  0.1758,  0.2897,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0256,  0.2032, -0.8147, -0.0400, -1.1869, -0.2808, -1.2906,  0.1696,
        -0.0368, -0.1249,  0.2394,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1675, -0.1627, -0.0539, -0.0238,  0.1141, -0.4264, -0.9386, -0.0351,
        -0.4320, -0.0038, -0.2738,  0.0444, -0.2201, -0.0623, -0.4010, -0.0125,
        -0.1912, -0.5257, -0.0532, -0.2300, -0.1184,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1987,  0.2144, -0.0381,  0.3098,  0.2311, -0.0702, -0.1380, -2.5974,
        -1.2159, -1.4892, -0.1852,  0.1368, -0.0560, -0.5762,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0834,  0.1365, -0.1446, -0.1296,  0.0289, -0.0074,  0.1925, -0.5924,
        -0.2880,  0.2051, -0.1407,  0.3784,  2.7420, -0.2258, -0.1737,  0.2380,
        -0.1273,  0.1318,  0.0059,  0.2015, -0.3555,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1234, -1.9577, -0.0420, -0.9932,  0.2512, -0.8620, -0.2006, -0.9128,
        -0.0065, -0.4140, -0.0686, -0.0249,  0.2019,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3051, -1.0014, -0.0350, -0.0379, -0.0743, -0.0595, -0.2449, -0.2296,
        -0.1535,  0.0031,  0.0029,  0.1094,  0.0260,  0.0404,  0.0600, -0.1483,
        -0.0073, -0.0115, -0.0357,  0.1034,  0.0759,  0.1596,  0.1233, -0.0811,
        -0.2008, -0.1079, -0.5270, -0.3247, -0.3792,  0.0178,  0.0597,  0.1976,
         0.0141, -0.0021,  0.0109, -0.0101, -0.1030, -0.0019, -0.0125, -0.0606,
        -0.0869, -0.0219, -0.0647,  0.2109,  0.1965], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1160, -0.2522, -0.6947, -0.1446,  0.0071, -0.0628, -0.0922, -0.0952,
        -0.0807,  0.0340, -0.7029,  0.0284,  0.0569,  0.0102, -0.2384, -0.1229,
        -0.9881,  0.0498,  0.0435, -0.0186, -0.1541, -0.5848,  0.0928, -0.7046,
        -0.0334,  0.0598, -0.1879, -0.1916, -1.1310, -0.2401, -0.2269, -0.0444,
        -0.2191, -0.0256,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3050, -0.3466, -0.1668,  0.6317,  0.1688,  0.1923, -0.1399, -0.0497,
        -0.0090,  0.0085,  0.1843,  0.0667,  0.2441, -0.0201, -0.0259, -0.1392,
        -0.0806, -0.0835,  0.3750,  2.2941,  2.4088,  0.1963,  0.0629,  0.2656,
        -0.0772, -0.0940,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4783e-01, -2.4151e+00,  4.6365e-02, -7.6421e-01,  7.1506e-02,
        -7.8952e-02, -3.9022e-02, -4.2934e-01,  1.2243e-02,  2.2304e-01,
        -1.7631e-02, -1.6646e-03,  6.4966e-02, -9.2478e-02, -1.9766e-01,
        -4.6780e-01, -7.7575e-01, -8.2619e-02, -4.9612e-01, -1.6561e-01,
        -2.9280e-01,  8.5731e-02, -7.9022e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3287e-01, -1.2940e+00, -2.1135e-01, -1.9169e-01, -8.2255e-04,
        -4.3299e-02, -7.0466e-02, -1.4555e-01, -4.6563e-01, -5.4700e-02,
         4.5401e-02, -9.0362e-02,  3.6935e-02, -4.4232e-02, -1.2141e+00,
         1.6418e-01,  1.3168e-02, -2.2431e-01, -4.2414e-01,  6.6450e-02,
         2.7398e-02, -4.4413e-02,  3.3369e-02,  7.3218e-02,  3.0865e-02,
         4.0028e-02, -2.9921e-01, -5.7920e-01,  3.8953e-02, -3.0617e-02,
        -5.0805e-02, -5.4231e-03, -1.0930e-02, -1.2081e-02, -8.5381e-02,
        -5.2549e-02, -8.6360e-02,  2.9652e-02,  5.1721e-02,  1.2139e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1959e-02,  1.2345e-01,  6.8081e-02, -2.5096e-01, -4.5636e-01,
        -4.9239e-02, -1.7222e-01, -3.5856e-01,  1.1925e-01, -9.2960e-02,
         3.5647e-02,  1.6500e-02, -2.5733e-01, -8.1798e-02, -3.1137e-01,
        -3.5317e-01, -4.4126e-01,  2.7969e-02,  1.9392e-01, -2.0780e-01,
        -4.9581e-02, -2.9178e-01, -3.4577e-01,  2.7938e-02, -1.2318e-02,
        -8.1145e-03, -8.4065e-05, -1.7959e-01, -4.5128e-02, -9.5821e-02,
        -4.0801e-01, -3.0426e-04, -2.0441e-02,  2.8095e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.0943, -0.2764, -0.2369, -0.1256, -0.4061, -0.0208,  0.0934,  0.0343,
         0.0165,  0.0053, -0.0186, -0.0974,  0.0444, -0.0861, -0.9718,  0.0436,
         0.0214, -0.0633, -0.2478,  0.0041,  0.0229,  0.0085,  0.0035,  0.0180,
        -0.1754, -0.2174,  0.0182, -0.0328,  0.0269,  0.0493, -0.0200,  0.0344,
         0.0153, -0.0524, -0.1993,  0.0050, -0.1015, -0.1562, -0.0174, -0.2431,
        -0.2689, -0.1153, -0.1830, -0.0537,  0.0332,  0.0493, -0.0052, -0.0086,
        -0.0449, -0.0271, -0.0646,  0.1179], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5388,  0.2772, -0.0239,  0.2082, -0.8048, -0.0381,  0.1113,  0.1744,
         0.0633,  0.2659, -0.0187,  0.0832, -0.1158, -1.1458, -0.0351, -0.2472,
        -0.7683, -0.2968, -0.4262, -0.0840, -0.0644, -0.0292, -0.0719, -0.8354,
        -0.0772, -0.0064,  0.1656, -0.0448,  0.0148,  0.2230, -0.6561,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3703, -1.3514,  0.0685, -0.2713,  0.0332,  0.1714, -0.1496, -0.0918,
        -0.0432,  0.2906, -0.0554, -0.4273,  0.2888,  0.0883, -0.4467, -0.6799,
         0.0449, -0.3507, -0.6462, -0.5483, -0.2011, -0.0879, -0.2920,  0.1839,
         0.0085,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0107, -0.2031,  0.2436,  0.1560,  0.2184,  1.2985,  2.1215,  0.0131,
         0.0161, -0.2964,  0.0607,  0.0201,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4020, -2.3710, -0.2731, -0.5661, -0.7673, -0.1857, -0.0988, -0.0368,
        -0.1284, -0.0176, -0.1427, -0.0291, -0.4104, -0.7433, -0.1174, -0.2776,
        -0.0975, -0.0661,  0.0403,  0.0059, -0.2754, -0.0628, -0.1992, -0.2803,
        -0.1473, -0.3933, -0.1005, -0.0936, -0.0850,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1468, -3.7485, -0.3780,  0.1655, -0.1670, -1.2913, -1.2544, -0.4765,
        -0.0565,  0.3175, -0.2248,  0.2383,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0244, -1.7657, -0.1362, -0.6479, -0.0353, -0.6157, -1.0571,  0.0653,
        -0.4140,  0.0487,  0.0402, -0.2709,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1023,  0.3167, -0.7734, -0.0027, -0.3331, -0.0165,  0.0033,  0.1136,
        -0.8003, -0.1319, -0.0867, -0.9554, -0.1549, -0.1013, -0.1719, -0.7543,
         0.0546,  0.1430, -0.0614, -0.2379,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2665e-01, -1.6235e+00,  7.1216e-02, -6.5133e-01, -9.1690e-01,
        -1.7504e-01, -2.1932e-01,  5.8809e-02, -3.6471e-01, -1.0702e-01,
         9.9834e-03, -3.5196e-01, -1.6916e-01, -1.7702e-01, -3.7237e-03,
        -6.9432e-02, -2.1699e-01,  6.8316e-02, -8.3272e-02, -1.3137e-02,
        -2.1113e-01,  7.7002e-02,  1.4211e-03, -1.3372e-02, -2.2414e-01,
         8.6367e-02, -9.5365e-02, -3.2723e-01, -3.0540e-01,  6.1139e-02,
         2.9452e-02,  5.5475e-03, -2.0584e-01, -4.9476e-01,  1.0016e-01,
        -1.4715e-01,  5.7318e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1672,  6.3475,  1.2151,  1.1107, -0.1080, -0.0691,  0.0136,  0.4147,
        -0.4845,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2609, -0.0413, -0.1156, -0.6886, -0.0511, -0.1966, -0.5862, -0.3638,
         0.0042, -0.0249, -0.1044, -0.2988, -0.6898, -0.6697, -0.0487, -0.0643,
         0.0130,  0.0620, -0.1906, -0.5425,  0.0802, -0.0198,  0.0403, -0.0347,
         0.0619,  0.0248, -0.1114,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1565, -2.7847,  0.2840, -0.6818, -0.2853, -0.2967,  0.0284,  0.1018,
        -0.0539,  0.0229,  0.1609, -0.4208, -0.0828,  0.0891, -0.1096,  0.1571,
        -0.6097,  0.0226,  0.0331,  0.0150,  0.0188, -0.4709, -0.8706,  0.1159,
        -0.0465,  0.0914, -0.0702, -0.2138, -0.7162, -0.0492,  0.0994, -0.0574,
        -0.0982, -0.5066,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 0.3628, -0.0191, -0.0587, -0.0607, -0.2825, -0.0018,  0.0046,  0.0766,
         0.0813,  0.1520,  0.0422, -0.6637,  0.1255,  0.0109,  0.0630, -0.1057,
        -0.3965,  0.0011, -0.1086,  0.0078,  0.0226,  0.0487, -0.2394, -0.0271,
         0.0238,  0.0419,  0.0163, -0.0904, -0.0237, -0.0720,  0.0559, -0.3495,
        -0.7360,  0.0105, -0.0310, -0.2191, -0.4332,  0.0105,  0.0393, -0.0108,
        -0.1195, -0.3436,  0.0484, -0.0279, -0.0539,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3744, -0.1082,  0.0663, -0.0687, -0.1815, -1.3395, -1.8692, -0.0027,
        -0.0599, -0.6513,  0.1900, -0.0441, -0.5479, -0.2389, -0.0742, -0.2047,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4495,  0.1016, -0.0036,  0.1217,  0.0303,  0.1167,  0.1009, -0.0787,
        -1.1515, -1.2213, -0.3596, -1.1496, -0.1050, -0.3787, -1.0121, -0.0918,
         0.0263, -0.0938,  0.0334, -0.0258, -0.0286,  0.2656,  0.0347, -0.1026,
        -0.0224,  0.3493,  0.2266,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0048, -0.0665, -0.2872, -0.0198, -0.1679,  0.0238, -0.1123, -0.0442,
        -0.0871, -0.0538,  0.0157, -0.0078, -0.1523,  0.0072, -0.1289, -0.0254,
        -0.0994,  0.0346,  0.0097, -0.1283,  0.0201, -0.1277, -0.0986, -0.2421,
        -0.0134, -0.0200, -0.2009, -0.2679, -0.0391, -0.1193,  0.0072, -0.1991,
        -0.1946, -0.0810, -0.0792, -0.1822, -0.0022, -0.1640,  0.0031,  0.0183,
        -0.1116, -0.1930, -0.0020, -0.0036,  0.0257,  0.0090], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1643, -0.0996,  0.0348, -0.0100, -0.0206, -0.0117,  0.1490, -0.9360,
         0.7025, -0.0086, -0.0695,  0.2317,  0.1956, -0.0670,  0.2609, -0.0129,
         0.0166,  0.0291, -1.0861, -2.5438, -0.5169,  0.2011,  0.0588,  0.0446,
         0.1083, -0.0572, -0.4571,  0.7574, -0.3108, -0.4310,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1591, -0.0915, -0.1688, -0.0245,  0.0574,  0.0864, -0.0474,  3.0381,
        -0.6745, -0.4112,  0.3359,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0237, -2.2158, -0.1734, -0.2640, -0.4424,  0.0097, -0.2544, -0.5540,
         0.0332,  0.0371, -0.0444,  0.0217, -0.2066, -0.1497, -0.0958, -0.5971,
        -0.1729, -0.1559, -0.1090,  0.2543,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4444, -1.7140, -0.8517, -0.4446,  0.2088, -0.6170, -0.8311,  0.1489,
         0.3164, -0.3060, -0.0605,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1859, -2.9054, -1.4553, -1.3758,  0.2084, -0.2327, -0.3911, -0.6401,
        -0.7723, -0.2098, -0.0779, -0.1614, -0.1968, -0.3152, -0.0780, -0.4051,
        -0.0294, -0.0139,  0.0873,  0.0292,  0.5143,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3897e+00, -2.2459e+00, -1.4307e+00, -1.2480e-01,  1.3978e-03,
        -2.1975e+00,  1.2906e-01,  1.2254e-01, -2.4588e-01,  1.8634e-03,
        -1.0317e-01, -4.8961e-01, -1.6380e-02, -1.7344e-02,  3.3141e-02,
        -1.0147e-01, -6.6651e-01,  6.5842e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1389, -0.5914, -1.0943, -0.0229, -0.1437, -0.0387,  0.0531,  0.0343,
         0.0225, -0.0336, -0.0209, -0.0513, -0.0139, -0.4020, -0.4922, -0.1791,
        -0.3651, -0.3726, -0.1463,  0.0685, -0.0507, -0.3064, -0.4353, -0.0037,
        -0.1263,  0.0152, -0.1561, -0.0463, -0.1436, -0.2667,  0.0165, -0.0436,
        -0.0635,  0.0568, -0.0749, -0.0223,  0.0582,  0.0535, -0.0066,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0418, -2.5592, -0.0122, -0.5870, -0.1491, -0.0614, -0.0084,  0.0684,
        -0.1289, -0.0625, -0.2282, -0.8529, -0.0924, -0.0276, -0.0162, -0.0269,
        -0.5037,  0.0263,  0.0471, -0.0313, -0.0706, -0.0866,  0.0443,  0.0352,
        -0.0300,  0.0054, -0.1275, -0.1405, -0.4010, -0.5579, -0.0519, -0.0780,
         0.1022, -0.0869,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.5122, -0.3337, -1.4685, -1.8773, -0.5519, -1.0530, -0.0750, -0.0647,
        -0.3798,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0892, -1.1493, -0.0928, -0.6256, -0.1959, -0.7162,  0.0113, -0.0355,
        -0.5134, -0.1766, -0.2784, -0.2331, -0.0529, -0.0764, -0.0786, -0.6126,
        -0.2025, -0.8568, -0.0922,  0.0116, -0.0656,  0.0382,  0.0059, -0.0334,
        -0.1598, -0.1228,  0.0125, -0.1370,  0.0080, -0.0117, -0.1768, -0.3064,
        -0.0022, -0.2956, -0.0653,  0.0216, -0.0780,  0.1170, -0.1835,  0.5621,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1706, -0.2391, -1.1466,  0.0951, -0.3096, -0.1088,  0.0039,  0.0334,
        -0.3788, -0.5313,  0.0115, -0.0021, -0.0079, -0.0471, -0.0333,  0.1326,
         0.0422,  0.1078, -0.2414,  0.0273, -0.2669, -0.0070, -0.0422,  0.0283,
         0.0591, -0.1445, -0.3350,  0.0352, -0.4762, -0.5556, -0.0421, -0.0025,
         0.0112, -0.0084,  0.0449,  0.0445, -0.2377, -0.4723, -0.0648, -0.0239,
         0.1646,  0.3951,  0.0215,  0.0672, -0.0703,  0.0108, -0.0424,  0.0651,
        -0.0090,  0.0020,  0.0854, -0.0773,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1727, -1.9307, -0.3110, -0.3007, -0.2459, -0.0913, -0.6056, -0.2650,
        -0.2794,  0.0215,  0.0529,  0.1507, -0.6757, -0.0963, -0.1453, -0.4414,
        -0.2124, -0.1379, -0.4994,  0.0435,  0.2060,  0.3055,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1366e+00, -5.0156e+00, -2.4824e-01, -3.1833e-01, -8.4533e-03,
        -1.5072e-01,  2.1870e-01, -7.3990e-01, -1.9892e-01, -3.4462e-01,
        -1.0047e-01, -4.6641e-01,  3.4906e-02, -3.9739e-01,  7.1380e-02,
        -6.4674e-01, -6.0311e-02,  4.0086e-03,  1.8298e-01, -1.1046e-01,
        -1.4211e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8703e-01, -1.5602e+00, -2.7183e-01, -4.1106e-02, -8.2835e-03,
         4.7059e-02, -1.3563e-01,  1.2308e-01, -1.9525e-01,  4.9413e-04,
         4.5547e-02, -2.3777e-02,  8.9553e-03,  8.5743e-02,  2.6518e-03,
         5.3296e-02,  7.1978e-02,  3.3353e-02, -1.8503e-01,  3.8838e-02,
         1.2481e-02,  5.2999e-02, -4.6450e-03,  5.9090e-02,  3.5060e-02,
         2.1606e-02, -3.1740e-02, -2.0557e-01,  2.8502e-02, -2.6032e-01,
        -3.9353e-01, -4.3595e-01, -1.8226e-02, -2.6891e-01, -2.7997e-01,
        -1.2519e-02,  2.7002e-02,  4.2616e-02,  6.7882e-02, -4.3562e-02,
        -5.0053e-01, -7.5253e-03,  9.1242e-04, -4.3651e-02, -2.8552e-01,
         6.8512e-02, -3.7139e-01, -4.4427e-01, -3.7038e-02,  1.3932e-01,
        -1.3941e-01, -9.4613e-02,  1.3962e-02,  4.4843e-02, -2.1853e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1388,  0.0115,  0.1247, -0.1571, -0.0592, -0.1081, -0.4723, -0.0270,
         0.0418,  0.0242, -0.0121, -0.0255,  0.0898, -0.1936, -0.3647, -0.0841,
        -0.0477, -0.2460, -0.0268, -0.5860,  0.0164, -0.1760, -0.2560,  0.0388,
         0.0594,  0.0052,  0.0142, -0.3783, -0.0855,  0.0118,  0.0471,  0.0084,
        -0.0544, -0.3260, -0.1147, -0.1592,  0.0309, -0.2294, -0.2675, -0.2363,
        -0.0629,  0.0201, -0.0018, -0.0084,  0.0999,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0498, -1.8338, -0.5265, -0.7175,  0.0416, -0.1170, -0.1066,  0.0566,
        -0.3383,  0.0343, -0.1789, -0.2149, -0.0788, -0.5753, -0.4199, -0.1439,
        -0.1710, -0.4726, -0.2092, -0.3385, -0.6919,  0.1697,  0.1321, -0.1549,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1265,  0.0080,  0.0362, -0.0272, -0.1197,  0.0023, -0.2478, -0.3110,
         0.0947,  0.0518, -0.0130,  0.0126, -0.0051, -0.1900,  0.0257,  0.2493,
        -0.3767, -0.2514, -0.3764, -0.3845,  0.0269,  0.0432, -0.3369, -0.4818,
        -0.2161, -0.3870, -0.5294, -0.0655, -0.2002, -0.3532,  0.0403, -0.0398,
        -0.0652,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0190, -4.6376, -0.0664, -0.9306, -0.1346, -0.2561, -0.0300, -0.1340,
        -0.9168,  0.1263,  0.1386, -0.1930,  0.2459, -0.7810,  0.1153, -0.1627,
        -0.0687,  0.0434,  0.3870,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1233,  0.0440,  0.1834, -0.0784, -0.6876, -0.1378, -0.2317, -0.6800,
        -1.0521, -0.1957, -0.0479, -0.0040,  0.0095,  0.1068,  0.0070, -0.0173,
        -0.0981, -0.4671, -0.4758, -0.0980, -0.4334, -0.3547, -0.0662, -0.0737,
         0.0264,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6625, -1.4188, -1.0321,  0.0502, -0.1491, -0.5794, -1.7281,  0.2735,
        -0.0621, -0.1541, -0.0706,  0.1153, -0.2477,  0.0313, -0.0550,  0.3813,
         0.2610,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.1951,  0.0342, -0.0792,  0.0750,  0.0630, -0.0494, -0.0200,  0.0108,
         0.0528, -0.4103, -0.1309,  0.0829,  0.1248,  0.0806,  0.0356, -0.0892,
        -0.5178, -1.2590,  0.0069, -0.4151, -0.0277, -0.7278, -0.7619, -0.0634,
         0.0082, -0.0086,  0.2296,  0.1645, -0.0710,  0.0107,  0.0205,  0.4224,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1759, -0.4815, -0.6027, -0.0828, -0.0804,  0.0096, -0.3444, -0.8058,
        -0.0480,  0.1106,  0.0218, -0.0291, -0.0312, -0.0113, -0.1692, -0.0351,
        -0.0772, -0.0158, -0.4696, -0.0195, -0.3394,  0.0622, -0.3846, -0.4448,
        -0.0966, -0.1639,  0.0831, -0.0793,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1565e-01, -3.5593e-01, -5.7277e-02, -9.2191e-02, -2.6584e-01,
        -7.1119e-01, -1.0334e+00, -3.8576e-01,  6.0444e-02, -2.6007e-02,
         3.6511e-02, -6.6353e-01, -6.0252e-02, -3.6171e-01, -1.2194e-01,
        -8.0267e-01, -1.0296e-01, -9.1970e-01, -1.1851e-01, -8.0533e-02,
        -1.4182e-01,  6.5323e-02, -1.0866e-01, -5.2917e-02, -2.1515e-01,
         2.2476e-02, -1.0186e-03,  3.0283e-01, -6.3495e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2514, -0.9755, -0.0122, -0.2441, -0.7268, -0.0994,  0.0415,  0.0309,
        -0.0240,  0.0854,  0.1307,  0.0189, -0.2739, -0.9093,  0.1056, -0.4049,
         0.1238, -0.5671,  0.0090, -0.2262, -0.0637, -0.0054, -0.2738, -0.0820,
        -0.1927, -0.4541, -0.2424, -0.0257, -0.0052, -0.0055, -0.0191, -0.0112,
        -0.0302,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6260e-02, -1.8861e+00, -2.2218e-01, -5.0481e-01, -2.7306e-02,
        -3.9428e-01, -9.0728e-03, -1.2514e-01, -4.9175e-02, -3.9221e-01,
        -4.3706e-01, -3.5689e-02, -5.1180e-02,  4.6053e-02, -4.3839e-02,
         7.7458e-02, -1.6653e-01,  1.9464e-02, -7.5635e-01,  1.4487e-01,
        -5.2953e-01,  1.4205e-03, -3.1519e-02, -9.4215e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0816, -1.9636, -0.8495, -0.0125, -0.3563, -0.0155, -0.0253, -0.3271,
         0.3069, -0.5623, -0.1044, -0.2357, -0.4172, -0.0065,  0.1082,  0.0186,
         0.1079, -0.1996, -0.0064, -0.5047, -0.2160, -0.0648, -0.0041,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0051e-01, -1.4320e+00, -1.6734e-01, -1.1223e-01, -1.8858e-01,
        -9.0883e-02,  1.6275e-02, -2.8490e-03, -2.8156e-02, -4.2824e-03,
         4.8550e-02, -5.7169e-02, -7.3456e-02, -5.5850e-03, -6.3434e-02,
        -4.5409e-02, -4.9023e-01, -2.0985e-01, -1.5102e-01, -9.0790e-02,
         4.6256e-02, -1.6660e-01, -5.8031e-02, -1.2293e-01, -3.0937e-01,
        -7.9929e-02, -5.8386e-02,  2.7703e-04, -7.6500e-02, -3.9216e-01,
        -4.7028e-01,  3.4784e-02, -1.1517e-01, -3.1670e-02, -2.2334e-01,
        -2.8566e-01, -8.5435e-03, -1.8659e-02,  7.0576e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1232, -0.1358, -0.0746, -0.1756, -0.5696, -0.0233,  0.0270,  0.0571,
        -0.0553,  0.1259, -0.0373, -0.0065,  0.0153, -0.0063,  0.0669, -0.1792,
        -0.0717, -0.0145,  0.0582,  0.0489, -0.2517, -0.0440, -0.0245, -0.2249,
        -0.7845, -0.6464, -0.0357, -0.2441, -0.0619, -0.3768, -0.0240,  0.0379,
        -0.1062, -0.0092,  0.0032, -0.0827, -0.0424, -0.3461,  0.0222,  0.0112,
         0.0315, -0.0296,  0.2154,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7425e-01, -1.7874e+00,  7.7979e-02,  4.0201e-02,  3.4063e-01,
        -3.2462e-02, -2.3968e-01, -8.7642e-02, -3.8869e-01, -9.9710e-02,
         1.7924e-01, -2.5047e-02, -1.7163e-01,  1.9148e-02, -1.2289e-01,
         6.0121e-02, -2.5388e-01, -9.5294e-01, -6.1379e-02, -1.0671e-01,
         6.4502e-02,  4.9637e-02,  2.6289e-02, -7.7776e-02,  9.5073e-02,
        -3.3783e-02,  9.3306e-02, -2.2104e-03,  1.0362e-02,  4.2016e-03,
        -3.7573e-02, -4.7146e-01, -7.2530e-01, -2.0096e-01, -1.6661e-01,
        -3.6776e-02, -5.5258e-02, -3.2272e-01, -3.2051e-01, -1.3357e-03,
        -3.4019e-01, -4.6821e-01, -1.1208e-02, -3.4621e-03, -1.5640e-02,
        -8.4328e-02, -4.3765e-02,  2.1337e-02, -2.4098e-02, -1.6574e-02,
         7.0450e-02, -9.8355e-02,  5.1192e-02, -2.6721e-02,  9.6464e-02,
        -2.3894e-01, -2.0148e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6824,  3.8084,  0.0976, -0.0114,  0.5421, -0.0252,  0.6707,  1.3334,
        -0.1851,  0.4653,  0.6376,  0.0153, -0.3042, -0.3373,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6798e-01, -1.2964e+00, -3.1034e-01,  8.4901e-03, -5.0976e-02,
        -2.8396e-01, -1.9960e-02,  9.1490e-02, -2.9636e-01, -8.5898e-02,
        -3.2594e-01, -7.0168e-01, -1.4271e-01,  6.5019e-02, -3.0455e-01,
        -9.3767e-03, -2.0457e-01, -2.3518e-01,  7.0048e-02, -3.5240e-01,
        -5.7624e-01, -1.5623e-02, -1.9177e-01,  1.0737e-03,  1.5360e-01,
         1.3224e-01,  2.7722e-02,  4.4197e-02, -9.5501e-02,  2.3525e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0531, -2.2407, -0.2978, -1.9482, -0.8097,  0.2800, -0.6358,  0.6849,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.3003, -1.0255, -0.2403, -0.1890, -0.2730, -0.6505,  0.0014,  0.0137,
        -0.0187, -0.1091, -0.0854,  0.0373, -0.3786, -0.5679, -0.0838,  0.0119,
        -0.0258, -0.0359,  0.0014,  0.0386,  0.0108,  0.0014,  0.0402, -0.0562,
        -0.2576,  0.0709, -0.2050, -0.1217, -0.2532, -0.3709,  0.0263,  0.0935,
        -0.1915,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2167, -1.4475, -0.8604, -0.3055, -0.0359, -0.2156, -0.0868, -0.0199,
         0.1238, -0.1659,  0.0171, -0.0854, -0.1096,  0.0064,  0.1012,  0.0231,
         0.0236,  0.0510, -0.0576,  0.0725,  0.0106,  0.0611, -0.0187,  0.0070,
         0.0333, -0.0244, -0.0454, -0.4090, -0.0156,  0.0242,  0.0124, -0.0307,
        -0.1567, -0.3181,  0.0127,  0.0295, -0.0764, -0.0439, -0.0577,  0.0213,
         0.0038, -0.0300, -0.0219, -0.2538, -0.0052,  0.0441, -0.0180,  0.0028,
         0.0093,  0.0036, -0.0281, -0.3255, -0.0761, -0.1010,  0.0025,  0.0654,
        -0.0242,  0.0355,  0.0070, -0.1934, -0.0428, -0.0488, -0.2204,  0.0080,
         0.0312, -0.0238, -0.0255,  0.1123,  0.0891,  0.0115,  0.0103],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2144e-01, -1.6955e-01, -2.1828e-01, -2.4703e-02, -2.3873e-01,
        -6.6090e-02, -7.0635e-01, -2.2727e-05,  9.9213e-03,  9.8367e-02,
         5.0870e-02,  1.2661e-02,  3.3882e-02,  4.8785e-03, -2.3439e-02,
         1.0535e-01,  9.2329e-03,  1.2629e-02, -4.6713e-01, -6.9562e-01,
         1.2802e-02,  3.2449e-02,  4.4582e-03, -1.9154e-01,  1.9347e-01,
        -4.3247e-01, -3.7590e-02,  2.8538e-02,  8.9904e-03, -7.6829e-03,
        -1.5708e-01, -2.1222e-01, -3.2021e-01, -1.0592e+00, -1.0865e-01,
        -1.2157e-01, -5.8913e-01, -2.4844e-01, -2.0741e-02,  3.6808e-02,
         2.3067e-01, -1.5599e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0638,  1.4310,  0.0239,  0.0098,  0.0758, -0.0141, -0.0307, -0.1138,
         0.2434,  0.4448,  0.1444,  0.3215,  0.5113, -0.0064,  0.0378, -0.0083,
         0.1314,  0.3156,  0.4832, -0.0699,  0.0880,  0.1809,  0.0639, -0.0332,
         0.0894,  0.7158,  0.2301,  0.0313,  0.9038, -0.0850,  0.0163,  0.2253,
         0.0590,  0.0093,  0.0309,  0.3142,  0.1030,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6844, -3.6096,  0.2180, -0.2040, -0.0538,  0.2606, -0.1634, -0.0343,
        -0.2761, -0.1167, -0.1899,  0.2450, -0.5550, -0.2218,  0.0590,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5585e-01, -3.6341e-01,  1.2124e-01, -4.1143e-02, -1.0382e+00,
        -1.7260e+00, -5.5707e-01, -6.6261e-01, -1.6140e+00, -1.4177e-01,
         7.2904e-03,  3.8950e-01, -1.6609e-01,  6.4801e-04,  3.5545e-01,
        -5.4591e-02,  1.0177e-01, -8.2497e-02,  7.5365e-02,  6.1089e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0200, -0.0070, -0.0040,  0.0127, -0.6509,  0.0178,  0.0400, -0.0422,
        -0.0092,  0.0411, -0.2215, -0.3667,  0.0628, -0.0104, -0.1287, -0.0907,
        -0.0131, -0.1444,  0.0438, -0.1084, -0.0074,  0.0098,  0.0077,  0.0037,
         0.0235, -0.2467, -0.4491,  0.0199,  0.0459, -0.4066, -0.3924, -0.0347,
         0.0152, -0.0189, -0.1852, -0.3612,  0.0479, -0.1303,  0.0118, -0.0097,
         0.0013,  0.0821,  0.0849,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4113e-01, -3.3800e-02, -6.8210e-02, -6.2472e-02, -3.4880e-01,
        -1.2568e-01, -3.7002e-01, -4.6302e-01, -1.1143e-01,  8.2002e-02,
         1.1759e-01,  5.7045e-04, -5.5936e-02, -4.5837e-01, -6.5444e-01,
         3.9586e-02,  5.7949e-02, -6.8098e-03, -9.1987e-03, -2.1608e-02,
        -3.4530e-01, -1.8176e-02,  5.5069e-02,  3.7362e-02,  3.8152e-02,
        -1.0855e-01,  8.8319e-02,  3.7137e-02, -4.0244e-01,  1.6498e-02,
        -7.2245e-02, -2.5026e-01, -2.4883e-02, -2.8251e-01, -4.0249e-01,
        -1.6855e-01,  3.7585e-01, -6.3186e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0280, -2.2183, -1.0232, -0.8763, -0.0423, -0.8384,  0.0399,  0.0813,
        -0.2645, -0.6970, -0.1965, -0.1421, -0.6117, -0.1825, -0.0832, -0.3684,
         0.0317, -0.5769,  0.0051,  0.1932,  0.1009,  0.1815,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0932, -0.7141,  0.0888, -0.0032, -0.0119, -0.0791, -0.0221,  0.0335,
         0.0200, -0.7121, -0.1196,  0.0271, -0.0322,  0.0580, -0.3956, -0.3002,
        -0.6567, -0.0701,  0.0203, -0.3999,  0.0152,  0.0521, -0.2399, -0.0009,
        -0.1154, -0.0499, -0.0183, -0.0185, -0.0346, -0.3676, -0.0186, -0.2057,
         0.0718, -0.0841, -0.1577, -0.6390,  0.1878, -0.0452, -0.0939, -0.3146,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1662, -0.1122, -0.0631, -0.0947,  0.0080, -0.0927, -0.1339, -0.0979,
        -0.4277, -0.0797, -0.0818, -0.0023, -0.1022, -0.8873, -0.3998, -0.1641,
         0.0617,  0.0565, -0.2455,  0.0609, -0.0713,  0.1089,  0.0272,  0.0370,
        -0.0661, -0.3934, -0.0209, -0.2944, -0.5558,  0.0035, -0.0506, -0.0157,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1342e-01, -2.1585e+00, -1.1051e-03, -1.0260e-01, -7.5746e-02,
        -2.5240e-02, -1.1397e-01, -5.6321e-01, -9.0261e-01, -1.6506e-04,
        -1.2203e-01, -2.6149e-01,  4.9166e-02,  3.2926e-03,  5.7697e-02,
         8.3003e-02, -3.9941e-01, -6.5583e-02,  1.6621e-02, -2.1238e-02,
        -7.8427e-02, -1.0707e-01, -1.2562e-01, -5.6412e-01,  1.0831e-01,
         6.4437e-02, -1.3609e-01, -1.5842e-02, -5.8324e-03, -1.1784e-01,
        -4.2363e-02,  3.9371e-02, -9.2197e-03,  1.4344e-01, -2.0770e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 2.8184e-01, -2.3517e-01, -1.1444e+00, -8.7116e-01, -2.9866e-04,
        -7.0352e-02, -1.2560e+00, -8.4158e-01, -7.7896e-02, -4.7054e-01,
         7.9164e-02, -6.6536e-02,  6.6401e-02, -2.5814e-01, -5.1721e-02,
        -8.8820e-02, -1.4925e-01, -7.4119e-02,  2.9827e-02,  1.5375e-01,
         5.7320e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2755, -2.2744,  0.2230, -0.5524, -0.7721, -0.3736, -0.1622, -0.6032,
        -0.5210,  0.1527, -0.0519,  0.1479,  0.0621, -0.0769, -0.3036, -0.0125,
        -0.2212, -0.1593,  0.0180,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6407, -0.2902, -0.5145,  0.0506, -0.3885, -0.0663,  0.0059, -0.0998,
        -0.0826, -0.4173, -1.2337,  0.0969, -0.3233, -1.1466, -0.0381, -0.3794,
        -0.0045, -0.1529, -0.0563,  0.0851,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2417, -2.2544,  0.1039, -0.5029,  0.0381,  0.0268, -0.1724, -0.2157,
        -0.0347, -0.0213, -0.0835, -0.0840,  0.1123, -0.3295, -0.1128, -0.2650,
        -0.5857, -0.0979, -0.1709, -0.2658,  0.2113,  0.1328,  0.2043,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3262, -0.8815, -0.3160, -0.1353, -0.1609, -0.6038, -0.1848, -0.0870,
        -0.3072, -0.4012, -0.0195, -0.4348, -0.0397, -0.5681,  0.0335,  0.0207,
        -0.0086, -0.0299, -0.1770,  0.0593, -0.3721, -0.0241, -0.0030,  0.1676,
        -0.1568,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2549, -2.4676, -0.2750,  0.0051,  0.0432,  0.0529,  0.1565,  0.0407,
         0.0599,  0.0607, -0.6779, -0.1458, -0.0425,  0.0134,  0.0586, -0.0872,
        -0.1207,  0.0671,  0.1376, -0.0530, -1.0050, -0.7072, -0.4344,  0.0088,
        -0.0473, -0.3355, -0.1117, -0.0230, -0.1602, -0.1258, -0.1561,  0.0654,
        -0.1865,  0.1121,  0.0445,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0880,  0.0382,  0.0202, -0.6116, -0.0038, -0.0788, -0.7286, -0.1336,
        -0.7272, -0.1855, -0.1668, -0.1866,  0.1412, -0.1920, -0.1830, -0.7285,
        -0.0729, -0.6102,  0.0385,  0.0883, -0.1339,  0.2505,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1799, -0.1456,  0.2440, -0.0694, -0.1478, -0.1591, -0.0630, -0.1169,
        -0.1018, -2.5740,  0.3714,  0.0517,  0.2091, -0.0236,  0.0526, -0.6808,
        -0.0406,  0.1338,  0.3009,  0.0547, -0.0817,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4630, -3.6629, -0.2436, -0.0856, -0.2019,  0.1493, -0.9340, -1.0829,
         0.2400,  0.1043, -0.0697,  0.1708,  0.0685, -0.7195, -0.1941, -0.4478,
         0.1325, -0.0480, -0.1238,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0076, -0.1476, -0.3543,  0.0053, -0.0141, -0.3323, -0.0149, -0.0338,
         0.0198, -0.0261, -0.0604, -0.0701,  0.0040, -0.1683,  0.0429, -0.0086,
        -0.3278, -0.2475,  0.0432, -0.2578, -0.0714, -0.0609, -0.1547,  0.0775,
        -0.2444, -0.0252, -0.0330, -0.1513,  0.0016, -0.5384, -0.0664, -0.0846,
        -0.2217, -0.4628,  0.0185, -0.0683, -0.0323, -0.0077, -0.0674, -0.1602,
         0.0186,  0.0034, -0.0052, -0.0242, -0.0092, -0.0283, -0.1432],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0643, -1.4637, -1.2001,  0.0347,  0.0348,  0.0982, -0.0497,  0.0071,
        -0.2421,  0.0281,  0.0025,  0.0261,  0.1243,  0.1228,  0.1121,  0.0107,
        -0.5886, -0.0266,  0.0876,  0.0084,  0.0667, -0.0813, -0.7115, -0.5869,
         0.0210, -0.0650, -0.0876, -0.0168,  0.0626,  0.0220,  0.0757,  0.0575,
        -0.1005, -0.0141,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3503, -2.4447, -0.1770,  0.0587, -0.0417, -0.3148, -0.0778,  0.1001,
        -0.0556, -0.2308, -0.0143, -0.0863, -0.4798, -0.3900,  0.0644, -0.0494,
        -0.0575, -0.3256,  0.1961, -0.4201, -0.0308, -0.1337, -0.0876,  0.0403,
        -0.0919,  0.0491,  0.0282, -0.1459,  0.0178, -0.2385,  0.0743, -0.0700,
        -0.6319,  0.0938,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.3172, -2.7954,  0.0529,  0.1719, -0.0682, -0.6255,  0.1657, -0.0532,
         0.1857, -0.6584, -0.2463,  0.0538, -0.2001, -0.8645, -0.0469,  0.0188,
         0.1219,  0.2045,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0271, -0.0495, -0.0143,  0.1072,  0.0047, -0.0492,  0.0528, -0.1067,
        -1.0452, -2.0334,  0.2926,  0.0774,  0.0050, -0.0583,  0.0372, -0.0168,
        -0.2335,  0.0099, -0.1329, -0.8075,  0.0061,  0.0160, -0.1858, -0.0498,
        -0.0484, -0.2097, -0.0791,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2761,  0.0563,  0.0819, -0.0519, -0.1685,  0.0478,  0.0966, -0.1727,
        -0.9710, -0.0413, -0.0259, -0.2896, -1.0286, -0.0254,  0.0359, -0.0282,
        -0.0533,  0.0288,  0.0210, -1.4170,  0.0174,  0.0785, -0.0739,  0.0701,
         0.1478, -0.2056,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1190, -1.7781, -0.0669, -0.6429, -1.0019,  0.0909, -0.0728, -0.0190,
        -0.3839, -0.1819, -0.5571, -0.2832,  0.0464,  0.0348, -0.2137, -0.5317,
        -0.5755, -0.3548, -0.0443,  0.1283,  0.1346,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2973, -1.8052, -1.0559, -1.5451, -0.1646,  0.1048, -0.3579, -0.0437,
        -0.1427, -0.0276,  0.0492, -0.3711, -0.0309,  0.0211, -0.1550,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2613e-01, -1.7516e-03, -2.9222e-01,  3.9785e-02, -2.2223e-01,
        -1.1909e-02,  3.3644e-01, -5.6217e-02, -1.5575e-02,  1.2046e-01,
        -6.2751e-02, -2.6885e-01, -2.4655e+00, -4.3116e-01, -3.2713e-01,
         2.5204e-01,  1.6672e-01, -1.7969e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1416, -2.6643, -0.7932, -0.9427,  0.2098,  0.2181, -0.0692, -0.6309,
        -0.1946, -0.2374, -0.5688, -0.2219,  0.1436, -0.3198, -0.0617,  0.0938,
         0.0558,  0.3955,  0.5007,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2078e-01, -1.5962e+00, -8.6174e-01, -3.6038e-02, -1.8402e-01,
        -1.7202e-01,  7.1551e-02,  9.0293e-02, -8.0284e-02, -1.2623e-03,
         2.9307e-02, -1.1491e-01, -1.2095e-01,  5.3146e-02, -3.4863e-02,
        -2.9385e-03, -3.2212e-01,  4.2047e-03,  6.9378e-02, -3.2088e-02,
        -4.7394e-01,  9.0932e-02, -3.9644e-01, -8.4387e-02, -5.6785e-01,
        -9.2657e-02, -2.5764e-01,  1.9736e-01,  2.0377e-02, -1.4012e-01,
        -7.8571e-03,  1.5585e-01, -2.3883e-01, -5.6992e-03, -1.8598e-01,
        -1.6203e-01, -4.1453e-01, -4.8135e-02, -3.6769e-02, -1.4885e-01,
        -3.9260e-02, -5.7805e-02,  2.1759e-01, -1.7351e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1294, -1.3371, -0.1306, -0.6940, -0.3644, -0.0345, -0.3353, -0.0891,
        -0.7691,  0.0171, -0.1964,  0.1563, -0.0840,  0.0253,  0.0436,  0.0629,
         0.4328, -0.5192, -0.0761, -0.3202, -0.2696,  0.0017, -0.0205, -0.0333,
        -0.0216,  0.0428,  0.0158, -0.6263, -0.0290,  0.0865,  0.0048, -0.1777,
         0.0627,  0.1847,  0.2528,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9831e-01, -2.4777e+00,  6.3165e-02, -8.2173e-01, -2.3241e-01,
        -8.1331e-02, -9.2752e-01, -5.9093e-01,  1.5694e-01,  2.1799e-03,
        -1.2747e-01, -3.5929e-01, -1.1356e+00,  2.0008e-02,  1.8011e-02,
        -5.1742e-01, -1.5482e-01, -1.9191e-02, -1.2490e-01, -6.8378e-04,
         1.3707e-01, -1.2510e-01,  1.1187e-02,  3.0077e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1359, -1.5073, -0.0460, -0.6657, -0.0464, -0.3091, -0.0967, -0.1839,
        -0.6819,  0.0672, -0.1368, -0.0210, -0.0163, -0.1664, -0.6995, -0.0753,
         0.0275,  0.0297, -0.1434, -0.0656, -0.0401, -0.2883,  0.1393, -0.2250,
        -0.0233, -0.3126, -0.2992, -0.1430, -0.0684, -0.0773, -0.2332, -0.0526,
        -0.1010, -0.0746,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4995e-01,  6.0296e-02,  5.9710e-04, -2.9954e-01, -3.0094e-03,
         1.0424e-01, -7.3778e-01, -7.4360e-02,  1.8182e-01,  1.3411e-01,
         6.9071e-02,  1.5396e-01, -5.2220e-01, -1.7440e-01, -8.6516e-01,
        -1.1655e+00, -9.9532e-03, -1.5937e-01, -5.6554e-02, -2.3766e-01,
         1.9016e-02, -7.7651e-03, -4.3655e-02, -4.2337e-01, -2.4362e-01,
        -2.4912e-02, -2.6960e-01,  8.5138e-03, -7.1857e-03,  4.1818e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.0742,  0.0572, -0.2932, -1.8976, -0.0122, -0.0024, -0.1308, -0.1826,
        -0.7630, -0.8260,  0.2587,  0.1917,  0.2839,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3152,  0.0550,  0.3008, -0.1648, -0.1496, -0.3201, -0.0513, -0.6200,
        -1.0624,  0.1092,  0.0231, -0.4932, -0.1338, -0.2501, -0.6909,  0.0684,
         0.1609,  0.3150,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1890, -0.9488, -2.5350, -0.0297, -1.1573, -0.1481,  0.0364, -0.2667,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1586, -1.5789,  0.0105, -0.2622, -0.2244, -0.0103, -0.0923,  0.0120,
         0.0262, -0.0619,  0.1416,  0.1086, -0.1222, -0.0129, -0.0030, -0.1237,
         0.0098,  0.0177,  0.0300, -0.0155,  0.0876,  0.0316, -0.2150, -0.2015,
         0.0519, -0.1693, -0.0329, -0.0670, -0.7252, -0.1162,  0.1214, -0.3665,
        -0.0545, -0.8061, -0.0802, -0.2869, -0.3121,  0.0215,  0.0999,  0.0452,
        -0.1375, -0.0238, -0.1573,  0.0607,  0.2117, -0.0175, -0.0421],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6827, -1.5031,  0.4969, -2.3755, -0.0858,  0.1505, -0.1066, -0.0733,
        -0.0980,  0.3623, -0.1662,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1480, -1.8056, -0.2693, -0.8523, -0.2400, -0.0375, -0.4788, -1.6883,
        -0.2702, -0.4422, -0.1918,  0.2739, -0.4129,  0.0124,  0.0914, -0.0023,
         0.0338, -0.0061,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7081e-01, -1.0451e+00, -2.0749e+00, -5.1511e-01, -2.5461e-01,
        -1.6547e-01, -1.3596e+00, -4.7406e-02, -5.5622e-01, -1.1647e-01,
        -8.6334e-02, -3.7307e-02,  8.5639e-04,  1.2492e-01,  2.9110e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2110, -2.7216,  0.0824, -0.1687, -0.6646, -0.8757, -1.0088, -0.4855,
        -0.2886, -0.6463, -0.6662,  0.0466, -0.0033, -0.3086,  0.3050,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3918,  0.0654,  0.1296, -0.2533,  0.0323,  0.4380,  0.3889,  0.3601,
        -0.9508, -0.4825, -0.3636, -1.0447, -1.0548,  0.0633,  0.0135, -0.0669,
        -0.4344,  0.0321, -0.0196, -0.0222, -0.5468, -1.0299, -0.0186, -0.1035,
         0.3764,  0.1057,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1069, -0.2070, -0.1533,  0.1134,  0.0835,  0.1115,  0.6691, -1.1558,
        -2.3901, -0.2282,  0.1061,  0.0153, -0.0145,  0.4200, -0.0402,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6034e-01, -2.4918e+00,  1.1213e-03, -6.7557e-01, -7.4506e-01,
        -6.5635e-02, -4.8861e-01, -1.8590e-01, -4.9897e-03, -1.2362e+00,
         1.8426e-01,  5.9521e-02, -3.0161e-01, -8.4217e-02,  3.8409e-02,
         2.9605e-02, -1.2104e-01,  2.9461e-02, -5.2325e-02,  1.5873e-01,
        -1.2411e-01, -1.4517e-02, -1.6026e-01, -4.2802e-02,  6.3879e-01,
         5.0917e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0032,  0.0377,  0.0782,  0.0200,  0.0419, -0.0762,  0.0125,  0.0853,
        -0.1268, -0.1971, -0.6182, -0.0553, -0.0383,  0.1992, -0.5636, -0.3192,
         0.0272, -0.1848, -0.1088, -0.4150, -0.0681, -0.1743, -0.0891, -0.2250,
        -0.0598, -0.1834,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-7.9829e-01, -4.3120e+00,  1.0291e-01,  8.8908e-02,  1.1967e-02,
         1.6696e-01, -2.7786e-01, -4.4240e-01, -8.1531e-02, -2.2917e-01,
        -1.4025e-01, -3.3600e-01, -5.1976e-01,  9.9986e-02, -3.1690e-01,
        -9.1169e-02, -2.9516e-02, -1.8305e-02, -1.1007e-01, -7.0301e-02,
         2.4863e-01, -1.9871e-01,  6.6789e-02, -1.1563e-01, -8.6539e-01,
         4.2380e-02, -1.8695e-01, -7.1893e-02,  3.9627e-01,  4.9612e-02,
         2.0874e-03, -2.1148e-01, -7.4247e-03, -8.8084e-02,  1.5639e-01,
        -2.4977e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1726, -0.8009, -0.3598,  0.1498, -0.4173,  0.0309, -0.3536, -0.4610,
        -0.0131, -0.0691, -0.2078, -0.0544, -0.0993, -0.0118, -0.0666, -0.1584,
         0.0957,  0.0048,  0.0521,  0.0073, -0.0170, -0.4511,  0.0088, -0.0096,
        -0.3294, -0.3917, -0.0307, -0.3622, -0.2661, -0.0104, -0.1121, -0.2460,
        -0.3601,  0.0293,  0.4424,  0.1209,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8772e-02,  1.5742e-01,  2.5993e-02, -8.1936e-02, -1.9640e-03,
        -6.9136e-01,  1.0342e-03, -1.0876e-01,  7.2000e-02, -1.9946e-02,
         9.1593e-02, -8.4631e-01, -1.1279e+00, -9.8765e-02,  9.6230e-02,
         1.0062e-01,  1.2477e-01,  6.8154e-02, -1.2059e-01, -1.6356e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0771, -1.1665, -0.3095, -0.2123,  0.0083, -0.1042,  0.0241, -0.2898,
        -0.0587, -0.2032,  0.0342, -0.0850, -0.0162,  0.0268, -0.5806, -0.4062,
        -0.1336, -0.1975,  0.0428, -0.0439, -0.1237, -0.0095,  0.0446, -0.0175,
         0.0207,  0.0310, -0.0193, -0.0909, -0.0398, -0.3399, -0.3622, -0.0953,
        -0.0962, -0.5838, -0.0309,  0.0621, -0.0791, -0.1602,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8044e-01, -1.5965e+00, -8.0167e-01, -4.2107e-01,  1.1900e-03,
         2.0217e-03, -2.8009e-02,  1.4750e-02,  1.3271e-02, -6.3515e-02,
        -3.3785e-01, -3.8045e-01, -5.4015e-02,  2.5142e-02, -2.3224e-02,
        -3.7271e-01, -1.5988e-02, -4.9444e-01, -4.4467e-02,  4.5396e-02,
         3.4812e-02,  6.3347e-02,  1.0918e-02,  2.2651e-02, -1.9936e-02,
        -9.8351e-03, -1.3978e-01, -1.8475e-01, -4.6385e-01, -1.9175e-02,
        -3.9292e-02, -1.0853e-01, -4.2748e-02,  9.0817e-03, -1.8816e-01,
         3.2204e-03, -3.5678e-02,  6.1327e-02, -3.6702e-02,  1.0417e-01,
         7.9660e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6402, -2.9680,  0.0994,  0.0749, -0.1186, -0.3746,  0.0042,  0.3293,
         0.0092, -0.0379, -0.0689, -0.3773, -0.6568, -0.0799,  0.0210, -0.4592,
        -0.1040, -0.2386, -0.1029,  0.1007,  0.5782, -0.2219,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2706,  0.0545,  0.0171,  0.0117,  0.1044, -0.1554,  0.0130,  0.0553,
        -0.1119,  0.0145, -0.0355, -0.4942, -0.0373, -0.1969,  0.0312, -0.5063,
         0.0348, -0.2590, -0.0869, -0.4372, -0.6029,  0.0459,  0.0834, -0.2300,
        -0.0170,  0.0178, -0.0427, -0.2184, -0.5775, -0.0476, -0.2062, -0.3146,
        -0.0137,  0.0711, -0.1301,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1783, -0.4800, -2.1731, -0.4966, -0.5987, -0.1087,  0.0541,  0.0698,
         0.0910,  0.0246, -0.1831,  0.0762, -0.0887,  0.1247, -0.0071, -0.0753,
         0.2191, -0.1489, -0.3652, -0.1018, -0.3398, -0.0038, -0.3968, -0.4799,
        -0.0939,  0.0334, -0.2965, -0.2573,  0.1680,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0818, -2.1973, -0.6082, -0.3489, -0.0531, -0.4579,  0.1638, -0.1506,
        -0.2483,  0.0560, -0.2759, -0.2828,  0.0713, -0.1073, -0.0533, -0.4686,
        -0.2410,  0.0024, -0.1656,  0.0754, -0.0633, -0.1030, -0.0979, -0.0894,
         0.1678,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2169e-01, -2.0425e-02, -1.2044e-02, -3.1577e-03, -7.7488e-01,
        -1.2110e-02,  1.6021e-01, -6.5763e-01, -9.0239e-01, -4.7462e-02,
        -1.8357e-02,  2.1093e-01,  6.7031e-02,  1.6966e-01, -8.0969e-01,
        -8.6625e-01,  1.2130e-01, -2.8562e-01, -3.6372e-04, -2.1115e-02,
         3.4727e-02, -7.1378e-02,  1.3927e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8649e-04,  4.3557e-02,  8.3682e-02,  7.6822e-02,  3.6747e-02,
         4.9027e-02,  2.7034e-02,  4.7212e-02, -4.3216e-03, -2.3860e-02,
         1.2741e-02,  2.6877e-02, -5.8153e-01, -6.5935e-01, -1.7648e-01,
        -5.7957e-02, -3.9832e-02, -2.3032e-01, -6.8436e-02,  6.6070e-02,
         7.8417e-04,  6.7609e-02,  2.6803e-02,  4.2151e-02,  8.3682e-02,
         7.0828e-02, -7.6493e-01, -5.2035e-02, -3.4560e-01,  3.9283e-02,
        -4.6812e-01, -5.9751e-02, -4.9123e-02,  1.5834e-01, -3.0865e-01,
        -1.3615e-03, -4.7412e-02, -1.0526e-02, -1.4105e-01,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3888,  0.1035,  0.0495, -0.9508, -1.6785, -0.0236,  0.0038, -0.9744,
        -0.8259, -0.1556, -0.0747,  0.0331,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-2.7689e-01, -7.4479e-01, -1.3214e+00,  2.3667e-02, -4.9365e-02,
         5.7785e-02, -2.7705e-01, -6.4014e-02, -9.2147e-02, -3.8550e-01,
         1.6855e-02,  4.9596e-02, -1.2088e-01, -4.3347e-02, -4.4117e-01,
        -6.1869e-02, -9.4444e-02, -5.8440e-01, -9.7128e-02,  1.9389e-02,
        -1.7514e-01, -2.0504e-01,  4.6647e-02, -2.8416e-04, -3.3442e-02,
         7.2092e-02,  6.9840e-02, -2.0169e-01,  3.5480e-01, -2.0635e-01,
         1.6707e-03,  4.8650e-02, -3.1887e-01, -5.4377e-02, -8.1839e-03,
        -8.5553e-03, -3.3769e-01, -3.4372e-02, -2.4373e-04, -2.5228e-02,
         2.4807e-02,  1.1722e-02,  7.5804e-02, -1.7605e-01,  8.2549e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2274, -3.4887,  0.0337, -1.2451, -0.1128,  0.3038,  0.1657, -0.3686,
        -0.9584,  0.2040, -0.0925,  0.0566,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4806,  6.0362,  1.7727,  1.8097, -0.4195, -0.6604,  1.1404, -1.0811,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5255e-01, -1.9051e+00, -1.3084e-01, -9.5777e-03, -4.9748e-01,
        -2.2803e-02, -3.9714e-02, -1.0989e-01, -3.5779e-01, -7.0081e-01,
        -1.1628e-01, -5.9945e-02, -7.1105e-02,  8.9707e-03,  6.3864e-02,
         1.8760e-02,  7.3876e-02, -3.3937e-02,  2.1050e-02, -3.5663e-02,
         4.2533e-02,  4.6383e-02, -4.3322e-02, -5.5996e-02, -6.5012e-02,
         4.4101e-02,  1.4123e-02, -1.3143e-01, -9.4987e-02,  1.5013e-02,
         2.5406e-04, -8.3196e-02,  2.0749e-02,  4.4449e-02, -3.2052e-02,
         5.0366e-02,  1.8241e-02, -7.3090e-02,  6.2232e-02,  5.8349e-03,
        -3.3194e-02,  4.8951e-02,  1.0708e-02, -1.6914e-02, -1.4444e-01,
         1.6936e-01, -6.2162e-01, -5.6698e-02, -3.9056e-01, -9.3076e-01,
        -2.9112e-01,  1.1196e-01, -4.0169e-01, -2.2149e-03, -8.8254e-02,
        -1.6863e-01, -8.9021e-02, -2.2092e-02,  4.7683e-02, -2.4102e-02,
        -2.3386e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1306, -0.0859, -0.0363, -0.3058, -0.3813, -0.1238, -0.2276, -0.1511,
         0.0777,  0.0041, -0.0452, -0.3657, -0.0077, -0.3427, -0.2485, -0.0353,
        -0.0722,  0.0421, -0.1239, -0.1024, -0.3725, -0.5495, -0.1540, -0.0763,
        -0.2497, -0.1967, -0.1025, -0.0937, -0.1334, -0.1215,  0.0035, -0.1615,
         0.0155,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5639, -0.3576, -1.2109, -1.2187, -0.0317,  0.0690, -0.4460, -0.0568,
         0.0051, -0.1106, -0.0440,  0.1347, -0.3836, -0.0462, -0.2067,  0.6669,
         0.0997, -0.5147, -1.3565, -0.0106, -0.1080, -0.3113,  0.4000,  0.1834,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0936, -0.6997, -0.6580, -0.1964, -0.0979, -0.5208,  0.0533, -0.3561,
         0.0190, -0.3105, -0.3717, -0.3765, -0.5391,  0.0475, -0.0667,  0.0474,
        -0.0118,  0.0538,  0.0573, -0.0053, -0.2632, -0.0239, -0.0040, -0.1816,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2124, -0.0705, -0.2795, -0.6284, -0.1444,  0.0550, -0.0450, -0.3405,
         0.0546,  0.0519, -0.0237, -0.0372,  0.0613,  0.0807, -0.2522, -0.3407,
        -0.4151,  0.0453,  0.0252,  0.0440, -0.4785,  0.0101, -0.2033, -0.5034,
         0.0094,  0.0617, -0.1758, -0.0144, -0.0504, -0.0878, -0.0503, -0.0489,
         0.2182, -0.0582,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0310,  0.0594, -0.0553, -0.0276, -0.7060,  0.0499, -0.0637,  0.0356,
        -0.2542, -0.0770,  0.1407,  0.0947, -0.1101, -1.0462, -0.0507, -0.0189,
        -0.9257,  0.0034, -0.3833, -0.0926, -0.1693, -0.6459,  0.1795, -0.1282,
        -0.0777,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6234, -0.0756,  0.2555,  0.1029,  0.1265, -0.0675,  1.0103,  0.8779,
         0.9978,  2.8215,  0.1750,  0.5079,  0.2347,  0.3207, -0.1390, -0.0632,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0487, -1.9317, -0.0714, -0.7626, -0.1379, -0.6303, -1.6289, -0.1966,
         0.1642,  0.0394,  0.0557,  0.0549, -0.3448,  0.0592,  0.0564, -0.0812,
        -0.1512, -0.6051, -0.3525, -0.1148, -0.0217, -0.1039,  0.0622,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1858, -0.0891, -0.4995, -0.8442, -1.9729, -0.2828,  0.4053,  0.0843,
        -0.0136,  0.2077,  0.1573,  0.0605,  0.2125, -0.0825,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-3.4799e-02, -6.3127e-01, -9.7574e-01, -1.1754e+00,  1.1966e-04,
         5.1981e-02, -2.6461e-01,  6.6618e-02, -1.7800e-01, -1.3286e-01,
        -5.6233e-01, -5.2856e-01,  6.7756e-02,  4.4304e-03, -8.2413e-02,
         2.7520e-01, -8.9586e-02, -6.2169e-01, -4.4801e-02, -4.1801e-01,
        -6.0944e-02, -8.1438e-02,  1.2518e-01,  5.1558e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.9864, -0.0130, -0.8362,  0.0858, -0.7027, -0.2200, -1.7062,  0.1293,
         0.2495,  0.5054, -0.2012,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0781,  0.1228,  0.0785,  0.0687,  0.0980, -0.2996, -0.8091,  0.0074,
        -0.3459,  0.0442, -0.5794,  0.0169, -0.3011,  0.1057, -0.3073, -0.0939,
        -0.1553, -0.2492,  0.0909,  0.0650, -0.1094,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3824, -0.0775,  0.0599,  0.0960,  0.1487,  0.0657, -0.0997, -1.3796,
        -0.1757, -1.6644, -0.5398,  0.1993,  0.1288, -0.2268,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1537,  0.0720, -0.0746, -0.0264,  0.0738,  0.0118, -0.0134, -0.2035,
        -0.0912,  0.1456,  0.3087,  0.1904,  2.2624, -0.1664, -0.0816,  0.1021,
         0.2060, -0.0098, -0.1635, -0.2246, -0.0907,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0529, -2.2904,  0.1330, -0.9437,  0.3329, -1.1447, -0.0478, -1.0046,
        -0.0907, -0.7000, -0.2899,  0.0959,  0.2202,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5830e-01, -7.0313e-01,  3.6270e-02,  8.3254e-02, -4.6044e-02,
        -2.1888e-02, -2.0905e-01, -3.1912e-01, -1.1815e-01, -2.4447e-02,
         7.1909e-03,  1.5312e-01,  2.5088e-02,  3.4065e-02,  4.0671e-02,
        -5.1270e-02,  1.9947e-02, -5.7103e-02,  4.8720e-03,  1.4323e-01,
         1.0202e-02,  9.7500e-02, -6.5852e-02, -1.5500e-04, -1.1607e+00,
        -2.6674e-02, -6.0500e-01, -7.3070e-01, -7.4403e-01, -1.0793e-02,
        -1.1806e-01,  1.8169e-02, -7.3200e-02, -7.7911e-02, -3.0599e-02,
         6.8141e-02, -3.1565e-01, -7.8499e-03,  4.3601e-03, -1.3046e-01,
        -1.4390e-01, -2.6976e-02, -1.5726e-02, -2.2740e-02,  4.4727e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5701,  0.2853,  0.8806,  0.2298,  0.0365, -0.1665, -0.1293,  0.1387,
         0.2001,  0.0289,  0.9907,  0.1103,  0.0474,  0.1140,  0.8817,  0.1466,
         1.1488, -0.0289,  0.0567,  0.0623, -0.2402,  0.7064, -0.3521,  1.0505,
         0.1526,  0.1541,  0.0441,  0.1954,  0.6916,  0.1236, -0.1172,  0.2055,
         1.3282,  0.1073,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0811, -0.0558,  0.1562, -0.2884, -0.0305, -0.1743,  0.0203, -0.0072,
         0.0364, -0.0551, -0.0737,  0.0080, -0.0565, -0.0298, -0.0339,  0.0608,
        -0.1018, -0.1025, -0.2236, -1.2491, -2.1113,  0.2427,  0.1530, -0.3251,
         0.0276, -0.1940,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0410, -2.3410, -0.0102, -0.7079,  0.2114,  0.0295, -0.0244, -0.4149,
        -0.1324, -0.0597, -0.0611,  0.0202,  0.0802,  0.0942, -0.0525, -0.3031,
        -0.6714, -0.0113, -0.1451, -0.0125, -0.4398,  0.1188,  0.3484,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9365e-02, -9.7468e-01, -2.2519e-01, -3.1556e-01, -3.0127e-02,
         2.1930e-02, -5.0189e-02, -3.1339e-01, -5.0053e-01, -1.5290e-02,
         7.6230e-02,  4.0224e-02,  3.8171e-02,  1.4523e-02, -7.9430e-01,
         1.2192e-01, -8.4075e-02, -2.7784e-01, -4.9380e-01, -7.3632e-02,
         1.1102e-02, -8.0023e-02, -1.5746e-02,  7.8064e-02, -4.9532e-03,
        -1.4759e-01, -3.3356e-01, -5.5090e-01, -4.4324e-02, -3.4147e-02,
         1.0307e-01, -2.9509e-02,  1.3063e-02,  5.7110e-03,  2.9274e-03,
         7.6525e-02, -1.9245e-01, -4.5156e-04, -1.3686e-01, -5.2446e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2375,  0.0152, -0.0053, -0.1185, -0.3422, -0.0509, -0.0778, -0.4090,
         0.0638, -0.0163, -0.0238,  0.0220, -0.1535, -0.0153, -0.1687, -0.2309,
        -0.3780, -0.0321, -0.0439, -0.3568,  0.0752, -0.4271, -0.3561,  0.1001,
        -0.0192,  0.0077, -0.0403, -0.3025,  0.0426, -0.0773, -0.3357, -0.0455,
         0.0142, -0.0241,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.4773, -0.2640, -0.1468, -0.1053, -0.3108, -0.0188,  0.1305, -0.0628,
         0.0601,  0.0492, -0.0188, -0.1487,  0.1265, -0.0979, -1.0851, -0.0050,
        -0.0251, -0.0135, -0.2389, -0.0141,  0.0803,  0.0015, -0.0161,  0.1117,
        -0.3416, -0.5352,  0.2677, -0.0703, -0.0226,  0.0210, -0.0574,  0.0373,
         0.0676, -0.0449, -0.3042,  0.0870, -0.0716, -0.2568, -0.0406, -0.3059,
        -0.1241, -0.0206, -0.4275, -0.1234, -0.0550,  0.0034, -0.0243, -0.0332,
        -0.1309,  0.0277, -0.1095, -0.0278], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2875, -0.0228, -0.0155, -0.0366, -0.8770,  0.0210,  0.0558,  0.2512,
         0.0606, -0.0631, -0.0055,  0.0336,  0.0201, -1.0164, -0.1598, -0.3253,
        -0.7448, -0.3988, -0.2404,  0.0354, -0.0523,  0.0412, -0.0879, -0.4103,
        -0.0080,  0.0585,  0.0671,  0.0194,  0.0630, -0.0040, -0.0874,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2083, -1.2981, -0.0061, -0.3491,  0.1492,  0.1059, -0.1499,  0.0185,
        -0.0573,  0.1124,  0.1113, -0.3057,  0.0051,  0.0346, -0.5131, -0.4569,
        -0.1678, -0.4945, -0.7402, -0.8681, -0.1946, -0.0055, -0.1166, -0.1930,
         0.4332,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1409,  0.0076,  0.0790, -0.2732,  0.2433, -0.8981, -1.7928,  0.1376,
        -0.1989, -0.1469, -0.2211, -0.1400,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0690, -1.2289, -0.0866, -0.2287, -0.4693, -0.0967, -0.0027,  0.0023,
        -0.0714,  0.0358,  0.0201, -0.0739, -0.3325, -0.4753,  0.0080, -0.2550,
        -0.0171, -0.0754,  0.0293,  0.0045, -0.1173, -0.0055, -0.0978,  0.0913,
         0.0679, -0.2251,  0.1223, -0.0982,  0.1716,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2994, -2.1360,  0.0374, -0.0318, -0.2313, -0.8495, -1.7111,  0.2276,
        -0.5756,  0.1565,  0.1981, -0.1699,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7358, -2.8465,  0.2433, -0.8783,  0.0058, -1.4946, -0.9034, -0.1766,
        -0.4410,  0.1174,  0.0877,  0.1675,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5303,  0.0651, -1.2566,  0.0619, -0.7339, -0.0769, -0.0034, -0.0906,
        -0.9398, -0.1049, -0.1795, -1.2203, -0.1139, -0.0477, -0.0966, -0.7454,
         0.0132,  0.1605,  0.0404, -0.0800,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2328, -1.2024, -0.0707, -0.4940, -0.6676,  0.0779, -0.1225,  0.0108,
        -0.2273, -0.0203, -0.0519, -0.3546, -0.2185,  0.0193,  0.0679,  0.0058,
        -0.1429,  0.0540,  0.0489, -0.0239, -0.1438,  0.0804,  0.0676, -0.0434,
        -0.0436,  0.0715, -0.0694, -0.3258, -0.2695,  0.0719,  0.0646, -0.0387,
        -0.2162, -0.2234,  0.0169,  0.0919, -0.1009,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2371,  5.0971,  0.0610, -0.1591,  0.0070,  0.0405,  0.0909,  0.0814,
         0.0688,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0185, -0.1660,  0.1777, -0.7627, -0.1033,  0.1019, -0.3138, -0.4507,
        -0.0975,  0.0299, -0.0155, -0.0459, -0.7101, -0.7507,  0.0139,  0.0043,
         0.0041,  0.2085, -0.3964, -0.6415,  0.0098, -0.0012,  0.0132, -0.0419,
        -0.0203,  0.1710, -0.2366,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1656, -1.4350,  0.1475, -0.6295, -0.1011, -0.3040, -0.0048,  0.0126,
         0.0195,  0.1049, -0.0196, -0.5651, -0.2355, -0.0161,  0.0049,  0.1489,
        -0.4767,  0.0573, -0.0303,  0.0259,  0.0093, -0.5218, -0.6783, -0.0193,
        -0.0575,  0.0930,  0.0232, -0.2995, -0.7518, -0.1084,  0.0398, -0.0629,
         0.0580, -0.2800,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.0676, -0.0458, -0.0224, -0.0456, -0.3445,  0.0073,  0.0068,  0.0083,
         0.0117,  0.0116, -0.0333, -0.3782,  0.0449, -0.0116,  0.0303, -0.2530,
        -0.7151,  0.0924, -0.1712,  0.0079,  0.0765,  0.0014, -0.2347, -0.0688,
         0.0216,  0.0450, -0.0303,  0.0588, -0.0211, -0.0140,  0.1177, -0.4902,
        -0.5953, -0.0051, -0.0736, -0.2010, -0.2808, -0.1280, -0.0097, -0.0179,
        -0.0717, -0.2003, -0.0030,  0.0670,  0.0205,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1062, -0.0089,  0.0411, -0.0054, -0.2566, -0.9172, -1.3525, -0.2788,
         0.1419, -0.8165, -0.2035, -0.0332, -0.4964,  0.1366, -0.0474, -0.2413,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1543,  0.1383,  0.0934, -0.0347, -0.1175, -0.0640, -0.1541, -0.0936,
        -0.9122, -1.5291, -0.4417, -1.1628, -0.0238, -0.7265, -1.3243,  0.0035,
        -0.0679, -0.0321,  0.0486, -0.0529,  0.0180,  0.0652, -0.0768, -0.2408,
        -0.0525,  0.2771,  0.0302,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0946,  0.0093, -0.2273,  0.0267, -0.1806, -0.0012, -0.1162, -0.0160,
        -0.0663, -0.0053,  0.0145, -0.0199, -0.1251, -0.0157, -0.1393, -0.0285,
        -0.0716,  0.0370,  0.1626, -0.2298,  0.0602, -0.1214, -0.0921, -0.3322,
        -0.0281,  0.0034, -0.1974, -0.1930,  0.0151, -0.0806,  0.0190, -0.1336,
        -0.1652,  0.0446, -0.0068, -0.0647, -0.0469, -0.2599,  0.0362,  0.0372,
        -0.0752, -0.1306, -0.0165,  0.0354, -0.1542, -0.0103], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1861,  0.2610, -0.0234,  0.1064, -0.0390, -0.1724,  0.0706, -0.8153,
         0.0793,  0.0574, -0.0119,  0.0225,  0.0350, -0.0166, -0.0894, -0.0229,
        -0.0046, -0.1876, -0.7246, -1.9124, -0.0150, -0.0765, -0.0321, -0.1101,
        -0.0304,  0.0877, -0.6517,  0.0184, -0.1451, -0.0373,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1327,  0.4796,  0.1828, -0.0341,  0.1065, -0.4840, -0.3185, -2.7707,
        -0.9253, -0.5396, -0.2821,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0606, -2.5445,  0.2228, -0.1449, -0.3139, -0.0774, -0.2594, -0.4310,
        -0.0534,  0.0512,  0.0938,  0.0904, -0.5804, -0.0919,  0.0219, -0.2454,
        -0.0760, -0.0173, -0.1043,  0.1431,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4983, -2.0271, -1.1765, -0.6587,  0.2354, -0.7076, -0.7158,  0.1028,
        -0.0116, -0.5448,  0.3927,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1361, -1.6049, -0.7987, -1.2116, -0.1209, -0.1607, -0.2346, -0.5845,
        -0.6874, -0.0767, -0.1144, -0.0909, -0.0288, -0.0691, -0.0982, -0.4102,
         0.1083, -0.0933, -0.0874,  0.0553,  0.4314,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2458e-01, -1.4747e+00, -1.2339e+00, -1.8061e-01, -2.3367e-01,
        -1.2527e+00, -2.3682e-02,  2.9657e-04, -2.1648e-01, -7.9713e-02,
        -2.5499e-02, -2.2481e-01, -1.4387e-02, -3.2960e-02, -2.4482e-02,
        -1.6403e-03, -1.3963e-01,  1.0258e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1174, -0.9869, -0.7356, -0.1329, -0.0647, -0.0277, -0.0474,  0.0085,
         0.0050, -0.0512,  0.0114,  0.0073, -0.0581, -0.3492, -0.2638, -0.0345,
        -0.2039, -0.3174, -0.1747, -0.0305, -0.0359, -0.2865, -0.5753,  0.0027,
        -0.2279,  0.0923, -0.1110, -0.0786, -0.1174, -0.2886, -0.0110, -0.0512,
         0.0292,  0.0396, -0.0689, -0.0482,  0.0125, -0.0777, -0.0367,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3907, -4.0166,  0.0792, -0.1880,  0.0441,  0.0240,  0.1191,  0.1427,
         0.2802,  0.0133, -0.2126, -1.0151, -0.1364,  0.0548,  0.1050,  0.0694,
        -0.2373,  0.0188,  0.0938,  0.0626, -0.0834, -0.0350,  0.0349,  0.0121,
        -0.0741, -0.0147, -0.2242, -0.1203, -0.4546, -0.5220,  0.0275, -0.0863,
         0.0138, -0.3064,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.0416, -0.4011, -1.0835, -1.1689, -0.6918, -0.5409, -0.2359,  0.0696,
         0.1857,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2003, -1.5558, -0.0801, -0.6565, -0.2402, -0.5052, -0.1052, -0.2670,
        -0.5135, -0.0028, -0.2946, -0.1413,  0.0040, -0.0431, -0.0984, -0.3441,
        -0.0612, -0.3322, -0.0700,  0.0146, -0.0602,  0.0366,  0.0771, -0.0596,
        -0.1255, -0.0085,  0.0230, -0.0165, -0.0633, -0.0250, -0.1306, -0.3793,
         0.0178, -0.2050,  0.0633, -0.0722, -0.1360,  0.0547,  0.0219,  0.0189,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3448, -0.3590, -0.9191, -0.0139, -0.1211, -0.0599, -0.0454,  0.0052,
        -0.4598, -0.4236, -0.0223,  0.0480,  0.0087,  0.0214, -0.0103,  0.0625,
        -0.0058,  0.0849, -0.3309,  0.0110, -0.2164,  0.1118,  0.0395, -0.0739,
         0.0237, -0.1745, -0.3977,  0.0581, -0.3959, -0.5839,  0.0065,  0.0096,
         0.0344,  0.0020,  0.0327,  0.0214, -0.2757, -0.4492,  0.0408, -0.0201,
         0.0768, -0.0944,  0.0505,  0.0441,  0.0303, -0.1241, -0.0160,  0.0158,
         0.0384, -0.0113, -0.0844, -0.1123,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0868, -1.3293, -0.2737, -0.1739,  0.0840, -0.0361, -0.3729, -0.0808,
        -0.2242, -0.0241, -0.0073, -0.2115, -0.6032, -0.0031, -0.3855, -0.4508,
        -0.3053, -0.0041, -0.2500,  0.0106,  0.0394, -0.0245,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0359e-01, -3.9302e+00,  3.3295e-01,  1.1972e-01,  8.6757e-02,
         2.9118e-01, -1.9621e-01, -7.4086e-01, -1.9532e-01, -3.7532e-04,
        -1.4433e-01, -3.7567e-01, -1.7266e-02, -1.0662e-01, -1.1707e-01,
        -4.6826e-01, -1.8875e-01, -1.2819e-01, -3.9945e-02,  7.5826e-01,
        -4.3725e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0014, -0.7446, -0.0700, -0.1075, -0.0209,  0.0067, -0.1395,  0.0811,
        -0.0924, -0.0123,  0.0133, -0.0487,  0.0090,  0.0210,  0.0171,  0.0250,
         0.0108,  0.0108, -0.0894,  0.0031,  0.0378, -0.0125, -0.0238,  0.0384,
         0.0110,  0.0299,  0.0034, -0.1674, -0.0114, -0.1808, -0.3175, -0.3055,
        -0.0410, -0.2579, -0.2378, -0.0364, -0.0365,  0.0407,  0.0441, -0.0259,
        -0.2927,  0.0111,  0.0567,  0.0339, -0.1749,  0.0178, -0.2226, -0.1235,
        -0.0213,  0.0608, -0.1229, -0.0391,  0.0093,  0.0731, -0.0241],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0395, -0.0019,  0.0608, -0.0469,  0.0018, -0.1238, -0.2679, -0.0369,
         0.0057, -0.0048, -0.0217,  0.0148, -0.0294, -0.0480, -0.2425, -0.0966,
        -0.0331, -0.1121, -0.0169, -0.3704,  0.0315, -0.0877, -0.1522, -0.0101,
         0.0396, -0.0088, -0.0376, -0.2986, -0.0948, -0.0330, -0.0441, -0.0057,
        -0.0640, -0.3534, -0.0293, -0.1564, -0.0205, -0.0779, -0.1216, -0.1179,
        -0.0121, -0.0189,  0.0522, -0.0057, -0.0243,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0644, -1.8006, -0.6810, -0.6501, -0.1384, -0.1617, -0.1817, -0.1319,
        -0.3996,  0.0109,  0.0550, -0.1142, -0.1039, -0.5065, -0.3153,  0.2169,
        -0.0852, -0.2974, -0.0645, -0.2294, -0.3166,  0.0401, -0.1422, -0.0590,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0536,  0.0969,  0.0113,  0.0023, -0.1229, -0.0401, -0.2615, -0.3372,
         0.1047, -0.1016, -0.0037, -0.0388, -0.1158, -0.2532,  0.0417,  0.1385,
        -0.3473, -0.2252, -0.3694, -0.2446,  0.0137,  0.0571, -0.2733, -0.4322,
        -0.0074, -0.2307, -0.3450, -0.0480, -0.1078, -0.5264, -0.0992,  0.0890,
        -0.1867,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1705e-01, -3.8866e+00, -2.3962e-01, -7.5386e-01, -1.7047e-01,
        -8.3801e-02,  6.0464e-02, -2.4340e-01, -1.1748e+00, -1.2905e-01,
         4.6159e-02, -1.2064e-01, -1.5436e-02, -8.7710e-01, -1.7377e-03,
         6.3677e-02, -6.5611e-02, -1.7142e-01,  7.8905e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4914, -0.0997,  0.2132, -0.1218, -0.8463, -0.0705, -0.3993, -0.4819,
        -0.6016, -0.0920, -0.0440,  0.2028, -0.0031,  0.0490, -0.0263,  0.0071,
        -0.0975, -0.2846, -0.1588,  0.0120, -0.1789, -0.4767, -0.0155,  0.0153,
        -0.1292,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0473, -1.8949, -1.0334,  0.0307, -0.3266, -0.8940, -0.8146,  0.0093,
         0.0112, -0.3326, -0.0516,  0.1454, -0.3961, -0.0462,  0.0806, -0.0321,
         0.3102,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.4969e-01,  7.4043e-04, -9.2844e-02, -1.4889e-02,  2.0578e-02,
        -1.1497e-01,  2.8283e-03, -8.4046e-02, -9.1751e-02, -5.4432e-01,
        -1.2382e-01, -4.5383e-02,  1.1436e-01,  6.6860e-02, -2.9431e-01,
        -1.4761e-01, -7.7061e-01, -9.9880e-01, -1.0119e-01, -6.1838e-01,
         1.9919e-01, -4.8558e-01, -5.5874e-01, -4.8492e-02, -3.8752e-02,
         6.6317e-02, -7.2076e-02, -2.0535e-02,  2.3165e-01, -1.2844e-01,
        -1.5948e-01,  7.8251e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0144, -0.4881, -0.9934, -0.1461,  0.0831, -0.0283, -0.2580, -0.8328,
        -0.0652,  0.0259,  0.0509,  0.0169, -0.0434,  0.0411, -0.1493,  0.0419,
        -0.1622, -0.0573, -0.3014,  0.0359, -0.2457,  0.0104, -0.2494, -0.6169,
        -0.0153, -0.0651,  0.0172, -0.1144,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0640, -0.2668,  0.0065,  0.1207,  0.0604, -0.9183, -0.7960, -0.1629,
        -0.1293, -0.1654, -0.1137, -0.5717,  0.0103,  0.0482, -0.0389, -0.3379,
         0.0224, -0.5700, -0.0041,  0.1304, -0.0876, -0.0410, -0.0410, -0.1756,
        -0.5704,  0.0284,  0.1659,  0.0051, -0.0409,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8047e-01, -1.2253e+00, -1.8968e-01, -4.0975e-01, -1.0538e+00,
         7.4573e-02, -8.9444e-02,  2.3938e-02, -7.6837e-02,  1.5206e-01,
         4.2958e-01, -2.4299e-01, -3.8222e-01, -1.0628e+00, -2.0177e-02,
        -2.1011e-01,  4.1655e-02, -4.5928e-01, -1.3525e-02, -1.0890e-01,
        -2.7740e-02,  7.8305e-02, -2.5538e-01, -7.0966e-02,  4.3655e-02,
        -2.7379e-01, -4.1062e-01,  3.7579e-02, -7.2117e-02, -3.6330e-02,
        -1.1278e-03,  2.7965e-01, -4.1966e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3300e-01, -1.6281e+00, -7.3223e-02, -6.4089e-01, -4.7800e-02,
        -3.2208e-01,  1.9824e-04, -1.9920e-01, -2.1175e-01, -4.2460e-01,
        -8.4402e-01, -1.7431e-01, -1.2515e-01, -1.8026e-02,  9.4177e-03,
         9.7659e-02, -2.7649e-01,  1.1576e-02, -9.4548e-01, -5.9195e-02,
        -5.3079e-01, -4.1148e-03,  8.2656e-02,  1.0817e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2462, -1.3933, -1.1124,  0.0829, -0.4220, -0.0846, -0.2540, -0.3134,
         0.0637, -0.3632, -0.0580, -0.1289, -0.4880,  0.0222, -0.0702,  0.0184,
         0.0092, -0.3041,  0.0101, -0.6777, -0.1744,  0.2853,  0.2860,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0120, -0.7158,  0.0823, -0.0281, -0.1968, -0.3361,  0.0959,  0.0097,
        -0.0392, -0.0342,  0.0500,  0.0167, -0.0867,  0.0415, -0.0470,  0.0919,
        -0.5240, -0.2359, -0.2604, -0.0020, -0.0451, -0.1866, -0.0943, -0.0181,
        -0.3381, -0.0918, -0.0533,  0.0238,  0.0139, -0.2242, -0.3921, -0.0141,
        -0.1703,  0.0231, -0.2167, -0.2818, -0.0044, -0.0153,  0.0737,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1774, -0.0341, -0.0226, -0.2130, -1.2183,  0.0057,  0.0535,  0.0708,
        -0.0500, -0.0032, -0.0166, -0.0126, -0.0218, -0.0395,  0.0052, -0.4961,
        -0.2896, -0.0296,  0.0480, -0.1388, -0.4713,  0.0168,  0.0910, -0.0030,
        -0.5571, -0.3662, -0.0047, -0.1922, -0.0263, -0.2879, -0.0975, -0.0310,
        -0.0298, -0.0607, -0.0514,  0.0239,  0.0139, -0.5768,  0.1071,  0.0040,
         0.0061,  0.1604, -0.1413,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0203e-02, -1.4864e+00, -1.5968e-01, -4.3029e-02, -2.1353e-03,
         3.2799e-02, -7.4714e-02,  7.1045e-02, -4.3721e-01, -1.3871e-02,
         7.5292e-02, -9.9730e-04, -1.2607e-01, -2.5413e-02, -1.5374e-03,
         2.0885e-02, -1.3664e-01, -4.5179e-01, -4.8871e-02,  1.5664e-02,
         2.3214e-02,  2.6482e-02, -5.3991e-02,  3.2976e-02,  5.9876e-02,
         3.0769e-02,  1.1333e-02,  6.2346e-02, -5.0275e-03,  1.8080e-02,
         2.7529e-02, -2.1393e-01, -4.4018e-01,  2.0915e-01, -7.7520e-02,
         3.5696e-02,  4.8298e-02, -2.6167e-01, -2.3489e-01, -1.1188e-02,
        -2.5145e-01, -2.6123e-01, -2.1267e-02, -5.9050e-02, -5.5994e-02,
         9.2435e-02, -2.3950e-02,  4.9398e-03, -1.0313e-02,  4.5317e-03,
         4.8135e-02,  5.8256e-02,  1.8329e-03,  2.5267e-02,  1.8156e-02,
         1.5435e-01, -9.9006e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3902, -3.0264, -0.1067, -0.1143, -0.3562,  0.0858, -0.5154, -1.1152,
        -0.3107, -0.0038, -0.4676,  0.1060, -0.0073,  0.3478,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1231, -1.0281, -0.5081, -0.0684,  0.0163, -0.2424,  0.0251,  0.0158,
        -0.2226,  0.0389, -0.3023, -0.3754, -0.0481, -0.1797, -0.2147, -0.0065,
        -0.1363, -0.1410, -0.0379, -0.2500, -0.6794, -0.1148, -0.1097, -0.0080,
        -0.0114, -0.0103,  0.0570,  0.1110, -0.1554,  0.0051,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2243, -1.4504,  0.0475, -1.6872, -1.2448, -0.0604, -0.6411, -0.1386,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0875, -1.3739, -0.0717, -0.2246, -0.4703, -0.4633, -0.0152, -0.0792,
        -0.0411, -0.0992,  0.0948, -0.0466, -0.9028, -0.7394, -0.0299,  0.0106,
        -0.0031,  0.0265,  0.0261,  0.0272,  0.0949,  0.0629,  0.0192, -0.2110,
        -0.3216,  0.0338, -0.3206, -0.1496, -0.4456, -1.0039, -0.0242, -0.1244,
        -0.3666,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5948e-02, -1.8712e+00, -6.9590e-01, -1.3728e-01, -1.1271e-01,
        -3.4475e-01, -2.2134e-01, -9.4396e-02,  1.9478e-02, -1.9466e-01,
        -2.0473e-02, -1.7701e-01, -1.3752e-01, -4.9730e-02,  5.4186e-02,
        -6.9177e-02, -3.6715e-02, -2.6532e-02, -2.6248e-01,  3.0948e-02,
         5.0593e-03,  4.1570e-02, -6.3096e-02, -2.3592e-02,  6.3022e-02,
        -1.9941e-02,  1.0273e-02, -4.1246e-01,  4.6084e-02,  2.1027e-02,
        -2.4484e-02, -2.8337e-03, -3.8300e-01, -4.5386e-01,  9.4048e-02,
        -7.3348e-02, -3.0983e-01,  9.3661e-02, -2.7627e-02,  1.7531e-03,
         4.0604e-02,  7.9350e-02,  3.2283e-03, -3.0545e-01, -3.9102e-02,
        -1.1760e-02, -7.5652e-03, -6.1970e-02,  5.2589e-03, -5.5349e-02,
         2.1177e-02, -3.1580e-01, -8.4168e-02, -1.0581e-01, -7.3618e-02,
         3.8496e-02, -2.0845e-03,  2.8622e-02, -4.8840e-02, -2.1253e-01,
        -1.0737e-01, -1.6203e-01, -5.0157e-01,  8.4932e-03, -1.4098e-02,
        -3.0665e-02, -8.8139e-02, -1.8625e-02,  3.4268e-02,  1.5684e-02,
        -1.0300e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2909, -0.0787, -0.3023, -0.0197, -0.4274, -0.0081, -1.1084, -0.1686,
         0.0294,  0.0835, -0.0439, -0.0812,  0.0537,  0.0862,  0.1824,  0.0798,
         0.0152,  0.0122,  0.1444, -0.5126, -0.0788,  0.0659,  0.0785, -0.3040,
         0.0761, -0.3156, -0.0399,  0.0235, -0.0105, -0.1090,  0.1004, -0.0220,
        -0.4822, -0.7435, -0.2043,  0.0117, -0.6581, -0.2499,  0.0201,  0.0559,
         0.2425, -0.0309,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3276, -2.4818, -0.0854,  0.0369,  0.0301,  0.0519,  0.0531,  0.0887,
        -0.0720, -0.4355, -0.1286, -0.3214, -0.4659,  0.0085, -0.0527, -0.0615,
         0.0161, -0.6491, -0.7457, -0.0262, -0.1609, -0.3448,  0.0585,  0.0558,
         0.0232, -0.5002, -0.0127, -0.0212, -0.4090,  0.1204,  0.0634, -0.0199,
         0.0464,  0.0426, -0.0503,  0.1555, -0.1637,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2223, -4.8703, -0.2248, -1.0341,  0.0985,  0.5230, -0.1520, -0.0905,
        -0.1513, -0.0364, -0.2198,  0.5564, -0.3302, -0.2937,  0.3099,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0089, -0.4134,  0.0160, -0.0522, -1.1917, -1.8112, -0.0359, -0.5412,
        -1.8885, -0.2501,  0.0676, -0.2582, -0.2184, -0.2729,  0.1634, -0.0337,
        -0.0245, -0.2259,  0.1526,  0.1257,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0545,  0.0788,  0.0190, -0.0093, -0.6049,  0.0116,  0.0171, -0.0945,
        -0.0140,  0.0748, -0.2556, -0.3386,  0.0249,  0.0182, -0.1624, -0.1274,
         0.0275, -0.1271,  0.0894, -0.2339, -0.0623,  0.0038, -0.0101, -0.0031,
        -0.0266, -0.1970, -0.6186, -0.0553,  0.0212, -0.1736, -0.3173, -0.0182,
         0.0040,  0.0137, -0.2133, -0.3477,  0.0702, -0.0761, -0.0134, -0.0221,
         0.0863,  0.1109, -0.0625,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1047, -0.0087,  0.0126,  0.0049, -0.2263,  0.0087, -0.1331, -0.3478,
        -0.1215, -0.0724,  0.0886,  0.0011,  0.0415, -0.6078, -0.7692, -0.0049,
        -0.0046,  0.0213, -0.0049,  0.0836, -0.4463, -0.0745,  0.1159,  0.0515,
         0.0294, -0.0158,  0.0167,  0.0052, -0.4079, -0.0416,  0.0065, -0.1588,
         0.0084, -0.1486, -0.1771,  0.0170,  0.0741, -0.1081,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4768e-02, -1.8697e+00, -9.2011e-01, -7.9728e-01,  7.2898e-02,
        -5.8365e-01,  1.6187e-01,  1.1957e-04, -4.5014e-01, -4.4518e-01,
         4.8916e-02,  3.5357e-02, -3.3108e-01,  3.2494e-02,  8.6931e-02,
        -2.0948e-02,  9.8519e-03, -4.9471e-01,  1.0783e-01,  1.0299e-01,
         9.5257e-03,  1.6500e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9103e-01, -9.9167e-01, -2.0037e-01,  1.9177e-02,  9.9531e-02,
         2.5663e-02, -1.9216e-02,  2.1616e-02, -1.1393e-01, -1.0029e+00,
        -3.5616e-01,  4.4074e-02, -8.6755e-02, -9.6369e-02, -3.6807e-01,
        -2.5392e-01, -5.6696e-01, -1.4822e-01, -4.5732e-02, -2.6330e-01,
         1.3518e-01, -3.9113e-04, -2.1181e-01,  1.3570e-02,  1.3698e-02,
         2.4182e-02, -4.8555e-02, -2.2200e-02,  1.4160e-02, -2.4719e-01,
        -1.2399e-02,  4.7471e-02,  5.6593e-02, -1.4369e-01, -1.8034e-01,
        -2.9137e-01, -1.4383e-01,  3.1918e-02,  2.0720e-01, -8.8378e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2447, -0.0940, -0.0456,  0.0438, -0.1079, -0.0923, -0.0142, -0.0551,
        -0.3832, -0.1063, -0.0445, -0.0180, -0.1396, -1.1317, -0.5890,  0.0553,
         0.1294, -0.0537, -0.7450,  0.0423,  0.0670, -0.0228,  0.0633,  0.0016,
         0.0773, -0.3945,  0.0922, -0.3794, -0.3587, -0.0534,  0.1841,  0.2018,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2499e-02, -3.8539e+00, -1.6304e-02, -1.1545e-02, -1.4627e-01,
         5.9779e-02, -5.6931e-02, -4.7048e-01, -6.6976e-01, -1.1940e-03,
         1.9784e-01, -2.7608e-01, -1.7050e-02, -2.6101e-02, -4.9361e-02,
        -3.2227e-02, -4.3647e-01, -1.1485e-01,  1.9298e-02,  8.2817e-03,
         1.7770e-02,  1.5210e-02, -4.7397e-02, -7.1835e-01, -1.2738e-01,
         1.6596e-02, -4.8422e-01, -7.3301e-02, -1.2189e-01, -2.1046e-01,
         5.5071e-02, -6.7275e-02, -5.9832e-02,  1.6243e-02,  9.6256e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.0657, -0.1718, -0.7816, -0.6889, -0.1066, -0.1911, -0.8102, -0.8042,
        -0.1008, -0.6096, -0.1374, -0.3238,  0.0066, -0.3356, -0.1442, -0.1078,
        -0.2056, -0.0572,  0.0519,  0.0659,  0.1147,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2033, -3.6026,  0.0794, -0.6115, -1.0378, -0.8092, -0.2652, -0.4172,
        -0.2852,  0.2483,  0.0957,  0.0291, -0.0240, -0.1639, -0.4401,  0.1837,
        -0.4153, -0.8164,  0.4418,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2340, -0.4941, -0.5874,  0.0178, -0.1410, -0.0210,  0.3346, -0.0237,
         0.0900, -0.8413, -0.5980,  0.0049, -0.3198, -0.8522, -0.1224, -0.1583,
        -0.0717, -0.1497,  0.1965,  0.1563,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2309, -2.9557, -0.0783, -0.6004, -0.0229,  0.0103, -0.1088, -0.3358,
         0.0050, -0.0227, -0.0607, -0.1232, -0.0165, -0.2880, -0.0363, -0.2977,
        -0.5515, -0.1860, -0.2032, -0.3483,  0.2535,  0.0309, -0.1262,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1216, -1.5373, -0.4933, -0.5068, -0.0707, -0.5976, -0.0771, -0.5815,
        -0.5933, -0.4714, -0.2030, -0.4321,  0.0031, -0.4806,  0.0799,  0.0368,
        -0.0314,  0.0090, -0.1209, -0.1563, -0.3376,  0.0246,  0.0341, -0.2724,
         0.1500,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1864e-01, -2.7385e+00,  5.3296e-02, -3.7958e-02, -1.5784e-02,
        -2.8365e-01,  9.2829e-02, -1.9754e-02,  5.9550e-02,  1.2071e-02,
        -5.9905e-01, -4.6891e-02,  4.1589e-02,  9.8440e-02,  8.8146e-02,
        -2.1052e-02, -1.4674e-02, -7.8221e-03, -5.6835e-03, -7.4478e-02,
        -6.3734e-01, -4.0515e-01, -3.5910e-01, -3.6844e-02, -2.7664e-02,
        -6.0344e-02,  8.1129e-02,  2.9635e-03,  3.4589e-02, -1.0691e-01,
        -1.7578e-01, -2.2498e-03, -2.6342e-01,  5.4915e-02,  4.9834e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6237, -0.0697, -0.0936, -0.4589, -0.0787, -0.0928, -0.3508,  0.0107,
        -0.6889, -0.0517, -0.1885, -0.2401, -0.2046, -0.3373, -0.5691, -0.8898,
        -0.2439, -0.5751,  0.2136, -0.0215,  0.3559, -0.1438,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6724, -0.4838,  0.7099, -0.0602,  0.0564, -0.4511, -0.0923, -0.2763,
        -0.0269, -1.2303,  0.3516, -0.0101, -0.1005, -0.2829, -0.3165, -1.1312,
        -0.5961,  0.1830, -0.1444, -0.1898, -0.5819,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3513, -3.7460, -0.2462,  0.0668, -0.0950,  0.2597, -0.5839, -1.0443,
         0.3731,  0.0808,  0.2453,  0.0955,  0.3102, -0.4805,  0.0234, -0.2791,
        -0.0854,  0.8164, -0.2304,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0145,  0.0076, -0.1712,  0.0500,  0.0207, -0.2953,  0.0464, -0.0391,
        -0.0258, -0.0012, -0.0568, -0.1671, -0.0109, -0.1778,  0.0281,  0.0217,
        -0.2044, -0.2608,  0.0612, -0.1698, -0.0179, -0.0041, -0.1515, -0.0394,
        -0.2735, -0.0547, -0.1086, -0.1970, -0.1012, -0.5061,  0.0207,  0.0327,
        -0.1619, -0.4208,  0.0195, -0.0375,  0.0209,  0.0023,  0.0011, -0.0475,
        -0.0160,  0.0220,  0.0319, -0.0263,  0.0168,  0.0151,  0.0503],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2915, -2.2086, -0.9334, -0.0910,  0.0295, -0.0270,  0.0792,  0.0125,
        -0.1577,  0.0936,  0.0636, -0.0181,  0.0654,  0.1993,  0.0407, -0.0656,
        -0.6972,  0.0023,  0.0376, -0.1297, -0.0680,  0.0701, -0.5151, -0.6653,
         0.0638,  0.0081, -0.0179, -0.0076, -0.0555,  0.0520,  0.0440,  0.0453,
         0.3224,  0.1923,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6007e-01,  3.0737e+00, -2.1538e-01,  6.3445e-02,  3.6204e-02,
         5.1482e-01, -8.4427e-03, -1.6196e-01,  6.3269e-02,  1.6324e-01,
        -2.8117e-01, -8.5690e-02,  5.7286e-01,  8.7241e-01, -2.0821e-04,
         8.5027e-02,  2.9833e-01,  1.4683e-01,  3.7825e-01,  8.4248e-01,
         1.0237e-02,  1.2218e-01,  1.4708e-01,  1.0810e-01,  4.8486e-02,
        -1.2662e-01, -2.1723e-01,  1.8400e-01, -3.3059e-02,  3.7474e-01,
         1.9819e-02, -2.0996e-01,  2.4966e-01, -4.1033e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-0.1618, -3.1725, -0.0238, -0.0707, -0.4681, -0.7037, -0.1743,  0.0108,
        -0.0298, -0.8028, -0.1099, -0.1016, -0.3820, -1.0640, -0.1153,  0.0100,
        -0.0122,  0.1439,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3035,  0.0458,  0.1188,  0.2846,  0.0383, -0.0724,  0.0699, -0.0061,
        -1.0827, -2.1028, -0.0456,  0.0629, -0.3375, -0.1056,  0.1040,  0.0695,
        -0.0580,  0.2033,  0.1148, -1.4774,  0.0611,  0.1338, -0.3568, -0.2204,
        -0.0636,  0.3621, -0.3378,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6710e-01, -1.6975e-01,  1.0220e-01, -4.3142e-02, -4.2532e-03,
         1.6366e-01,  9.4265e-02,  4.0400e-01,  1.5180e+00,  1.8262e-01,
         2.9144e-02,  3.7089e-01,  1.7920e+00,  6.7583e-02, -1.5703e-03,
        -1.0634e-02,  5.2904e-02,  6.0607e-02,  1.7970e-01,  1.9928e+00,
         8.4126e-02,  5.4516e-03,  2.9716e-01,  1.5317e-03, -5.8034e-01,
         1.5104e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1234, -1.8571, -0.0805, -0.9570, -0.7800,  0.2917, -0.0214, -0.1089,
        -0.7156, -0.1819, -0.7598, -0.5308,  0.0666, -0.0889, -0.7692, -0.7213,
        -0.5665, -0.6697,  0.0454, -0.0777,  0.4922,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1720, -1.5589, -0.9194, -1.0337, -0.2061,  0.0884, -0.6187, -0.0397,
        -0.1541,  0.0759,  0.0701, -0.4897,  0.0383,  0.0785,  0.0590,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3305, -0.1405, -0.0473,  0.0290, -0.2808,  0.0239,  0.0585, -0.1730,
        -0.0421,  0.1510, -0.0291,  0.1298, -2.4044, -0.2509, -0.1832, -0.1654,
        -0.1341,  0.0667,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3712, -2.7291, -0.8901, -1.1834, -0.1642, -0.0845,  0.0111, -0.2188,
        -0.0305, -0.6343, -1.1196, -0.6972, -0.1598, -0.6037,  0.0247, -0.0400,
         0.2179, -0.6786, -0.7918,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0386, -1.0522, -0.7430,  0.0348, -0.0607, -0.2161,  0.1218, -0.0580,
        -0.1345,  0.0342, -0.0664, -0.0245, -0.2093, -0.0260, -0.0706,  0.0533,
        -0.3506, -0.0119,  0.0047,  0.1263, -0.4512, -0.0201, -0.3118, -0.0066,
        -0.2695,  0.0262, -0.3338,  0.1431,  0.0246, -0.3266, -0.0521,  0.2048,
        -0.3825, -0.0079, -0.3291, -0.1620, -0.4607,  0.0557, -0.0720, -0.0272,
         0.0058, -0.0409,  0.0734,  0.0377], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3569, -1.6768, -0.1393, -0.6064, -0.4305, -0.0936, -0.1876, -0.0238,
        -0.5794, -0.1507, -0.1341,  0.1211,  0.0113, -0.0146,  0.1642, -0.0205,
         0.6222, -0.5131,  0.0045, -0.2535, -0.3220,  0.0072, -0.0295,  0.0334,
        -0.0964,  0.0645, -0.0855, -0.5671,  0.0740,  0.1012,  0.0251, -0.3476,
        -0.0235, -0.4460, -0.1172,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3274e-01, -3.8487e+00, -1.5260e-01, -9.9648e-01, -2.3800e-01,
        -7.5822e-02, -7.7728e-01, -6.1270e-01,  2.0556e-03,  3.6441e-02,
         2.2614e-02, -4.4235e-01, -6.5314e-01, -9.5665e-02,  1.5042e-02,
        -5.0784e-01, -1.9251e-02, -3.9183e-02,  1.0157e-01,  6.6359e-02,
         6.4521e-02,  6.8622e-02, -1.8544e-01,  1.1317e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0017e-01, -1.7627e+00, -1.4189e-01, -5.7862e-01, -2.8384e-01,
        -1.7385e-01, -8.6320e-03, -2.1423e-01, -5.5104e-01,  7.3957e-02,
        -2.6814e-01,  5.1828e-02, -5.0708e-03, -3.3724e-01, -3.9095e-01,
        -1.1233e-02,  4.3986e-02, -5.9176e-02, -3.7303e-01,  5.2954e-02,
         1.0759e-03, -2.6634e-01,  1.5520e-02, -1.9054e-01,  3.0706e-02,
        -2.4660e-01, -1.9755e-01, -1.8919e-01, -3.2416e-02, -6.6014e-02,
        -2.5021e-01,  2.9977e-02, -4.6565e-02,  1.8490e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4900,  0.0441, -0.1251, -0.1756,  0.0706,  0.0281, -0.8558, -0.1058,
         0.1172,  0.0225,  0.1279, -0.0996, -0.6129, -0.0515, -0.7792, -1.1667,
        -0.0710,  0.0371, -0.0254, -0.0033,  0.0623,  0.0132,  0.0380, -0.6051,
        -0.3221, -0.0447, -0.1886, -0.0536,  0.0131, -0.0482,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.2637, -0.1391, -0.4595, -1.8544, -0.2241, -0.0929, -0.1480,  0.0114,
        -1.0183, -1.1584, -0.0212, -0.2957,  0.0207,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1937,  0.0486, -0.0603,  0.0331, -0.0507, -0.1758,  0.0131, -0.8994,
        -1.0485, -0.0088, -0.0789, -0.4223, -0.1628, -0.2521, -0.9848, -0.0145,
         0.0412, -0.0174,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4460, -1.4489, -2.6262,  0.7631, -0.7700,  0.2247, -0.2693, -0.0496,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4126e-01, -2.1034e+00, -5.6358e-02, -3.8514e-01, -2.7625e-01,
        -1.5356e-01, -2.0607e-01, -8.0917e-02, -2.4293e-02, -1.7712e-01,
        -3.0757e-02,  1.7917e-01, -1.6734e-01, -2.9567e-02,  3.5134e-02,
        -9.5682e-02,  4.5529e-02,  5.7750e-02, -3.9069e-02, -6.4190e-04,
        -3.6585e-02,  9.4132e-02, -2.2283e-01, -2.5061e-01,  9.1304e-03,
        -2.2900e-01, -4.9023e-02,  7.5279e-02, -7.4355e-01, -1.7640e-01,
         1.0821e-01, -5.0390e-01, -7.5522e-02, -1.0015e+00, -1.5784e-01,
        -2.1887e-01, -1.8021e-01, -8.0604e-02,  1.0484e-01,  5.7519e-02,
        -7.1538e-02, -1.3681e-01, -1.2310e-01, -2.7313e-02,  8.2113e-02,
        -7.0779e-02, -1.5151e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3255,  1.7787, -0.7623,  1.8233,  0.7753, -0.5774, -0.6148,  0.3003,
         1.0668,  1.0750, -0.4138,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5600e-01, -3.7530e+00, -1.0895e-01, -4.2881e-01,  4.2110e-02,
         1.0277e-03, -3.6333e-01, -1.4593e+00,  2.7144e-01, -1.1350e+00,
        -4.2016e-01,  5.2185e-02, -8.6207e-01, -1.0324e-01, -3.1968e-02,
         1.1281e-01,  5.9557e-02,  2.4830e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8417, -1.0977, -2.5597,  0.0596,  0.6563,  0.1319, -1.1515,  0.0893,
        -0.3931,  0.0061,  0.0689,  0.1379,  0.0713,  0.0704,  0.5011,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7713, -3.8172, -0.1941,  0.1838,  0.1519, -0.5533, -1.2209, -0.2558,
        -0.1788, -0.5203, -0.6979, -0.0643, -0.0291,  0.0194,  0.4932,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2106,  0.0356, -0.0504,  0.3897,  0.0452,  0.1474, -0.0081, -0.0268,
        -0.4599, -0.3325, -0.2926, -0.7348, -1.0529, -0.0637, -0.0047, -0.1179,
        -0.1857, -0.0339,  0.0419,  0.0527, -0.5801, -0.9870,  0.0593,  0.1026,
         0.0189, -0.1126,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6730, -0.0268,  0.2472,  0.0085,  0.2127, -0.0066, -0.3109, -1.2122,
        -2.6271, -0.1581, -0.4707,  0.0166, -0.1430,  0.5088,  0.2418,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0646, -3.0170,  0.0086, -0.3734, -0.7855, -0.2084, -0.4056, -0.0071,
        -0.0265, -0.9821,  0.1773, -0.0175, -0.1776, -0.0529, -0.0450, -0.0271,
        -0.0861,  0.0176, -0.3700,  0.0648, -0.2235,  0.0845,  0.1496,  0.1383,
         0.0481,  0.3814,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2617,  0.1153,  0.0750,  0.1465, -0.0741, -0.2542, -0.0759,  0.0475,
        -0.2817, -0.3660, -0.6805, -0.0940, -0.0878, -0.0693, -0.7687, -0.4032,
         0.1472, -0.2958,  0.0276, -0.3147, -0.1385, -0.0256, -0.1444, -0.1426,
         0.2611, -0.0725,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-1.1312e+00, -3.7280e+00,  1.2691e-01,  2.5877e-01, -5.5599e-02,
         1.3189e-01, -4.3494e-01, -5.3731e-01, -5.6623e-04, -3.5368e-01,
        -1.0465e-01, -3.9211e-01, -7.9980e-01,  2.2816e-02, -3.4166e-01,
        -7.0985e-02, -1.8116e-02, -5.0244e-02, -1.6982e-02,  3.0273e-02,
         2.2940e-01, -3.8395e-02,  2.0145e-01, -3.7117e-01, -7.2715e-01,
         2.2079e-01, -2.4762e-01, -1.0852e-01,  2.9389e-01, -2.0167e-02,
         2.0384e-02, -2.1006e-01, -5.6805e-02, -1.5296e-01,  7.1619e-01,
        -2.8575e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2016, -0.9579, -0.3319,  0.0404, -0.3526,  0.0573, -0.2860, -0.6024,
         0.0659,  0.0879, -0.2390,  0.0318, -0.0244,  0.0426, -0.0538, -0.1341,
         0.0212, -0.0103,  0.0763, -0.0043, -0.0638, -0.6274, -0.0215, -0.0476,
        -0.2700, -0.4959, -0.0452, -0.2808, -0.3050,  0.0616, -0.0235, -0.3229,
        -0.3199,  0.0604, -0.1200, -0.1452,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2042,  0.0454,  0.1083, -0.1905, -0.0848, -1.2131, -0.2531, -0.1456,
         0.0039, -0.0615, -0.1903, -0.4740, -1.2913,  0.6402,  0.0495, -0.0847,
         0.0939,  0.0769, -0.3146,  0.0240,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3041e-02, -8.2903e-01, -1.6380e-01, -1.9880e-01,  6.4622e-02,
        -1.3369e-03,  1.4992e-02, -4.0103e-01, -2.7120e-02, -1.0561e-01,
         4.3262e-02, -2.9704e-02,  2.7655e-02,  5.2355e-02, -4.2420e-01,
        -3.2633e-01, -6.2631e-02, -2.9870e-01, -5.6298e-02, -4.3993e-02,
        -8.5782e-02, -1.0420e-02, -9.5870e-03,  1.1671e-01,  3.1607e-02,
        -1.6548e-02,  7.3013e-04, -2.8142e-02,  1.9210e-02, -1.3237e-01,
        -5.6235e-01,  5.7244e-02,  3.0076e-02, -5.0726e-01, -7.5186e-03,
        -5.4100e-02, -2.0240e-02,  6.7598e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0523e-01, -1.6254e+00, -7.9826e-01, -6.2485e-01, -1.4703e-03,
         2.5150e-02, -2.0480e-02, -3.8783e-02, -4.5343e-02, -1.5164e-01,
        -3.4263e-01, -5.4472e-01, -4.7322e-02, -1.1689e-02,  9.7926e-03,
        -2.7123e-01,  4.7380e-02, -2.1504e-01, -2.7881e-03,  4.1580e-02,
         4.7497e-02,  3.6012e-02,  9.0546e-02,  9.3833e-02, -7.0622e-03,
        -3.9240e-02, -9.2817e-02, -2.3974e-01, -4.3181e-01,  2.9448e-02,
        -4.9139e-02, -2.8673e-02,  7.3166e-02,  2.9134e-03, -1.9269e-01,
         5.0070e-02,  4.6202e-02,  4.1019e-03,  3.8849e-04, -2.8127e-02,
         8.8039e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0359, -3.1413, -0.1263, -0.3020, -0.0871, -0.6372, -0.0033, -0.0425,
        -0.1353,  0.0738,  0.0220, -0.6452, -0.5889, -0.0803,  0.1236, -0.5062,
        -0.0077, -0.3487, -0.0164,  0.0590,  0.0690,  0.0055,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0927,  0.0472,  0.0169,  0.0138,  0.0528, -0.1705, -0.0520,  0.0731,
         0.0566,  0.0483,  0.0622, -0.2051, -0.0198, -0.1854,  0.0131, -0.4793,
         0.0719, -0.1795, -0.0041, -0.2906, -0.4281,  0.0369,  0.1424, -0.2031,
         0.0307, -0.0227, -0.0439, -0.1347, -0.4607,  0.0819, -0.2842, -0.1605,
        -0.1018, -0.0243, -0.0489,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0699, -0.0921, -1.6033, -0.1324, -0.3566,  0.0078,  0.0944, -0.0786,
         0.1105, -0.0124, -0.3374,  0.0599, -0.0844,  0.0815, -0.0082, -0.1314,
         0.2667, -0.1389, -0.5813, -0.0658, -0.3985,  0.1526, -0.0930, -0.4592,
        -0.1497, -0.0686, -0.3037,  0.1218, -0.2192,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3389e-01, -2.0991e+00, -4.6852e-01, -2.9522e-01, -1.3987e-01,
        -3.8407e-01,  3.7681e-02, -8.3927e-05, -1.4956e-01, -1.8248e-01,
        -2.3922e-01, -2.4182e-01, -2.6873e-02, -2.5285e-02,  2.9035e-02,
        -2.0824e-01, -1.9343e-01, -8.7512e-02, -2.1008e-01,  5.6751e-02,
        -4.5349e-02, -1.2152e-01, -7.1982e-02, -9.9814e-02,  1.0605e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3046,  0.0646, -0.0484,  0.0428, -1.2835,  0.0685, -0.2249, -0.6710,
        -1.1507, -0.1556, -0.0803,  0.1574, -0.0301,  0.2745, -0.6101, -0.5788,
         0.1301, -0.4688, -0.0242,  0.0243,  0.0098,  0.0689,  0.2080,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1077,  0.0490,  0.0341,  0.0784,  0.0805,  0.0375, -0.0507, -0.0545,
        -0.0222, -0.0471,  0.0081, -0.0053, -0.4124, -0.3312, -0.0315, -0.1655,
        -0.0178, -0.3885,  0.0293,  0.0313,  0.1045,  0.0668,  0.0402,  0.0077,
        -0.0797, -0.0965, -0.7061,  0.0638, -0.4540,  0.0607, -0.4261, -0.0782,
        -0.0894, -0.0835, -0.2880, -0.0304, -0.0384,  0.1055, -0.0878,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0106e-01, -1.4515e-01, -9.9018e-05, -8.5368e-01, -1.8428e+00,
         2.2451e-01, -5.3822e-01, -2.6378e-01, -9.0884e-01,  4.9192e-01,
        -1.9025e-01,  2.0497e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.4123e-01, -9.0673e-01, -1.3351e+00, -1.2727e-01, -1.6092e-02,
         1.2161e-01, -2.4577e-01,  6.1519e-02, -2.5683e-02, -2.6329e-01,
        -5.5424e-03,  1.4998e-02, -1.5493e-01, -2.5390e-02, -2.9036e-01,
         1.5445e-02, -4.2833e-02, -2.1672e-01,  3.4678e-03, -4.3804e-02,
        -6.5215e-02, -1.4718e-01,  3.2114e-02, -3.7941e-02, -1.8147e-02,
         3.4883e-03,  5.9558e-02, -3.9996e-02, -1.0622e-01,  5.5309e-03,
        -4.0904e-02, -7.1054e-03, -3.8293e-01,  2.1597e-02,  3.9962e-02,
         5.0768e-02, -2.6181e-01, -4.9582e-04,  2.5703e-03, -9.2538e-03,
         4.3749e-03,  9.6189e-03,  8.1646e-02, -6.2266e-02, -7.5096e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0551, -7.3708, -0.5110, -1.0381,  0.0506, -0.0787,  0.9932, -0.4649,
        -1.4216, -0.2355, -0.7579,  0.0520,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6074, -5.6988,  0.1986, -1.3945, -0.1241, -0.0247, -0.2208, -0.2287,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2153, -1.2539, -0.0512,  0.0122, -0.3663, -0.0831,  0.0102, -0.0270,
        -0.1258, -0.5149, -0.0910,  0.0252, -0.0260, -0.0057,  0.0295,  0.0137,
        -0.0059, -0.0298, -0.0116, -0.0336,  0.0237,  0.0458, -0.0305, -0.0065,
        -0.0569,  0.0170,  0.0054, -0.0035, -0.2454, -0.0293, -0.0110, -0.0713,
         0.0104,  0.0109,  0.0039,  0.0309,  0.0877,  0.0216, -0.0130,  0.0149,
        -0.0414, -0.0200,  0.0577, -0.0595, -0.0838,  0.0940, -0.5363,  0.0288,
        -0.5143, -0.8268, -0.2759, -0.0018, -0.2239,  0.0575, -0.1249, -0.0481,
        -0.1372, -0.0380,  0.0260, -0.0329,  0.1416], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1588, -0.0420, -0.0226, -0.0676, -0.3244,  0.1248, -0.2105,  0.0252,
        -0.1516,  0.0023, -0.0557, -0.5850,  0.0766, -0.5633, -0.5453,  0.0221,
        -0.1347, -0.0478, -0.2303, -0.1532, -0.4035, -0.3794, -0.1266,  0.0720,
        -0.2437, -0.2693, -0.0376, -0.0703, -0.0859, -0.1483, -0.0778,  0.1546,
        -0.4475,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4585e-01, -1.7553e-01, -8.6085e-01, -9.5901e-01, -1.2989e-01,
         7.9569e-02, -5.6918e-01, -9.9760e-04,  2.1196e-02, -8.0764e-02,
        -1.9720e-02, -1.3772e-02, -3.0761e-01, -1.2379e-01, -6.9232e-02,
         1.8743e-01, -8.3350e-02, -6.4612e-01, -1.7310e+00,  2.2342e-01,
        -8.4254e-02, -3.0609e-01,  1.3010e-01, -7.5047e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0404, -0.5065, -0.2120, -0.2980, -0.0198, -0.6649,  0.0441, -0.3539,
         0.0379, -0.5513, -0.4358, -0.6510, -0.9048, -0.0515,  0.1007,  0.0548,
         0.0297, -0.1454,  0.0984,  0.0043, -0.1748, -0.1873,  0.1405,  0.0214,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1236,  0.1295, -0.4607, -0.4606, -0.0334, -0.0220,  0.0878, -0.2393,
        -0.0090,  0.0668,  0.0323, -0.0054, -0.0100, -0.0165, -0.2464, -0.7194,
        -0.4598,  0.0597,  0.0190, -0.0853, -0.3768,  0.0139, -0.2978, -0.5089,
         0.1025,  0.0654, -0.2933, -0.0226, -0.0469, -0.0957, -0.0549, -0.0537,
         0.2613,  0.0481,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0385, -0.1169, -0.0221,  0.0953, -0.6395, -0.0028, -0.0833, -0.0078,
         0.0734, -0.0611, -0.0118,  0.0307, -0.1090, -0.7742,  0.1240,  0.1936,
        -0.8338,  0.1389, -0.4569,  0.0458, -0.5718, -0.8915,  0.2302,  0.0391,
         0.0956,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6379, -0.1179, -0.3103, -0.0163,  0.0940,  0.3423, -0.7074, -0.3646,
        -0.9323, -1.4798,  0.3031, -0.3399, -0.7300, -0.2638,  0.3844, -0.0945,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4680e-02, -1.3874e+00, -3.4124e-02, -1.0626e+00, -2.6119e-01,
        -4.4685e-01, -9.1736e-01, -4.5433e-02,  5.1637e-02, -3.2904e-04,
        -2.1654e-02, -4.3170e-02, -3.0353e-01,  1.0228e-02,  3.5033e-02,
        -1.2597e-01, -2.8464e-01, -6.3472e-01, -3.6477e-01, -1.6634e-01,
        -3.9685e-02,  1.9464e-01,  1.2155e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1584, -0.3283, -0.3928, -2.2694, -4.4026, -0.3684,  0.3763, -0.0774,
        -0.3409,  0.3750, -0.2014,  0.0551,  0.2181, -0.2746,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.3847, -0.4463, -0.3680, -0.6067, -0.0824,  0.0921, -0.3246, -0.0594,
        -0.2031, -0.0625, -0.5429, -0.5221, -0.0594, -0.0639,  0.0189,  0.0152,
         0.1525, -0.5351, -0.0640, -0.3688, -0.1073, -0.0524,  0.0320, -0.1085,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3217,  0.0914, -0.6431,  0.0846, -1.0091,  0.1031, -1.7055,  0.0220,
         0.1514, -0.0029,  0.0636,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0341,  0.0206,  0.0994, -0.0060,  0.1421, -0.3305, -0.7553, -0.2681,
        -0.5636, -0.1008, -0.3985,  0.0609, -0.3900,  0.0051, -0.3418, -0.1613,
        -0.1318, -0.4417, -0.0516,  0.0515, -0.1600,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4037,  0.2593, -0.0518,  0.1206,  0.2805,  0.0431, -0.2627, -1.7697,
        -0.4269, -1.5485, -0.1760,  0.1463,  0.1289, -0.2210,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0997, -0.4980,  0.0299, -0.1307,  0.0116, -0.0094, -0.1200,  0.3212,
        -0.0290, -0.0386, -0.7043, -0.2215, -2.3317, -0.0350, -0.0974, -0.1076,
        -0.0743, -0.1339,  0.2721,  0.4225,  0.3560,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9076,  4.6870,  0.2206,  1.9932, -0.3097,  1.4273,  0.5245,  0.9553,
         0.4502,  1.1315,  0.4216,  0.0542,  0.0617,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1232, -0.9602, -0.0499, -0.0767, -0.0905, -0.0752, -0.3476, -0.4193,
        -0.1343,  0.0350,  0.0103,  0.0194,  0.0510,  0.0192, -0.0128, -0.1350,
        -0.0100,  0.0206, -0.0399,  0.1026, -0.0848,  0.0703,  0.0207, -0.0498,
        -0.6802,  0.1375, -0.4153, -0.5578, -0.4156, -0.0920, -0.0806,  0.0450,
        -0.0207, -0.0182,  0.0603,  0.0173, -0.2359, -0.0262,  0.0491, -0.1093,
        -0.2356, -0.0416, -0.0634,  0.1852,  0.1364], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1514, -0.2927, -0.5944, -0.1087, -0.0434,  0.0798,  0.0097, -0.2545,
        -0.0384, -0.0628, -0.5229,  0.0483, -0.0820,  0.1442, -0.7959, -0.1569,
        -0.8740, -0.0107,  0.0763,  0.0593,  0.1454, -0.5524,  0.0436, -0.5639,
         0.0943,  0.1279, -0.0387, -0.0723, -0.2814, -0.0584, -0.0230,  0.0255,
        -0.1123,  0.0343,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3942,  0.0100,  0.1075, -0.2735, -0.0124, -0.0511,  0.2023, -0.0069,
         0.4603,  0.0608, -0.0622, -0.1137, -0.0398,  0.0684,  0.0435,  0.2261,
         0.1293,  0.3493, -0.2768, -1.9830, -2.8055,  0.3456,  0.1619, -0.8988,
        -0.0897, -0.2640,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0657, -3.0352, -0.1001, -0.7328,  0.2450, -0.0479, -0.0352, -0.9240,
        -0.0750, -0.0302,  0.0744,  0.0222,  0.0131,  0.0688, -0.1505, -0.9577,
        -0.7138,  0.2378, -0.5270, -0.2525, -0.5979,  0.0896,  0.2680,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2842, -0.9561, -0.1777, -0.2312, -0.0184,  0.0023, -0.0878, -0.3686,
        -0.6086, -0.0442,  0.0139, -0.0112,  0.0744, -0.0045, -0.6781, -0.0129,
         0.0776, -0.2717, -0.3749, -0.0584, -0.0087,  0.0140,  0.0207,  0.1326,
         0.0022, -0.0261, -0.4349, -0.6061, -0.0629, -0.0786,  0.0428, -0.0247,
        -0.0162,  0.0264, -0.0632, -0.0066, -0.1762,  0.0400, -0.1715,  0.0511,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1080,  0.0891,  0.0569, -0.1733, -0.4825, -0.0023, -0.1353, -0.2974,
         0.0278, -0.0430,  0.0106, -0.0599, -0.2332, -0.0274, -0.1235, -0.3334,
        -0.3405, -0.0168, -0.1369, -0.2180,  0.0062, -0.3925, -0.4670, -0.0299,
         0.0158,  0.0359, -0.0420, -0.3460, -0.0411, -0.1209, -0.2551, -0.0233,
        -0.0146,  0.1412,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0539, -0.0321, -0.0622, -0.1104, -0.3902, -0.0563,  0.0252, -0.0485,
         0.0288,  0.0075,  0.0099, -0.1454,  0.0511, -0.0657, -0.6802,  0.1363,
         0.0373, -0.0132, -0.2551, -0.0105,  0.0238,  0.0185, -0.0018,  0.0412,
        -0.4491, -0.6011,  0.1920, -0.1358, -0.0170, -0.0582, -0.1230,  0.0043,
        -0.0042, -0.0153, -0.1976,  0.0142, -0.1493, -0.2226, -0.0370, -0.1686,
        -0.1519, -0.0944, -0.2354, -0.0605,  0.0382,  0.0037, -0.0097,  0.0186,
        -0.0408, -0.0132, -0.0434, -0.0581], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8372e-01,  1.2354e-02, -8.0261e-02, -9.8545e-02, -9.3404e-01,
        -3.2032e-02, -2.0494e-01,  2.0286e-01,  4.7135e-02,  7.6070e-03,
         1.5884e-01,  5.1229e-02, -1.3520e-01, -6.9635e-01, -4.6228e-02,
        -2.5791e-01, -7.5296e-01, -2.9478e-01, -6.6581e-01, -6.7435e-02,
        -2.2778e-01,  3.9421e-02, -2.1033e-01, -3.5038e-01, -1.4855e-02,
        -1.2512e-02,  5.7625e-02, -3.9428e-04,  8.5855e-04,  3.2752e-02,
        -3.2178e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3780, -1.7072,  0.0115, -0.1998,  0.1037,  0.2305, -0.2210, -0.0088,
        -0.0212,  0.1564,  0.0721, -0.5303,  0.2472, -0.2234, -0.9140, -0.6787,
        -0.0106, -0.7360, -1.0525, -0.7585, -0.2816, -0.0838,  0.0776, -0.0994,
        -0.1060,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0807,  0.0321, -0.1015,  0.1505, -0.2284, -1.9258, -2.0948, -0.3278,
         0.1123, -0.0210, -0.0792,  0.1710,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2365, -2.2146, -0.1409, -0.5947, -0.9197,  0.0238,  0.1546,  0.0424,
         0.0707,  0.0434,  0.1873, -0.0433, -0.3614, -0.9639, -0.1633, -0.7067,
         0.0367,  0.1034,  0.0296,  0.1147, -0.2106, -0.0233, -0.0047, -0.2100,
         0.0869, -0.2417,  0.2136, -0.4522, -0.1596,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7725, -3.7680,  0.3178, -0.4997,  0.3810, -1.0450, -1.0720,  0.3875,
         0.0098, -0.1110,  0.1360, -0.0381,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6997, -3.5885, -0.3920, -1.1775,  0.0036, -0.7774, -1.1625, -0.1892,
        -0.4181, -0.0765, -0.2023,  0.2650,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0775,  0.2306, -1.4280, -0.1341, -0.8310,  0.0774,  0.0357, -0.0404,
        -0.6154, -0.1020,  0.1050, -0.4568,  0.0317,  0.1560, -0.0600, -0.4270,
        -0.0464,  0.0262, -0.1267,  0.2938,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0065, -1.3391, -0.1364, -0.4046, -0.7294, -0.0993, -0.1752, -0.0958,
        -0.0977, -0.1179, -0.1136, -0.4421, -0.2445, -0.0539, -0.0911,  0.0365,
        -0.2347,  0.0614,  0.0029,  0.0062, -0.3302,  0.1268,  0.0888,  0.0611,
         0.0355,  0.0692,  0.0395, -0.1385, -0.1280, -0.0049,  0.0284,  0.0150,
        -0.2076, -0.3042, -0.0566,  0.0833,  0.0384,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1237,  4.4667,  0.3466,  0.2449,  0.1523,  0.1263, -0.2494,  0.1211,
        -0.1309,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0834,  0.1308,  0.0433, -0.6834, -0.0302,  0.2955, -0.4420, -0.4508,
         0.1832,  0.0390, -0.0737, -0.1015, -0.9299, -0.8706, -0.0066,  0.0624,
        -0.0364, -0.1178, -0.3473, -0.5932,  0.0387,  0.0776,  0.0156,  0.0159,
        -0.0582,  0.1354,  0.0386,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0740, -2.5536, -0.0605, -0.6071, -0.2736, -0.2804, -0.0585,  0.0239,
        -0.0640,  0.0331, -0.0499, -0.7377, -0.2525,  0.0778, -0.0618,  0.0367,
        -0.5621, -0.0056,  0.0407,  0.0261,  0.0601, -0.5585, -0.5100,  0.0209,
        -0.0128, -0.0152, -0.0085, -0.1540, -0.9155, -0.1125, -0.0243,  0.0856,
         0.0603,  0.3631,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-3.6229e-02, -3.3294e-03,  5.3007e-02,  1.1957e-02, -4.7415e-01,
        -5.0321e-02,  7.7134e-02,  6.7439e-02,  9.4078e-02,  5.3343e-02,
        -1.2731e-02, -3.8295e-01,  3.6684e-03,  1.1171e-02,  1.3762e-02,
        -4.2755e-01, -5.1649e-01, -2.3706e-02, -1.6408e-01, -4.1271e-02,
         5.2335e-02,  1.5150e-02, -1.7296e-01,  5.0664e-02, -5.6789e-02,
         1.6741e-02, -1.2290e-04, -5.9728e-02,  1.7541e-02, -4.8464e-02,
         2.6154e-02, -3.8567e-01, -6.8942e-01,  1.2918e-01, -5.7041e-02,
        -2.3072e-01, -2.0016e-01, -6.7280e-02,  9.4777e-02,  9.8041e-03,
        -1.0585e-01, -2.3254e-01,  4.0703e-02,  1.1607e-01, -3.8184e-02,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6932e-02, -8.8480e-02,  1.5859e-02, -1.2303e-01, -3.2618e-02,
        -9.1277e-01, -7.8427e-01,  5.2838e-02, -8.4318e-02, -4.9814e-01,
        -1.1220e-01, -8.9666e-04, -4.4714e-01, -4.7201e-02,  1.0673e-01,
        -3.1625e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1147,  0.1025, -0.0853,  0.1586,  0.1097, -0.0567, -0.0296, -0.1066,
        -0.9407, -1.3461, -0.4735, -0.6230, -0.1398, -0.8977, -1.1669, -0.1089,
        -0.0901, -0.1013,  0.0173, -0.0759, -0.0065,  0.1761, -0.0605, -0.2134,
        -0.1831, -0.2203, -0.2331,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2531, -0.0399, -0.3511,  0.0434, -0.1489,  0.0205, -0.1063, -0.0406,
        -0.1207, -0.0586, -0.0075, -0.0568, -0.1205,  0.0068, -0.0896, -0.0069,
        -0.0934, -0.0082,  0.0335, -0.1211,  0.0223, -0.1197, -0.0440, -0.3225,
         0.0542, -0.0033, -0.1392, -0.2501,  0.0462, -0.1772, -0.0280, -0.1812,
        -0.2019, -0.0510, -0.0123, -0.2341, -0.0779, -0.3258,  0.0498,  0.0141,
        -0.1194, -0.1983, -0.0383,  0.0418,  0.0850, -0.0565], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4107e-01, -3.3329e-01,  4.6358e-02, -1.0143e-01, -6.7638e-02,
        -1.4890e-01,  1.0246e-02,  1.1079e+00,  1.8417e-01,  2.1406e-02,
        -5.4466e-02, -7.7199e-02, -1.7192e-02, -7.7024e-05,  2.2009e-01,
         6.2464e-02, -8.5154e-02,  1.8909e-01,  1.0948e+00,  2.5226e+00,
        -3.4595e-03,  9.5967e-02,  1.2748e-01,  1.2016e-01, -1.3567e-03,
         9.0624e-02,  6.0685e-01, -1.5847e-01,  5.8583e-02, -4.3216e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2631,  0.0564,  0.0215,  0.2106, -0.0019,  0.3180,  0.5149,  1.7761,
         0.4519, -0.0059, -0.0617,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1163, -3.1941,  0.3560, -0.1527, -0.3637, -0.1291, -0.3440, -0.9638,
         0.0732,  0.0367,  0.1209,  0.0963, -0.1387, -0.0864, -0.1065, -0.4743,
        -0.0434, -0.3677, -0.2012,  0.1115,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7865, -3.4341, -1.8821, -0.8478,  0.3068, -0.6432, -0.6872,  0.0134,
        -0.0389, -0.2083,  0.3048,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5204, -1.8387, -0.6689, -0.6256, -0.1072,  0.0379, -0.0390, -0.8928,
        -0.6440, -0.0833, -0.0832, -0.1610, -0.1587, -0.1671, -0.0522, -0.2850,
         0.0891,  0.0176, -0.0360, -0.0334,  0.0996,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3101e-02, -2.6613e+00, -1.4702e+00, -1.1511e-01,  5.6111e-02,
        -1.9721e+00, -3.0196e-01,  1.6650e-02, -2.1063e-01, -9.1341e-02,
        -5.1210e-02, -4.4091e-01,  7.0829e-02, -3.9432e-02,  4.8860e-02,
        -1.0936e-03,  9.9494e-03,  1.5826e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1673, -0.8514, -1.2591, -0.0249, -0.0821,  0.1406, -0.0786,  0.0736,
         0.0331, -0.0833,  0.0681,  0.0274, -0.1070, -0.4013, -0.3314,  0.0174,
        -0.2190, -0.2915, -0.1141,  0.0149, -0.0946, -0.3833, -0.7623, -0.0837,
        -0.2712, -0.1005, -0.4270, -0.0910, -0.3495, -0.2356,  0.0249, -0.0183,
        -0.0030,  0.1000, -0.0560,  0.0335,  0.0490, -0.0519, -0.0471,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4557, -2.9017,  0.0248, -0.7634, -0.0340, -0.1545, -0.0756, -0.0062,
        -0.2768, -0.1699, -0.0579, -0.9734,  0.0207,  0.0165,  0.1143,  0.0615,
        -0.6143,  0.0237, -0.0595, -0.0285, -0.0266, -0.0252,  0.0274,  0.0160,
         0.0448, -0.0067, -0.1495, -0.0280, -0.2312, -0.3608, -0.0054,  0.0861,
         0.0951, -0.0850,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.2914,  0.1171, -1.0635, -1.5923,  0.2482, -0.7847, -0.0044, -0.4013,
        -0.6326,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0607, -1.0513, -0.2457, -0.4056, -0.1573, -0.5150, -0.0266, -0.3191,
        -0.2335, -0.0289, -0.2829, -0.1369, -0.0191, -0.1234, -0.0188, -0.5618,
        -0.1258, -0.2265, -0.0303, -0.0310,  0.0240,  0.0517,  0.0207,  0.0404,
        -0.3878, -0.0044, -0.0303,  0.0142, -0.0261, -0.0181, -0.1528, -0.3668,
         0.0062, -0.1986,  0.1510, -0.0341,  0.0212,  0.0119, -0.0641,  0.0864,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0755, -0.4239, -1.2714, -0.0698, -0.3932, -0.1430, -0.0655, -0.0684,
        -0.3849, -0.3156, -0.0314, -0.0028,  0.0039, -0.0084, -0.1224,  0.0084,
         0.0694,  0.1153, -0.3742,  0.0470, -0.2484, -0.0125,  0.0867, -0.0255,
        -0.0186, -0.1165, -0.1360,  0.0942, -0.2372, -0.3498,  0.0815,  0.0112,
        -0.0069, -0.0356,  0.0490,  0.0140, -0.1938, -0.1467, -0.0152, -0.0039,
         0.0722,  0.1001,  0.0562, -0.0308, -0.0073,  0.0074, -0.0501,  0.0358,
         0.0272, -0.0280,  0.0850,  0.0117,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0762, -1.4765, -0.1594, -0.4138, -0.2302, -0.0430, -0.8012, -0.1154,
        -0.1676, -0.0165,  0.1852, -0.1691, -0.7380, -0.1149, -0.5301, -0.4841,
        -0.3322, -0.0842, -0.4452, -0.0573, -0.2788, -0.1326,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3551e-01, -3.8357e+00,  2.1774e-03, -2.7084e-02,  7.7886e-02,
         3.4730e-03,  1.4786e-01, -5.1810e-01,  6.9794e-02, -1.3667e-01,
         5.0818e-02, -4.5184e-01, -2.0758e-02, -1.6037e-01, -1.8846e-03,
        -7.9461e-01, -2.9301e-01, -1.7553e-01,  1.6955e-02,  2.4556e-01,
        -2.5097e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0053, -0.9055, -0.1682, -0.1279,  0.0059, -0.0189, -0.1691,  0.0083,
        -0.1104, -0.0321,  0.0123, -0.0437,  0.0300, -0.0457,  0.0117,  0.0237,
         0.0464,  0.0030, -0.2260,  0.0340,  0.0239,  0.0053, -0.0331,  0.0088,
        -0.0175,  0.0067, -0.0089, -0.2008,  0.0157, -0.1494, -0.3816, -0.3266,
         0.0635, -0.1793, -0.1444, -0.0109,  0.0246,  0.0135, -0.0365, -0.0012,
        -0.2385,  0.0109,  0.0357, -0.0084, -0.2322,  0.0463, -0.2242, -0.1371,
         0.0090,  0.0025, -0.1008, -0.0304, -0.0351,  0.0180,  0.0112],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1461, -0.0243,  0.0837, -0.0876, -0.0010, -0.1518, -0.4354, -0.0665,
         0.0646,  0.0048, -0.0008,  0.0250, -0.0265, -0.1218, -0.3215, -0.0399,
        -0.0475, -0.1495, -0.1519, -0.4469, -0.0605, -0.2162, -0.2477,  0.0100,
         0.0300, -0.0315,  0.0472, -0.3573, -0.0760,  0.0677,  0.0070,  0.0013,
        -0.0827, -0.4028, -0.0083, -0.1881, -0.0452, -0.2180, -0.2130, -0.1878,
        -0.0334,  0.0045,  0.0603,  0.0157,  0.0715,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7183e-01, -1.8941e+00, -8.8644e-01, -8.5649e-01, -1.0537e-02,
        -7.6280e-02, -2.0209e-01, -1.4780e-01, -2.1585e-01, -9.3170e-04,
         1.3886e-01, -2.2275e-01,  7.5649e-02, -3.5165e-01, -5.1136e-01,
        -1.3209e-04, -1.0489e-01, -4.7842e-01,  1.6771e-01, -3.8200e-01,
        -4.3900e-01,  2.7527e-01,  5.2333e-02, -3.3304e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2791,  0.1474,  0.0290,  0.0378, -0.0770, -0.0336, -0.3521, -0.2994,
         0.0714, -0.0278,  0.0151,  0.0186, -0.0468, -0.2466,  0.0517,  0.0330,
        -0.2698, -0.3247, -0.3697, -0.4483, -0.0976,  0.0713, -0.2577, -0.6287,
         0.0131, -0.2869, -0.3208, -0.2178, -0.3532, -0.5233, -0.0163, -0.2990,
        -0.2716,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6653, -3.7259, -0.2411, -1.1127, -0.0316,  0.0275,  0.0194, -0.1951,
        -0.8725, -0.0146,  0.0548, -0.0779,  0.0626, -1.0159,  0.1279, -0.2058,
        -0.0555, -0.2754,  0.1689,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1611,  0.0290,  0.2292, -0.1459, -0.7192, -0.2799, -0.5850, -0.5705,
        -0.8039, -0.0243,  0.1399, -0.2156, -0.0498, -0.1182,  0.0229, -0.0064,
         0.0046, -0.3792, -0.3270, -0.0611, -0.3519, -0.5383, -0.0293, -0.0062,
         0.4554,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2466, -1.6564, -0.6803,  0.1524, -0.3034, -1.1745, -1.0316,  0.1582,
        -0.1961, -0.5372, -0.0935,  0.0117, -0.5673,  0.0284, -0.0947, -0.0337,
         0.1190,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-0.1689, -0.0678,  0.0801,  0.1407, -0.0024,  0.1056, -0.0773, -0.1973,
        -0.1106,  1.0765,  0.3579, -0.1621, -0.2107, -0.0902, -0.0543,  0.1379,
         1.0360,  1.0739, -0.1385,  1.0073, -0.3053,  0.9617,  1.3435,  0.0077,
        -0.1084, -0.2464, -0.1250, -0.1826, -0.0925,  0.1831,  0.0842, -0.6631,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8519e-01, -6.8278e-01, -1.0992e+00,  1.4009e-01, -7.3988e-03,
        -2.7784e-02, -4.8751e-01, -6.8200e-01, -6.6460e-02,  8.8704e-02,
        -1.0440e-03, -2.6063e-02, -9.7940e-02,  5.1406e-02, -6.4323e-02,
         9.3064e-02,  8.6482e-03,  6.2817e-02, -2.0163e-01, -1.0899e-02,
        -2.2890e-01,  4.5030e-02, -2.4556e-01, -5.5913e-01, -4.8036e-02,
        -8.0312e-02,  2.9150e-02, -1.0049e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3366e-02, -3.6348e-02,  2.8699e-04,  4.8545e-02,  5.7789e-02,
        -7.2833e-01, -1.0165e+00, -2.6324e-01,  8.0983e-02, -1.2417e-01,
         8.0110e-02, -3.5535e-01,  5.5916e-02,  5.6226e-03,  7.3101e-02,
        -3.6126e-01,  8.1638e-03, -5.9105e-01,  6.3324e-02,  8.1955e-02,
        -1.0904e-01,  2.2112e-01, -2.6797e-02,  8.7995e-02, -3.4461e-01,
         9.4623e-02, -3.0532e-02,  4.8975e-01,  3.4830e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0635, -1.2296, -0.1631, -0.3146, -0.8816,  0.0838, -0.0033,  0.0068,
        -0.0235,  0.0481,  0.0425, -0.1415, -0.2753, -0.6390,  0.1759, -0.3733,
         0.1260, -0.8083, -0.0441, -0.1980, -0.0261, -0.0480, -0.3050, -0.0286,
        -0.1474, -0.5937, -0.3626, -0.0356,  0.0138, -0.0402, -0.0453, -0.0517,
        -0.2474,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0367, -1.3009, -0.0609, -0.6558, -0.0651, -0.3603, -0.0882, -0.2180,
        -0.0805, -0.2758, -0.5680, -0.0154, -0.0539,  0.0088,  0.0397,  0.0392,
        -0.1816,  0.0364, -0.6671,  0.0845, -0.4562, -0.0256, -0.0414, -0.1579,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3484e-01, -1.1655e+00, -1.1561e+00, -1.0068e-01, -5.4865e-01,
        -1.6436e-02, -7.1671e-02, -4.0545e-01,  5.6711e-02, -2.3998e-01,
         1.5351e-01, -2.5912e-01, -5.1457e-01, -1.2293e-01,  3.6692e-02,
        -7.5317e-04,  3.6235e-02, -3.1432e-01, -5.6635e-02, -2.8796e-01,
        -2.0909e-01, -3.6514e-01,  3.2572e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4284e-02, -1.5294e+00,  6.0303e-02,  1.9756e-03, -1.8770e-01,
        -3.0632e-01,  6.4460e-02, -9.6624e-03,  1.9237e-03,  5.9043e-02,
         1.9579e-02, -7.2262e-02, -2.5332e-01, -4.9802e-02, -7.2284e-02,
        -1.8842e-01, -5.9349e-01, -3.9275e-01, -2.8010e-01, -5.9154e-02,
        -7.2118e-03, -8.0367e-02, -7.1878e-02, -3.4602e-02, -3.6513e-01,
        -8.8026e-02, -1.2441e-03,  2.7451e-02,  2.0941e-03, -2.4709e-01,
        -2.6678e-01, -7.4586e-02, -1.8194e-01, -4.5580e-02, -2.0804e-01,
        -2.7408e-01, -3.2259e-02, -4.1183e-01,  7.6644e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2478, -0.2628, -0.0630, -0.1125, -0.7472,  0.0149,  0.0283,  0.0186,
         0.0008,  0.0446, -0.0080, -0.0167,  0.0143,  0.0190, -0.0439, -0.2780,
         0.0514,  0.0492,  0.0101,  0.0580, -0.2541,  0.0115,  0.0743, -0.0375,
        -0.4849, -0.6200, -0.0635, -0.1318, -0.1158, -0.3753, -0.0430,  0.0171,
        -0.0223, -0.0366, -0.0580, -0.0085, -0.0999, -0.5042, -0.0205, -0.0141,
        -0.0599,  0.0186, -0.1935,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6145e-02, -1.2511e+00,  4.8823e-02,  3.1531e-02,  2.7908e-01,
         7.9106e-02, -6.8248e-02, -2.0152e-01, -5.1497e-01,  9.1131e-02,
         4.8703e-02,  2.7630e-03, -2.0658e-01,  1.2020e-02, -2.1080e-03,
        -4.9545e-02, -2.9382e-01, -4.5944e-01,  1.3768e-02,  1.4839e-02,
         5.7973e-02, -1.8425e-01, -6.2095e-02, -4.5272e-02,  1.0131e-01,
        -9.1421e-02,  3.0194e-02, -3.0508e-02, -1.4511e-02, -1.7328e-02,
        -1.0944e-01, -3.5753e-01, -7.6650e-01, -4.8068e-02, -4.8163e-02,
        -1.6312e-02, -4.4668e-02, -7.3248e-02, -3.0192e-01, -9.5781e-02,
        -2.9932e-01, -5.5532e-01, -5.4291e-02,  5.4228e-02, -4.7258e-02,
        -1.5633e-01, -2.2636e-02,  2.7984e-02, -1.5460e-02, -7.5563e-03,
        -8.0638e-04, -1.0073e-01,  1.1480e-01,  6.1634e-03,  9.6525e-03,
        -2.1749e-01, -1.2912e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2267, -4.0633,  0.1029,  0.0383, -0.5319, -0.0410, -0.9876, -1.1519,
        -0.3114, -0.1653, -0.4106,  0.0766, -0.0244,  0.4563,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5539e-02, -1.4545e+00, -4.5473e-01,  1.2870e-02, -1.0805e-01,
        -2.0265e-01,  5.6960e-02,  3.7780e-04, -2.6845e-01, -6.5715e-03,
        -2.7844e-01, -5.4982e-01,  1.6167e-02, -1.0831e-01, -4.0609e-01,
        -7.7794e-02, -2.2488e-01, -1.9440e-01,  7.4363e-02, -3.7065e-01,
        -5.6499e-01, -4.0897e-02, -2.4072e-01,  5.4887e-02,  1.1409e-01,
         2.0776e-01,  4.4595e-02,  1.1067e-01, -3.6583e-01, -4.3211e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3932, -1.5792, -0.2216, -1.2170, -0.5543,  0.0549,  0.1304,  0.0442,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.0687, -1.6473, -0.1126, -0.1677, -0.3922, -0.4941, -0.0347, -0.0756,
        -0.0190, -0.0426, -0.0999,  0.0599, -0.2339, -0.6063, -0.0826, -0.0200,
        -0.0409, -0.0023,  0.0465,  0.0260,  0.0885,  0.0247,  0.0655, -0.4365,
        -0.2467,  0.0106, -0.2609, -0.0458, -0.4771, -0.5780, -0.0228, -0.3976,
        -0.3630,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0135, -1.2674, -0.6341, -0.2236, -0.1177, -0.2592, -0.1085, -0.1164,
         0.0551, -0.1563, -0.0628, -0.0833, -0.2099, -0.0167,  0.0416,  0.0442,
         0.0044,  0.0195, -0.1866, -0.0077, -0.0472,  0.0453, -0.0673, -0.0544,
        -0.0171,  0.0250, -0.0893, -0.5362, -0.0282,  0.0202, -0.0524, -0.0019,
        -0.3141, -0.5967, -0.1887,  0.0994, -0.1237,  0.0589,  0.0515, -0.0015,
        -0.0183,  0.0081, -0.0094, -0.2947, -0.1009,  0.0363, -0.0204, -0.0020,
        -0.0128, -0.0275,  0.0261, -0.2158, -0.0801, -0.1035,  0.0114, -0.0108,
        -0.0183,  0.0521,  0.0031, -0.1311, -0.0158, -0.1035, -0.3251,  0.0056,
         0.0178, -0.0312, -0.0400,  0.0783,  0.1438, -0.0021, -0.1090],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5698e-01, -1.4551e-01, -2.7499e-01,  6.0157e-02, -3.9593e-01,
         3.7508e-02, -6.1298e-01, -6.7570e-02,  4.1857e-02, -1.3670e-03,
        -6.3577e-02, -4.3650e-02, -1.9748e-02,  4.8924e-02, -1.8467e-02,
         5.3584e-02, -6.0424e-02,  1.1354e-01, -3.5237e-02, -8.3406e-01,
         1.9945e-02,  3.3354e-03,  8.3196e-02, -3.0397e-01,  1.0989e-01,
        -4.7133e-01, -7.9502e-02,  2.0397e-02, -2.8248e-02, -1.4423e-02,
         1.6984e-03, -5.4764e-02, -5.2046e-01, -5.9132e-01,  1.7606e-02,
        -2.2613e-01, -3.8137e-01, -1.5788e-01,  1.9645e-05, -8.9702e-03,
         1.0408e-01,  1.4159e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4106e-01, -2.3572e+00,  3.5367e-04, -3.6398e-02,  1.8507e-03,
        -8.6144e-02,  1.3153e-01, -2.2213e-01,  4.9214e-02, -4.4151e-01,
        -1.1735e-01, -5.2108e-01, -7.7785e-01, -7.3057e-02, -1.0419e-01,
        -1.7130e-01, -6.1007e-02, -5.4302e-01, -8.2708e-01, -1.5887e-01,
        -8.5661e-02, -3.1107e-01, -4.5431e-02, -3.8104e-02, -1.2851e-02,
        -4.6915e-01, -3.7932e-02,  2.2066e-02, -5.8380e-01,  1.9813e-01,
        -1.2395e-02, -5.7198e-02,  4.4349e-02, -9.7295e-02, -9.2205e-02,
        -1.5596e-01, -1.9158e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4910,  4.1557,  0.2781,  0.3389,  0.0473, -0.3291, -0.3448, -0.3270,
        -0.1256, -0.1581,  0.4916, -0.3530,  0.6073, -0.0767,  0.2548,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8088e-01, -9.4630e-02,  3.2756e-04, -3.7209e-02, -8.6486e-01,
        -1.0909e+00,  8.7197e-02, -8.1015e-01, -1.2967e+00, -1.6367e-01,
        -4.6515e-02,  8.9313e-02, -2.8252e-01, -2.8091e-02,  1.5378e-01,
        -6.2415e-02,  1.3213e-02,  4.1060e-01,  2.1417e-02,  7.4798e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0676,  0.0475,  0.0112, -0.0331, -0.7636, -0.0906,  0.0192, -0.1513,
        -0.0179,  0.0161, -0.2385, -0.2763, -0.0088, -0.0524, -0.1976, -0.1799,
        -0.0046, -0.1043,  0.0023, -0.0920, -0.0175,  0.0009, -0.0142,  0.0059,
         0.0120, -0.1801, -0.1323, -0.0030, -0.0288, -0.2373, -0.2564, -0.0231,
         0.0086, -0.0358, -0.1804, -0.3447,  0.0288, -0.1275, -0.0427,  0.0280,
         0.0070,  0.0711, -0.0906,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1243, -0.0107,  0.0867, -0.0191, -0.3337, -0.0319, -0.2531, -0.6665,
         0.1102, -0.1257, -0.0445, -0.0077, -0.0734, -0.4709, -0.7961, -0.0197,
         0.0670, -0.0076, -0.0351, -0.0374, -0.3057, -0.0306,  0.0411, -0.0398,
         0.0765,  0.0020, -0.0304,  0.0341, -0.4506, -0.0479,  0.0156, -0.2782,
        -0.0977, -0.2906, -0.4162,  0.0850,  0.0669,  0.0615,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1266, -1.9301, -0.6822, -0.9446,  0.1618, -0.5914, -0.0515,  0.0408,
        -0.2238, -0.6060,  0.0855,  0.0131, -0.3762, -0.0127,  0.1205,  0.0635,
         0.1429, -0.4570, -0.0346,  0.2271,  0.3016,  0.2752,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7520e-03, -1.8375e+00, -3.7617e-01,  9.6441e-02,  8.3378e-02,
        -2.4634e-01, -6.7437e-02,  1.2355e-01, -8.8831e-02, -7.1893e-01,
         1.2753e-02,  7.2791e-02, -1.5426e-03, -1.1576e-02, -2.3622e-01,
        -3.6324e-01, -5.5524e-01, -4.5059e-02,  6.2928e-03, -4.5103e-01,
         1.0971e-01, -3.9027e-02, -3.4725e-01, -1.1439e-01, -5.0214e-02,
        -1.0003e-02, -5.2994e-03,  8.2148e-02,  3.6137e-02, -2.4807e-01,
         8.0670e-02, -1.8359e-01,  1.9568e-03, -3.3083e-02, -2.7712e-01,
        -7.7769e-01, -6.8378e-02, -9.3734e-02, -2.7256e-03, -3.6033e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0996, -0.0424,  0.0282,  0.0123,  0.0567, -0.1443, -0.0553, -0.0074,
        -0.3044,  0.0344, -0.0441, -0.0479, -0.1379, -0.7965, -0.6422,  0.0678,
         0.0326,  0.0340, -0.3040,  0.0528,  0.0232,  0.0562, -0.0467,  0.0407,
        -0.0982, -0.4495, -0.0295, -0.3188, -0.5361, -0.1039, -0.1063, -0.0889,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1095e-02, -3.0464e+00, -2.4131e-02,  1.7258e-01, -2.6303e-02,
        -6.3114e-02,  4.8568e-03, -7.5083e-01, -1.0726e+00, -1.0639e-01,
        -9.4812e-02, -3.5634e-01, -8.5606e-02,  1.2145e-01,  9.6408e-02,
         1.9094e-02, -4.7830e-01, -9.2237e-02, -2.4233e-02, -3.9786e-02,
        -4.0612e-02,  2.0280e-01, -5.3964e-02, -6.7941e-01, -9.1322e-02,
        -9.4307e-02, -2.9325e-01, -1.0742e-03,  6.8962e-02, -1.2356e-01,
         4.2682e-02,  5.0179e-02,  8.1722e-03,  2.7523e-01, -1.1776e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.3779, -0.5497, -1.9175, -0.5323, -0.1077,  0.2079, -0.9167, -1.0816,
        -0.3639, -0.3846, -0.0480, -0.3006,  0.1294, -0.1888, -0.1072, -0.0456,
        -0.0397, -0.0539, -0.0419,  0.4018, -0.1802,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1713, -3.1040, -0.0442, -0.6304, -0.6432, -0.3385, -0.1313, -0.4379,
        -0.5471,  0.0508, -0.0219,  0.0541,  0.0867, -0.1727, -0.3747, -0.0218,
        -0.1707, -0.0575,  0.0731,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2781,  0.1217, -0.6029,  0.2250, -0.5058, -0.0505,  0.0736,  0.0448,
        -0.0930, -0.4039, -0.8893, -0.1673, -0.3108, -1.1260, -0.0186, -0.2862,
         0.0938, -0.1681, -0.0413, -0.1241,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1377, -2.8255, -0.0124, -0.7249,  0.0167,  0.0431,  0.3046, -0.5170,
         0.0592,  0.0171, -0.0639, -0.0885,  0.0571, -0.4953, -0.1510, -0.4256,
        -0.4952, -0.0265, -0.2805, -0.3294,  0.0872, -0.1674, -0.1226,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0631, -1.3053, -0.3851, -0.2550, -0.0391, -0.3490,  0.0045, -0.2693,
        -0.8376, -0.8566, -0.1316, -0.2880,  0.0312, -0.5949,  0.1391,  0.0913,
        -0.0847,  0.0551, -0.0997,  0.0393, -0.3206, -0.0645,  0.1384, -0.3775,
         0.0274,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0921, -3.4758, -0.0686, -0.0240,  0.0933, -0.0918, -0.0214, -0.3067,
         0.0977, -0.2119, -0.6396, -0.1350,  0.0038,  0.0578,  0.1102, -0.0501,
         0.0186,  0.0400,  0.1936,  0.1116, -0.7465, -0.7617, -0.4501, -0.2881,
        -0.1673, -0.0762,  0.0618, -0.0223, -0.1034, -0.1337, -0.2106,  0.0207,
        -0.1245,  0.2740, -0.1582,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0622, -0.0135,  0.1175, -0.4736, -0.0176, -0.2296, -0.6588,  0.0417,
        -0.5969,  0.0091,  0.1791, -0.1582, -0.0308, -0.2117, -0.3711, -0.7861,
        -0.1930, -1.0297, -0.1304, -0.2147, -0.2222,  0.0257,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0829,  0.1855, -0.6707, -0.3739, -0.3993, -0.6109,  0.0913, -1.3603,
         0.7203, -3.4852,  1.9292, -5.0141,  2.7720,  1.2822, -2.9376, -1.1369,
        -0.1244,  0.2469, -0.5778, -1.9608, -1.1624,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2913,  5.8878,  0.2423,  0.1695,  0.5840,  0.1669,  0.8623,  0.9849,
        -0.3907, -0.0319, -0.1589, -0.1402, -0.4498,  1.2095, -0.1362,  0.6941,
        -0.1957,  0.0772, -0.1159,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1465,  0.0202, -0.0855, -0.0009, -0.0253, -0.3075,  0.0383, -0.0077,
         0.0291,  0.0116, -0.1035, -0.0662, -0.0220, -0.2041,  0.0013,  0.0104,
        -0.2657, -0.3386, -0.0211, -0.2835, -0.1274, -0.0717, -0.2699,  0.0369,
        -0.3669, -0.0058,  0.0074, -0.0669, -0.0334, -0.5082, -0.0265, -0.0928,
        -0.2217, -0.3619,  0.0036, -0.0243, -0.0859,  0.0180, -0.0265, -0.2538,
        -0.0360, -0.0062, -0.0161, -0.0040, -0.0496, -0.0781, -0.2174],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1189, -2.2798, -1.4308, -0.1898,  0.0133, -0.0288, -0.0405,  0.0188,
        -0.3728,  0.0169,  0.0843, -0.0769,  0.1718, -0.0194,  0.1682,  0.1042,
        -0.5339,  0.0148,  0.0384, -0.0085,  0.2186, -0.1409, -0.7047, -0.6511,
         0.1865,  0.1970,  0.0485,  0.0894, -0.0366, -0.0239, -0.0702,  0.1366,
        -0.0472, -0.0634,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4802, -1.9752, -0.0361,  0.0193,  0.1889, -0.2988, -0.0899, -0.0046,
         0.0545, -0.3562, -0.0586, -0.0212, -0.5435, -0.7821,  0.1106, -0.0249,
         0.1090,  0.0520,  0.1817, -0.5690,  0.0518, -0.2632, -0.0649, -0.1323,
        -0.0157, -0.0075, -0.0653, -0.1616, -0.0566, -0.2065,  0.0112,  0.0201,
        -0.2234, -0.1013,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.3091, -3.7476, -0.1658, -0.1716, -0.0432, -0.5149,  0.1574, -0.0049,
        -0.0925, -0.7155, -0.1120, -0.2038, -0.2641, -0.9906,  0.0352, -0.0758,
        -0.0929,  0.0110,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4528,  0.1676,  0.0611, -0.1310,  0.0705,  0.0369,  0.0647,  0.0750,
         2.1261,  3.8184,  0.5915, -0.0889,  0.2453,  0.3784, -0.2674, -0.1481,
        -0.0070, -0.2519, -0.2954,  1.3983, -0.1448, -0.1978, -0.1120, -0.0561,
        -0.0649, -0.3576,  0.0308,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4086, -0.0586, -0.0290, -0.0916,  0.0773, -0.0311, -0.0265, -0.3672,
        -1.2308, -0.0960,  0.0363, -0.3644, -1.1886, -0.2932, -0.1263,  0.1181,
         0.0103,  0.0502,  0.0607, -1.4274, -0.0937,  0.0509, -0.4751, -0.0926,
         0.0877, -0.2315,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7722, -1.7693,  0.1516, -0.9991, -1.2207,  0.0187, -0.0158,  0.2918,
        -0.1696, -0.2974, -0.6479, -0.3228,  0.1077,  0.1320,  0.1476, -0.7569,
        -0.3088, -0.2072, -0.2633,  0.3416,  0.1833,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2321, -1.1990, -1.0522, -1.2827, -0.2738,  0.1814, -0.9755, -0.0298,
        -0.1818,  0.1155, -0.0875, -0.4020, -0.1124, -0.1286,  0.1263,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0424e-01,  1.5247e-03, -4.3768e-02, -8.3589e-02,  8.4721e-02,
         1.7456e-01,  3.4222e-01, -1.9764e-01,  2.8772e-02,  2.0432e-01,
        -1.4300e-01, -4.2372e-01, -3.6115e+00, -2.3193e-01,  9.0445e-02,
        -1.9127e-01,  1.6966e-01, -1.0227e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6739, -1.8923, -0.5233, -0.6971, -0.0148, -0.0348, -0.1983, -0.5145,
        -0.0701, -0.8180, -0.5950, -0.2126,  0.0981, -0.4936,  0.0597,  0.0103,
         0.0731, -0.1856, -0.0026,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7290e-02, -1.0268e+00, -9.5231e-01, -1.4943e-01, -5.0035e-02,
        -3.9690e-01,  1.1674e-01,  1.5309e-02, -1.1429e-01, -1.7451e-02,
         5.4207e-03, -2.4311e-05, -2.1214e-01,  1.4011e-03, -1.6319e-02,
        -2.1782e-02, -4.0715e-01, -2.7444e-02,  6.1160e-02,  7.3474e-02,
        -4.1546e-01,  5.6838e-03, -3.0584e-01,  4.4375e-03, -4.8936e-01,
         6.1933e-02, -3.0191e-01,  2.0550e-02, -3.0622e-02, -6.7581e-02,
         5.5116e-02,  5.3245e-02, -3.0104e-01,  1.2481e-02, -1.6742e-01,
        -1.4211e-01, -3.9856e-01, -6.4833e-05, -1.0007e-01, -6.4940e-02,
        -4.0764e-02,  2.9907e-02,  2.8551e-02, -7.8179e-02], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1063e-01, -2.1616e+00, -1.6546e-01, -1.0088e+00, -8.6753e-01,
        -2.6689e-02, -3.4238e-01, -1.8460e-01, -7.1893e-01, -1.0272e-02,
        -2.8670e-01,  1.0136e-02,  7.4006e-02, -4.1074e-02,  2.0597e-02,
        -8.9897e-03,  2.3246e-01, -2.4816e-01,  3.3386e-02, -1.5601e-01,
        -2.3141e-01, -2.6281e-02, -3.2780e-02, -1.3138e-02,  4.7955e-02,
         4.3822e-02, -3.3256e-02, -4.3344e-01,  3.6709e-02, -2.6783e-02,
         1.3445e-03, -1.6563e-01,  1.2246e-01, -4.2055e-02,  1.7892e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7488e-01, -4.3088e+00, -4.3717e-02, -7.1651e-02, -1.8009e-01,
         1.2373e-01, -6.1867e-01, -4.9684e-01, -2.2066e-01,  2.7018e-02,
         6.3144e-02, -4.2082e-01, -1.0993e+00,  4.0205e-05,  4.2098e-03,
        -3.3737e-01,  2.2723e-02, -6.5668e-02,  1.8227e-01,  5.6138e-02,
         2.6069e-02, -5.3759e-02,  6.0475e-01,  1.2360e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2063, -1.8918, -0.0036, -0.3850, -0.0967, -0.3348,  0.2092, -0.1258,
        -1.0756, -0.0908, -0.5920,  0.1250,  0.1485, -0.1913, -0.5586,  0.0399,
         0.0584, -0.0999, -0.6403, -0.0794, -0.0914, -0.6957, -0.0332, -0.3955,
        -0.1433, -0.2866, -0.0797, -0.1870,  0.0128, -0.0185, -0.1170,  0.0433,
         0.0274, -0.1195,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3494,  0.1622,  0.0294, -0.1076,  0.1269,  0.1600, -0.9824, -0.0895,
         0.0794,  0.0356, -0.0286, -0.0836, -0.6473, -0.1122, -0.9619, -1.0817,
        -0.1816,  0.0845, -0.0485, -0.1145,  0.0491, -0.0646,  0.0741, -0.6319,
        -0.2909, -0.0871, -0.2873,  0.0048,  0.0041, -0.1411,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.3277,  0.2537, -0.0596, -1.4751, -0.1953,  0.0561, -0.1051, -0.1475,
        -0.6883, -1.0449,  0.0931,  0.4006, -0.2864,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.9517,  0.1148, -0.0443,  0.1355, -0.0199, -0.2148,  0.0668, -0.9426,
        -1.0961,  0.0610, -0.1678, -0.6083,  0.2178, -0.3040, -1.1584, -0.0471,
         0.0659,  0.2735,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0363, -1.4693, -3.1137, -0.2669, -1.0869, -0.0169, -0.2307, -0.0962,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1113e-01, -1.8319e+00, -8.0467e-02, -3.7435e-01, -3.6685e-01,
        -6.8541e-02, -1.8270e-01, -5.7564e-02, -3.5039e-02, -1.0796e-01,
         5.5369e-02,  1.6029e-02, -8.4828e-02, -6.7819e-02, -1.2958e-02,
        -5.6689e-02,  2.6014e-02,  5.2242e-02,  3.8720e-02, -2.7874e-03,
         8.8364e-02, -5.3333e-04, -3.1281e-01, -3.3638e-01, -4.8837e-02,
        -1.7122e-01, -1.1062e-02, -4.3424e-02, -6.7603e-01, -2.3215e-01,
         2.8714e-02, -4.0672e-01, -7.7885e-02, -8.0772e-01, -4.3417e-02,
        -3.7112e-01, -2.5636e-01,  1.4371e-02,  1.4125e-01,  2.6854e-02,
        -1.2978e-01, -6.6024e-02, -6.3128e-02,  4.5027e-03,  4.4519e-02,
        -3.4022e-02,  7.7974e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0164, -1.4466, -0.3151, -2.4959, -0.1165, -0.0648, -0.1959, -0.0950,
        -0.0338, -0.2464, -0.0397,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2343, -1.9352, -0.1668, -0.9525, -0.2050, -0.0439, -0.3572, -1.5245,
         0.0095, -0.5517, -0.2101,  0.2007, -0.4271,  0.0059, -0.0659, -0.0750,
        -0.1331, -0.0995,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4667, -0.8959, -3.0490,  0.2015, -0.1113,  0.0904, -0.8204,  0.0260,
        -0.5261, -0.0778, -0.1016,  0.0644,  0.1162, -0.1792, -0.1652,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3947, -3.8092,  0.1014,  0.1054,  0.3065, -0.7989, -1.5235, -0.1472,
         0.4196, -0.9426, -0.9208,  0.1183, -0.0078,  0.3828,  0.2580,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2122,  0.0873,  0.0404, -0.0040,  0.1111,  0.1204, -0.0271, -0.0443,
        -0.5695, -0.1465, -0.1025, -0.8053, -0.8093, -0.1441,  0.0217, -0.0041,
        -0.1291, -0.0337, -0.0725,  0.0462, -0.4371, -0.6869,  0.1731, -0.0544,
        -0.0557, -0.0734,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0646, -0.1381, -0.1746, -0.3380, -0.0698, -0.1263,  0.0899,  2.4148,
         3.4077,  0.4160, -0.0455, -0.0270, -0.0094, -1.0957,  0.2384,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9332e-01, -2.6285e+00, -7.3713e-02, -6.1321e-01, -4.3517e-01,
        -2.6827e-01, -7.9917e-01, -7.0010e-02,  1.2863e-02, -1.1073e+00,
         4.1657e-02, -6.9629e-02, -4.2184e-01, -4.6558e-02, -1.4604e-01,
        -7.4187e-02, -2.1881e-01, -1.2052e-03, -3.5206e-01, -1.4771e-01,
        -3.4545e-01,  6.9872e-02,  7.8996e-02,  1.0943e-01, -1.9245e-01,
        -1.4955e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2183,  0.0204,  0.0127,  0.0512,  0.0735, -0.1059, -0.0147,  0.1054,
        -0.2140, -0.2347, -0.9006, -0.0135,  0.0805,  0.3608, -0.8779, -0.4555,
         0.0135, -0.1826,  0.0068, -0.4146, -0.1525, -0.1987, -0.0669, -0.3769,
        -0.0359, -0.1651,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-6.1484e-01, -3.5325e+00,  6.8100e-02,  1.6163e-02, -2.2285e-01,
        -1.8593e-01, -4.3817e-01, -4.4474e-01,  2.2337e-02, -2.4583e-01,
        -1.6781e-01, -3.1390e-01, -7.7882e-01,  7.3213e-02, -2.4663e-01,
        -1.4277e-01,  4.8728e-02, -2.2065e-02, -3.0196e-01, -6.6444e-02,
         1.7078e-01, -1.6651e-01,  5.5967e-02, -2.9690e-01, -7.6738e-01,
         9.5861e-02, -2.0953e-01, -8.9812e-02,  2.7336e-01, -9.5817e-03,
        -9.5766e-02, -2.9044e-01, -1.1285e-01,  2.6342e-05, -7.5853e-02,
         1.1076e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3719, -0.6863, -0.1963, -0.0451, -0.5308, -0.0671, -0.5342, -0.5698,
         0.1017,  0.0107, -0.3467, -0.0242, -0.0042,  0.0022, -0.0469, -0.1549,
         0.0036, -0.0104,  0.0202,  0.0788,  0.0016, -0.5599, -0.0989, -0.0170,
        -0.4198, -0.5033, -0.0075, -0.4377, -0.4460, -0.0256, -0.0930, -0.4340,
        -0.6416,  0.1326, -0.1096,  0.1895,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2855,  0.2755, -0.0286, -0.0755, -0.0049, -2.0411, -0.2904, -0.0429,
        -0.1013,  0.2910, -0.1504, -0.6156, -1.9285,  0.3043,  0.1793, -0.1452,
        -0.0109,  0.0600,  0.1161,  0.0506,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1060, -0.7654, -0.2770, -0.0928,  0.0588,  0.0149,  0.0346, -0.4764,
        -0.1059, -0.2255, -0.0426,  0.0307, -0.0404, -0.0412, -0.5204, -0.6737,
        -0.1080, -0.4315, -0.0009,  0.0237, -0.1081,  0.0327,  0.0272, -0.0522,
        -0.0382,  0.0693,  0.0108,  0.0080, -0.0080, -0.3069, -0.5625, -0.1530,
         0.0040, -0.2536,  0.0067,  0.2843, -0.0196,  0.1474,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0052, -1.0190, -0.5397, -0.4760, -0.0397, -0.0150, -0.1281, -0.0215,
        -0.1092,  0.0751, -0.3689, -0.6778, -0.0290, -0.0440, -0.0786, -0.4428,
         0.0739, -0.3811,  0.0119, -0.0132,  0.0067,  0.0198,  0.0182, -0.0131,
        -0.0040,  0.0155, -0.0890, -0.2264, -0.3616,  0.0174, -0.0410, -0.1079,
        -0.0222, -0.0182, -0.1311,  0.0705, -0.1277,  0.0069,  0.0165, -0.1100,
        -0.0880], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3690, -4.1045,  0.1806, -0.0720, -0.0347, -0.9680, -0.1373,  0.1837,
        -0.2893,  0.0808, -0.0115, -0.6620, -0.9354,  0.0298,  0.0677, -0.5195,
        -0.0614, -0.8241, -0.0571, -0.1469,  0.1646, -0.1804,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4820,  0.0562,  0.0345,  0.0294,  0.0538, -0.3003,  0.0040,  0.0513,
        -0.1229,  0.0125,  0.0274, -0.4025,  0.0623, -0.3607, -0.0020, -0.7201,
        -0.0711, -0.3077, -0.0083, -0.2955, -0.6468, -0.0228, -0.0330, -0.1799,
         0.0193, -0.0274, -0.0078, -0.2864, -0.5607,  0.0383, -0.2771, -0.2571,
         0.1061, -0.0117, -0.2713,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4909, -0.1140, -1.0024, -0.2531, -0.6540,  0.0314,  0.0437,  0.0088,
        -0.1794,  0.0264, -0.2849,  0.0613,  0.0808,  0.0158, -0.0433, -0.1921,
         0.1757,  0.0803, -0.5370, -0.1968, -0.5569,  0.2758,  0.0112, -0.3993,
        -0.0612, -0.2461, -0.5511, -0.0461, -0.1529,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4765, -2.1511, -0.3606, -0.3525,  0.1347, -0.3741,  0.2147, -0.1308,
        -0.3174, -0.0231, -0.3669, -0.4621,  0.1017, -0.0794, -0.0092, -0.6989,
        -0.2996, -0.1816, -0.3935,  0.0740, -0.0601, -0.0737, -0.2091,  0.0110,
         0.4477,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3598, -0.1624, -0.2483,  0.1069, -1.7546, -0.3008, -0.0996, -0.8195,
        -0.8429, -0.0334,  0.1811, -0.0132,  0.0362, -0.1632, -0.6752, -0.7877,
         0.0721, -0.5450, -0.1325, -0.1843, -0.0636,  0.2456,  0.2338,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7970e-01, -2.4389e-03, -1.9820e-02,  2.9354e-02,  7.6200e-02,
         9.1181e-02, -3.4436e-03, -1.5705e-02,  5.0951e-03, -3.5108e-02,
        -3.0250e-02, -3.4155e-02, -5.2325e-01, -8.6328e-01, -6.7863e-04,
        -7.1188e-02, -1.1156e-02, -2.5476e-01,  6.3301e-02,  2.7900e-02,
         4.8930e-02,  8.6453e-02,  9.3911e-02, -6.6241e-02, -2.1179e-01,
        -1.7904e-01, -1.1026e+00, -1.6933e-01, -8.8924e-01,  1.4861e-01,
        -1.7339e-01,  8.2092e-02,  5.3160e-02,  3.3076e-02, -1.5493e-01,
         5.8497e-03, -9.4713e-02, -3.5225e-02, -1.6869e-01,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0151,  0.0015,  0.1802, -1.0026, -1.3007,  0.3659,  0.1615, -0.6143,
        -1.3429,  0.4904, -0.1202,  0.0512,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.2754, -1.0984, -1.6272, -0.0381, -0.0267, -0.3189, -0.2172,  0.1724,
        -0.0043, -0.1668,  0.0066, -0.0458,  0.0223, -0.0343, -0.2517, -0.0091,
        -0.1582, -0.3084,  0.0038, -0.1056, -0.1628, -0.1451,  0.0564, -0.0606,
         0.0453,  0.0261, -0.0836,  0.1104, -0.0517,  0.0965,  0.0211,  0.0438,
        -0.1302, -0.0295, -0.0422, -0.1328, -0.2953, -0.0471, -0.0073, -0.0204,
         0.0167, -0.0055,  0.0281,  0.0753, -0.0201,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0062, -3.6091, -0.6061, -1.3709,  0.0234, -0.3209,  0.2258, -0.0380,
        -1.5120,  0.7945, -0.5331,  0.2409,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3983, -5.6009, -0.3005, -2.0886, -0.4672, -0.3812,  0.0878,  0.7310,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2468e-02, -1.5283e+00, -1.6633e-01, -3.7092e-01, -4.7653e-01,
        -8.3293e-02,  1.1859e-01, -1.0170e-01, -3.5958e-01, -4.7366e-01,
         2.6556e-02,  9.8231e-03,  2.6224e-02,  2.0908e-02,  2.0687e-02,
         2.1018e-02,  5.0794e-03,  5.1715e-02, -9.8645e-04, -2.7987e-03,
         3.2287e-03,  3.7939e-02, -9.6708e-03, -6.8477e-02, -4.7163e-03,
         2.1877e-02, -9.4462e-03, -4.1600e-02, -1.3606e-01, -3.5701e-02,
         2.1663e-02, -2.2560e-02,  2.4850e-02,  1.6039e-04, -1.6803e-02,
         1.5767e-02,  1.5095e-02, -1.6930e-02,  2.6876e-02,  5.7139e-03,
        -8.2907e-03, -7.2810e-03, -3.5299e-02, -5.1570e-03,  3.9968e-02,
         3.9438e-02, -5.1483e-01, -4.2342e-02, -2.8964e-01, -7.8528e-01,
        -5.2122e-02, -2.8612e-02, -3.0416e-01,  8.1980e-04, -9.1053e-02,
        -1.6201e-02, -6.8394e-02, -3.0814e-02,  2.3195e-02,  5.7254e-02,
        -7.9175e-03], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1367,  0.0266,  0.0741, -0.3531, -0.4837,  0.0821, -0.4009,  0.0283,
         0.0123,  0.1443,  0.1181, -0.5203,  0.0143, -0.4108, -0.8169, -0.0115,
         0.0218,  0.0912, -0.4009, -0.2002, -0.6467, -0.3240, -0.1188,  0.3223,
        -0.1814, -0.2286, -0.0918, -0.0256, -0.2517, -0.1986, -0.0744,  0.2678,
        -0.1517,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0758,  0.3130,  1.7236,  0.8859, -0.2482, -0.0555,  0.6774, -0.0575,
         0.0710, -0.0889,  0.0363,  0.0350,  0.3459,  0.0317,  0.1347, -0.1687,
         0.3608,  0.7693,  1.4868,  0.0572, -0.0297,  0.2329, -0.3024,  0.3544,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0150, -0.6573, -1.1142, -0.3854,  0.1796, -0.7097,  0.0744, -0.3203,
        -0.1410, -0.5740, -0.4467, -0.6250, -0.8963, -0.0973,  0.0218,  0.0208,
        -0.1147,  0.0325,  0.0997, -0.0363, -0.1710, -0.0534, -0.0962,  0.0120,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0674,  0.0150, -0.8196, -0.6585, -0.0315, -0.0015,  0.0467, -0.4825,
        -0.0862,  0.1121,  0.0558,  0.0053, -0.0157,  0.0516, -0.4061, -0.8426,
        -0.5705, -0.0841, -0.0708,  0.1588, -0.4973,  0.0259, -0.3493, -0.5016,
         0.0314,  0.0463, -0.2824, -0.0456, -0.0079, -0.0972, -0.0117, -0.1278,
         0.4462,  0.1923,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1264,  0.0355, -0.0775,  0.0687, -0.4405, -0.0438, -0.0502,  0.0399,
         0.0216,  0.0091,  0.0515,  0.2639,  0.0751, -0.9897,  0.0321,  0.0213,
        -1.0856,  0.0274, -0.6312, -0.0818, -0.6964, -0.6562,  0.0721, -0.0564,
        -0.0612,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6980, -0.1106, -0.1136, -0.2721, -0.2559, -0.0343, -1.0432, -0.5237,
        -0.7747, -2.2102, -0.7319, -0.1458, -0.3392, -0.0387, -0.1347, -0.0462,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2096, -2.3399, -0.0856, -0.9943, -0.0431, -0.4755, -2.1597, -0.2500,
         0.1054,  0.1034,  0.0086,  0.0394, -0.2567,  0.0733, -0.0318,  0.1799,
        -0.4105, -0.7795, -0.4967, -0.0469, -0.0210,  0.4728,  0.3048,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4698,  0.3496,  0.3277,  2.2434,  4.6791, -0.1280, -0.5898,  0.0557,
         0.2395, -0.6099, -0.2333, -0.9313, -0.2622, -0.3984,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.5683, -1.0138, -0.9980, -1.1951,  0.0680, -0.2609, -0.2486,  0.1114,
        -0.1769, -0.0363, -0.6135, -0.7920, -0.0307, -0.0353, -0.0368, -0.0188,
         0.1188, -0.8574,  0.0332, -0.4946, -0.0067,  0.0876,  0.1253,  0.3910,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.7789, -0.4973, -1.1122, -0.1247, -1.2156,  0.1644, -2.1280,  0.1665,
         0.0159, -0.1109, -0.0994,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4633,  0.2758,  0.2377,  0.0262, -0.0106, -0.4488, -0.4651,  0.0945,
        -0.6491, -0.0712, -0.5259,  0.0784, -0.3755, -0.0240, -0.4145, -0.2053,
        -0.3024, -0.7188, -0.0176, -0.2945, -0.3227,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4464,  0.3275,  0.0794, -0.2067,  0.0334, -0.0875,  0.3001, -2.2606,
        -0.9398, -1.5929, -0.4624,  0.0331,  0.2676, -0.5164,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0882,  0.2373,  0.1100, -0.1877, -0.1070,  0.0738,  0.1453, -0.1154,
        -0.0072, -0.0944,  0.7837,  0.5764,  2.9461,  0.1406,  0.4548,  0.2138,
         0.1744, -0.1065, -0.0637, -0.1563, -0.1813,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3048, -3.6933, -0.0120, -1.3349, -0.2453, -1.1025, -0.2879, -2.1676,
        -0.3159, -0.5201, -0.5631,  0.4466, -0.3522,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1742, -0.8367,  0.0304, -0.0179, -0.0201, -0.0240, -0.1505, -0.4140,
        -0.0912, -0.0051,  0.0467,  0.0030, -0.0248, -0.0192, -0.0153, -0.1544,
         0.0151,  0.0090,  0.0226, -0.2068,  0.0120,  0.0885,  0.0044, -0.0904,
        -0.7161,  0.0821, -0.5379, -0.4884, -0.4884,  0.0716, -0.0515, -0.1218,
        -0.0112, -0.0123,  0.0486,  0.0394, -0.1485,  0.0199,  0.0133, -0.0878,
        -0.0908,  0.0083,  0.0410,  0.0382, -0.0441], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3809, -0.4423, -0.9382, -0.1489, -0.0015,  0.0089, -0.0216, -0.2039,
        -0.0337, -0.0241, -0.5758, -0.1253, -0.2837, -0.1080, -0.5755, -0.1087,
        -1.1824, -0.0174, -0.1103,  0.0831, -0.0067, -0.6486, -0.1084, -0.5611,
         0.0388,  0.0442, -0.0829, -0.0432, -0.9279, -0.1747, -0.2071, -0.1655,
        -0.4860, -0.2086,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6664e-03,  6.6176e-01,  1.8807e-01, -1.7754e-01,  2.6353e-03,
         5.3537e-02, -1.1980e-01,  9.6595e-02,  2.2910e-01,  6.7154e-02,
        -3.0513e-01, -1.6059e-01, -5.0081e-02,  3.1072e-02,  1.5028e-01,
         1.9291e-01,  3.6079e-02,  3.1497e-01, -2.7396e-01, -2.0764e+00,
        -4.0681e+00,  6.4071e-01,  1.7800e-01,  3.1942e-01,  3.3699e-02,
        -4.4341e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4839e-02, -2.2984e+00,  1.3086e-01, -7.3917e-01,  1.4609e-01,
        -1.3777e-02, -2.3847e-02, -3.8880e-01,  2.6968e-02, -8.2123e-02,
         1.0335e-01,  1.3059e-03,  3.4992e-02,  3.2062e-02, -7.4920e-02,
        -3.6687e-01, -5.7065e-01, -3.4159e-02, -2.3623e-01,  3.2417e-02,
        -1.6150e-01,  2.7692e-02,  1.0894e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6712e-01, -1.0816e+00, -1.3385e-01, -4.3816e-01, -6.2320e-02,
        -4.0700e-02, -7.0160e-02, -4.4632e-01, -7.0039e-01, -5.4498e-02,
         1.2746e-03, -2.5184e-02,  1.6221e-02, -3.6567e-02, -1.5125e+00,
         1.5520e-01,  6.5990e-02, -2.3042e-01, -3.5725e-01, -1.5964e-01,
        -1.0745e-02, -3.1575e-02,  2.0204e-02,  1.0399e-01, -5.6158e-03,
        -3.0753e-02, -4.7231e-01, -5.8435e-01, -6.2894e-02, -1.2026e-01,
         9.5226e-03, -2.1017e-02, -9.0137e-02,  2.4765e-02, -3.9778e-02,
         3.9249e-02, -3.1087e-01,  9.8631e-02, -1.0953e-01,  5.4615e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1453, -0.2002, -0.0072, -0.1631, -0.3352,  0.0177, -0.0102, -0.4985,
         0.0396, -0.0460,  0.0052, -0.0202, -0.1872,  0.0498, -0.1569, -0.5023,
        -0.4589, -0.1243, -0.1132, -0.2771, -0.0370, -0.4317, -0.2839,  0.0784,
        -0.0362,  0.0078, -0.0374, -0.3076, -0.0166, -0.0774, -0.1357, -0.0252,
        -0.0884, -0.0072,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.7683e-02, -8.3309e-02, -3.4398e-01, -2.9473e-01, -5.8093e-01,
        -1.1085e-02,  1.3304e-01,  8.7077e-02, -1.3985e-02,  1.4708e-02,
        -2.5796e-02, -8.7388e-02,  1.3582e-02, -3.3352e-02, -4.6395e-01,
        -1.0751e-01, -1.1467e-02, -4.6772e-02, -3.2723e-01, -6.5198e-03,
         2.0616e-02,  1.2148e-02, -3.0721e-02,  7.2933e-02, -1.8486e-01,
        -3.6727e-01,  7.5433e-02,  8.5144e-02,  1.2636e-02,  7.6198e-02,
         8.0673e-02,  2.1651e-02, -9.6825e-03,  2.9021e-02, -1.8689e-01,
         9.5098e-03, -1.4443e-01, -2.6604e-01, -7.2338e-02, -2.9711e-01,
        -2.3170e-01, -1.4565e-01, -2.5851e-01, -3.6433e-02,  4.6684e-02,
         1.6627e-04, -2.1527e-02, -1.3497e-02, -9.8733e-02, -6.8565e-03,
        -3.4056e-01, -4.2205e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2046, -0.1363, -0.0941, -0.0544, -0.6661,  0.1427, -0.1683,  0.2673,
         0.0244, -0.2061,  0.0266,  0.0700, -0.0977, -0.8823, -0.2630, -0.4182,
        -0.7216, -0.2712, -0.4792, -0.1009, -0.1413, -0.2185, -0.0355, -0.3742,
        -0.0122,  0.0613, -0.1384, -0.0032,  0.2028, -0.0651,  0.3180,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1016, -1.6625,  0.0189, -0.1134,  0.0774,  0.0920, -0.0426,  0.0603,
        -0.0811,  0.2450, -0.1002, -0.4771,  0.0104, -0.0381, -0.4834, -0.5208,
        -0.1400, -0.2884, -0.8797, -0.8590, -0.2894, -0.1977, -0.1183, -0.0713,
         0.1024,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1117, -0.1031, -0.0348, -0.0275,  0.1957, -1.5729, -2.4083, -0.4802,
         0.0569, -0.1463,  0.1801, -0.2627,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5157,  4.4934,  0.4558,  0.8044,  1.1593,  0.2599, -0.1696,  0.0139,
         0.1912, -0.0106,  0.1599, -0.0683,  0.4707,  1.0971, -0.0272,  0.7488,
         0.2372,  0.1913, -0.1050, -0.0164,  0.3411,  0.0652,  0.2455,  0.1563,
         0.1251,  0.5537, -0.1703,  0.1243, -0.3581,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5483, -3.2327,  0.0210, -0.1343,  0.1996, -0.7929, -1.6218,  0.1067,
         0.0177, -0.0691,  0.0968, -0.0880,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2210, -3.2283,  0.0540, -0.9302,  0.1305, -0.8228, -1.6247,  0.0369,
        -0.4262,  0.0252,  0.0536,  0.2326,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0875,  0.5084, -1.6089, -0.2504, -0.6664, -0.0239, -0.0179,  0.1722,
        -0.6131,  0.1189, -0.0057, -0.8286, -0.0369,  0.0437,  0.0981, -0.6998,
        -0.0495,  0.2063,  0.0267,  0.0088,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0901, -2.2416, -0.0926, -0.7971, -1.0025,  0.0424, -0.3807, -0.0306,
        -0.3035, -0.1032, -0.0834, -0.4550, -0.1846, -0.0453, -0.0245,  0.0153,
        -0.1649,  0.1118,  0.0877,  0.0208, -0.0613,  0.1948,  0.0295,  0.0197,
        -0.0724,  0.0228, -0.0727, -0.1741, -0.1614,  0.0528,  0.0039, -0.0139,
        -0.1191, -0.1746, -0.1035,  0.1083, -0.0622,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1654,  4.2896, -0.1705, -0.5103, -0.2133,  0.2502, -0.0781, -0.0120,
         0.2536,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0138,  0.0423, -0.0513, -1.0421,  0.0351, -0.0956, -0.3390, -0.3599,
         0.0799, -0.0263, -0.0360, -0.0069, -0.8184, -0.7472, -0.0603,  0.1198,
         0.0066,  0.1262, -0.4646, -0.3640,  0.1655, -0.0376, -0.0344, -0.0392,
         0.0133,  0.4531, -0.4196,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1198, -2.7061,  0.1977, -0.7813, -0.0126, -0.3949, -0.0422,  0.0682,
         0.0404,  0.0538, -0.0410, -0.5343, -0.1213,  0.0640, -0.1058,  0.0063,
        -0.7113, -0.0752,  0.1174,  0.0418,  0.0708, -0.2601, -0.8393,  0.0842,
        -0.0679, -0.1474, -0.1098, -0.2409, -0.7419, -0.0580, -0.0247,  0.1017,
        -0.0613, -0.3811,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 3.4163e-01,  1.9611e-02, -2.5550e-02, -2.0013e-02, -3.3160e-01,
        -4.8691e-03,  5.8191e-02,  3.5545e-02,  2.8238e-02,  1.5584e-01,
        -2.3827e-03, -5.3774e-01, -5.1203e-04, -2.4614e-02, -9.3552e-02,
        -6.7798e-01, -8.2741e-01,  6.0467e-02, -3.5143e-01,  3.5690e-02,
         1.1382e-01, -4.7309e-02, -2.3236e-01,  6.4850e-03,  6.6141e-02,
        -4.3470e-02,  1.9610e-03, -8.2606e-02,  3.8089e-02, -3.1573e-02,
        -6.0257e-02, -4.3882e-01, -7.8610e-01,  2.5311e-02, -9.8684e-02,
        -1.4626e-01, -2.5890e-01, -2.3187e-02,  9.9176e-02, -4.2006e-02,
        -1.7411e-01, -2.3842e-01,  4.8734e-02, -7.5169e-02, -1.2246e-01,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4485,  0.0591, -0.1345, -0.2904, -0.2002, -1.0218, -1.6006, -0.1336,
        -0.0858, -0.9863,  0.1188, -0.0539, -0.6114, -0.0579, -0.0700, -0.1755,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.8681, -0.0446,  0.0415,  0.1410,  0.0957, -0.1276,  0.4038,  0.5073,
         1.6547,  1.4924,  0.1802,  1.0014,  0.1377,  0.6520,  1.5050, -0.0772,
         0.1220,  0.0565, -0.1261,  0.1038,  0.1322, -0.4588, -0.0099,  0.0573,
        -0.0419, -0.1254,  0.0525,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0833, -0.0086, -0.2494,  0.0051, -0.1719, -0.0393, -0.1261, -0.0518,
        -0.0751, -0.0051,  0.0097, -0.0198, -0.1724, -0.0093, -0.0847, -0.0250,
        -0.1177,  0.0059,  0.0133, -0.0995,  0.0638, -0.0767, -0.0770, -0.3069,
        -0.0090,  0.0020, -0.1430, -0.3063, -0.0204, -0.1562, -0.0190, -0.1879,
        -0.2802,  0.0051,  0.0806, -0.2493, -0.1079, -0.2778, -0.0802, -0.0096,
        -0.1244, -0.1796, -0.0544,  0.0394,  0.0211, -0.0120], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1194,  0.1896, -0.0559, -0.0534, -0.1130, -0.2658, -0.0156, -0.9423,
         0.1716,  0.0841, -0.1330,  0.0600,  0.0017,  0.0088, -0.2058,  0.0184,
         0.0398, -0.2311, -1.3861, -1.6021, -0.2022, -0.1389, -0.0262, -0.0902,
         0.1055,  0.0231, -0.5606,  0.1174, -0.1341, -0.0233,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.9789, -0.6010,  0.0897,  0.0967,  0.1113,  0.2895,  0.2858,  3.0034,
        -0.1181,  0.2929,  0.3301,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3559, -1.5541,  0.1070, -0.1245, -0.3059, -0.0131, -0.4636, -0.6356,
         0.0130, -0.0670,  0.0661, -0.0019, -0.0472, -0.1132, -0.1052, -0.4662,
        -0.1422, -0.1649, -0.0373, -0.2720,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4723e-01, -3.3242e+00, -1.2988e+00, -1.6914e-01,  3.2455e-01,
        -6.0115e-01, -7.0552e-01,  1.4845e-01,  3.6540e-02, -1.2473e-01,
         3.1500e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3039, -2.7963, -0.9406, -0.8906,  0.0972, -0.0980, -0.0178, -0.4272,
        -0.6877,  0.0401,  0.0263, -0.0347,  0.1020, -0.0802,  0.0088, -0.4125,
         0.0755,  0.0560, -0.0799, -0.0174,  0.0705,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4551, -1.9925, -1.3368,  0.0334, -0.3299, -1.7727,  0.0825, -0.2936,
        -0.2568, -0.3041, -0.0636, -0.5508, -0.0044, -0.0818, -0.1692,  0.0204,
        -0.1636,  0.4801,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0663, -0.8275, -0.9609, -0.0581, -0.1502, -0.0599,  0.0637,  0.0203,
        -0.0107, -0.1013,  0.0423,  0.0250, -0.0296, -0.2935, -0.3846,  0.0633,
        -0.1978, -0.4206, -0.3121,  0.0025, -0.1164, -0.2845, -0.1853, -0.0048,
        -0.3013, -0.0242, -0.1521, -0.0039, -0.0932, -0.1805,  0.0651,  0.0303,
        -0.0288,  0.0593, -0.0691, -0.0071,  0.0187,  0.1314, -0.0695,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3849, -2.7002, -0.0640, -0.7502, -0.1631,  0.0103,  0.0351,  0.1686,
        -0.3170, -0.0629, -0.0874, -0.5606, -0.1372,  0.0041,  0.0091,  0.0424,
        -0.6211, -0.0729,  0.0102,  0.0432, -0.0467, -0.0082,  0.0245, -0.0079,
         0.0414,  0.0273, -0.1861, -0.0163, -0.4953, -0.1715,  0.0292,  0.0092,
        -0.0366, -0.0434,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.3118, -0.2250, -1.1566, -2.1793, -0.0195, -0.8085,  0.1393, -0.3155,
        -0.0305,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1521, -1.4831, -0.0985, -0.5083, -0.0521, -0.7265, -0.0535, -0.2446,
        -0.3463,  0.0186, -0.3566, -0.1740, -0.0802, -0.0109,  0.0619, -0.4594,
        -0.0335, -0.8441, -0.1081, -0.0795, -0.0107,  0.0478,  0.0016,  0.0691,
        -0.2111, -0.0109,  0.0454, -0.0292, -0.0452, -0.0547, -0.0873, -0.2622,
         0.0882, -0.3202,  0.1060, -0.0192, -0.0310,  0.0340,  0.0255,  0.0567,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2829, -0.8295, -1.0213,  0.0222, -0.4368, -0.0679, -0.0470, -0.0455,
        -0.2784, -0.4785,  0.0313, -0.0212,  0.0162,  0.0104, -0.0138,  0.0321,
         0.0240,  0.0083, -0.3292,  0.0296, -0.3645, -0.0845,  0.0173, -0.0655,
         0.0523, -0.0895, -0.2987,  0.0246, -0.2153, -0.6368,  0.0149, -0.0706,
        -0.0096,  0.0212,  0.0473, -0.0313, -0.2910, -0.4210,  0.0623, -0.0128,
        -0.0231, -0.0434,  0.0311, -0.0021,  0.0065, -0.0050, -0.0491, -0.0302,
        -0.0086,  0.0032, -0.0129,  0.0910,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1372, -1.7613, -0.0318, -0.4622, -0.1375, -0.1924, -0.5911, -0.1658,
        -0.4348,  0.0314,  0.2180, -0.1261, -0.4365,  0.0144, -0.3333, -0.4338,
        -0.4261,  0.0186, -0.8234,  0.0434, -0.0317, -0.1518,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7546e-01, -5.7778e+00, -1.6763e-01,  2.8835e-02, -1.2951e-01,
        -4.8569e-02,  2.4902e-01, -4.0378e-01, -3.2683e-02, -1.8254e-01,
        -1.1574e-01, -6.6794e-01, -6.0780e-02,  2.4342e-02, -3.3239e-02,
        -8.4434e-01, -3.9011e-02, -2.1674e-01, -3.5419e-03, -9.2315e-02,
         7.2595e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6128e-02, -1.0218e+00, -2.1886e-01, -1.3562e-01, -4.4224e-02,
         3.8881e-02, -1.6056e-01,  6.9141e-02, -1.6993e-01, -8.3877e-03,
         5.6343e-02, -9.0290e-02,  1.8337e-02,  3.7447e-02,  1.6116e-02,
        -3.1607e-03,  4.3478e-02, -9.5839e-03, -1.3924e-01, -4.1701e-03,
         3.4082e-02,  3.9685e-02, -2.0077e-02,  1.2751e-02,  1.4648e-02,
         1.7605e-02, -9.1400e-02, -3.2643e-01,  1.2712e-03, -2.7086e-01,
        -4.6695e-01, -3.0890e-01, -1.3856e-02, -2.2013e-01, -1.8054e-01,
         6.3543e-03, -3.9799e-02, -3.0881e-02,  3.6596e-02, -5.9394e-02,
        -4.6761e-01,  5.7913e-03, -2.4057e-02, -1.9726e-03, -1.7480e-01,
         6.1995e-04, -1.7894e-01, -1.4722e-01,  4.9397e-02,  5.2908e-02,
        -1.3514e-01, -9.6295e-03, -9.8165e-02, -7.1410e-02, -8.2787e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1106,  0.0147,  0.1053, -0.1012, -0.0535, -0.0375, -0.2980,  0.0587,
         0.0440,  0.0223,  0.0400,  0.0414, -0.0365, -0.1322, -0.2774, -0.0937,
         0.0177, -0.1898,  0.0268, -0.4137, -0.0382, -0.1436, -0.1026,  0.0124,
         0.0335,  0.0314,  0.0248, -0.3989,  0.0817,  0.0618,  0.0859,  0.0416,
        -0.0921, -0.3944,  0.0056, -0.2532, -0.0139, -0.3578, -0.2303, -0.2805,
        -0.0037, -0.1141,  0.0376, -0.0425, -0.0132,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0650, -0.7642, -0.4113, -0.4674,  0.0410, -0.0618, -0.1049,  0.0732,
        -0.2797, -0.0261,  0.0298, -0.1453,  0.0453, -0.5654, -0.4634, -0.0124,
        -0.1004, -0.4860, -0.0034, -0.2899, -0.3376,  0.0242,  0.0897,  0.0033,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2074,  0.0568,  0.0218,  0.0153, -0.0905,  0.0308, -0.2115, -0.3705,
        -0.0039,  0.0072,  0.0189, -0.0361, -0.0495, -0.4030,  0.0332,  0.0376,
        -0.3074, -0.2701, -0.2918, -0.3649,  0.0356,  0.0189, -0.2474, -0.4312,
        -0.0245, -0.1959, -0.3669,  0.0036, -0.2203, -0.4934, -0.0365, -0.0101,
        -0.1652,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6030e-02, -4.0069e+00,  9.3995e-03, -6.4921e-01,  2.3954e-02,
         1.0782e-01,  1.9030e-03, -2.0998e-01, -1.2606e+00,  8.0829e-02,
         1.4234e-01,  4.0743e-02,  1.6185e-01, -9.9897e-01, -1.6693e-01,
        -1.6758e-01, -1.8730e-01,  5.8523e-02,  1.5186e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3361, -0.0844,  0.1661, -0.1375, -0.5475,  0.0650, -0.6530, -0.4428,
        -0.8201, -0.0443, -0.0197,  0.0640,  0.0508,  0.1173,  0.0308,  0.0475,
        -0.0209, -0.4809, -0.3581, -0.1449, -0.2668, -0.5528, -0.1250, -0.0067,
        -0.2051,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2937, -1.9946, -0.7986,  0.0551, -0.0047, -1.0745, -1.6574,  0.4335,
        -0.1969, -0.4034, -0.0498, -0.0977, -0.4703, -0.0492, -0.1035,  0.0239,
        -0.1679,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.0252,  0.0108, -0.0191,  0.0136,  0.0245, -0.0503, -0.0507, -0.0257,
         0.0273, -0.4173, -0.1433,  0.0891,  0.1244,  0.0540, -0.0344, -0.1167,
        -0.8653, -1.4175, -0.1261, -0.1862, -0.0277, -0.3549, -0.6802, -0.0724,
         0.0487, -0.0907,  0.0458,  0.0045,  0.0400,  0.0487, -0.1789,  0.0473,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6250e-01, -1.2211e+00, -8.6189e-01, -4.6468e-02, -1.5873e-02,
        -7.0387e-02, -4.4724e-01, -9.1855e-01, -1.9136e-02,  8.1643e-02,
         9.1223e-02, -9.0639e-03, -3.0216e-02, -5.2551e-02, -4.6496e-02,
         1.0220e-01, -1.2339e-01, -8.8645e-02, -4.6057e-01, -2.2003e-01,
        -3.2374e-01,  7.0630e-04, -3.1610e-01, -5.7735e-01,  6.1197e-02,
        -1.5118e-01, -4.9924e-02, -1.4592e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2928, -0.1359, -0.0585,  0.0062,  0.0561, -0.8673, -1.0591, -0.3269,
        -0.1464, -0.1247, -0.0642, -0.6037,  0.0273,  0.1415, -0.0232, -0.6006,
        -0.0679, -0.5463, -0.0279, -0.1036, -0.0326,  0.0138, -0.1893,  0.0977,
        -0.4487,  0.0411,  0.0353, -0.0382, -0.2376,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1249, -0.9214, -0.0482, -0.4158, -1.0268, -0.0442,  0.0171,  0.0395,
        -0.0529,  0.2257, -0.0182, -0.1219, -0.3525, -0.7702,  0.2562, -0.3684,
         0.3125, -0.4780, -0.0778, -0.2264,  0.0752,  0.0767, -0.2504, -0.0665,
        -0.2161, -0.2964, -0.2641, -0.0146, -0.0116, -0.0459, -0.0078, -0.1781,
         0.0118,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1016e+00, -2.8315e+00, -2.0977e-01, -7.5376e-01, -9.4924e-02,
        -5.9688e-01, -1.9026e-02, -2.3191e-01, -1.3181e-01, -5.5617e-01,
        -6.8030e-01, -5.0758e-02, -3.6040e-02,  7.5254e-02, -2.5778e-02,
         1.0030e-01, -1.4936e-01,  8.3211e-02, -4.8601e-01,  2.4001e-03,
        -3.2502e-01,  1.9375e-01, -9.4448e-02,  2.0875e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0939, -1.1532, -0.8488,  0.0043, -0.5344, -0.0335, -0.1271, -0.3324,
         0.0735, -0.2707,  0.0292, -0.4136, -0.4472, -0.0948,  0.0277,  0.0419,
         0.1303, -0.2200,  0.0026, -0.6377, -0.3668, -0.0106,  0.1879,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5351e-01, -1.6256e+00,  5.7364e-02, -8.8988e-02, -1.8083e-01,
        -2.6869e-01,  1.2432e-01,  4.1471e-02,  1.0710e-02, -6.2872e-03,
         6.5129e-03,  3.0863e-02, -8.6511e-02,  5.1689e-02,  8.7337e-04,
         1.4126e-02, -3.8572e-01, -3.0455e-01, -1.8405e-01,  2.0522e-02,
        -6.1333e-02, -1.6518e-01, -5.2913e-02, -8.0696e-02, -4.0805e-01,
        -1.2942e-01,  2.0602e-02, -7.7446e-03, -3.6386e-02, -3.9889e-01,
        -3.4225e-01,  1.2420e-02, -2.4759e-01, -6.1614e-02, -2.6737e-01,
        -3.5001e-01, -2.1448e-03, -1.0092e-01,  5.1620e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3448, -0.1269, -0.1088, -0.2281, -0.7893,  0.0287, -0.0199,  0.0840,
         0.0463, -0.0218, -0.0370, -0.0042,  0.0300, -0.0093, -0.1055, -0.3105,
         0.0089, -0.0661,  0.0305,  0.0150, -0.2682,  0.0140,  0.0186,  0.0261,
        -0.3916, -0.5805,  0.0379, -0.2550, -0.0602, -0.2213, -0.0665, -0.0342,
         0.0841, -0.1193, -0.0567, -0.0064, -0.1234, -0.6229,  0.1210,  0.0392,
        -0.0569,  0.0496, -0.0332,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4961e-01, -1.5685e+00, -2.8261e-02, -5.8791e-02,  2.2453e-02,
        -3.8991e-02, -1.7062e-02, -2.4921e-02, -4.0777e-01,  4.1635e-02,
         7.3945e-02, -7.2783e-03, -2.4881e-01, -1.4028e-02, -3.5123e-02,
        -4.6984e-02, -5.1126e-01, -4.0105e-01, -3.6894e-02, -3.4049e-02,
        -7.9213e-03, -7.5230e-03, -4.6069e-02,  1.5015e-02,  2.7164e-02,
         5.7884e-03,  4.3170e-02, -1.1885e-02,  2.9802e-02, -8.4027e-03,
         5.7788e-02, -1.1712e-01, -2.9937e-01,  4.3912e-02, -6.4364e-02,
        -6.7989e-02, -3.0007e-02, -2.2097e-01, -3.8999e-01, -7.9368e-02,
        -2.2550e-01, -3.2350e-01, -8.1818e-03,  1.7083e-02, -4.0498e-02,
         9.7865e-03, -2.6156e-03,  7.7446e-04,  2.7143e-02, -1.2938e-02,
         7.4593e-02, -2.7776e-02, -7.3807e-03,  1.1710e-02,  2.1140e-02,
        -3.0930e-02, -2.4302e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0842, -4.1288, -0.2958, -0.5042, -0.4536,  0.0866, -0.7605, -1.2564,
        -0.1884, -0.0698, -0.4785, -0.0751,  0.5689,  0.6416,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6046e-01, -1.8406e+00, -5.7932e-01, -6.9766e-04, -1.1512e-02,
        -4.7069e-01, -8.1128e-02,  5.1527e-02, -2.4998e-01,  4.1365e-02,
        -3.6641e-01, -6.4357e-01, -1.9090e-01, -1.8355e-01, -5.0461e-01,
         5.0743e-02, -2.7420e-01, -3.5631e-01,  3.2235e-01, -4.8729e-01,
        -9.8222e-01, -1.2650e-01, -2.0980e-01,  9.1443e-02,  6.3760e-02,
         9.9797e-02, -1.6592e-02,  1.1565e-02,  1.3209e-01,  7.3213e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0215, -2.8018, -0.0359, -1.7297, -1.4502, -0.2134, -0.2376,  0.0367,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 4.4767e-01, -1.4282e+00, -1.8374e-01, -2.1143e-01, -2.2387e-01,
        -8.6612e-01,  9.6078e-03,  1.5074e-01, -5.9009e-02, -2.2468e-02,
        -4.5195e-02,  6.4021e-02, -4.5528e-01, -7.4587e-01, -2.1937e-02,
        -2.2871e-02,  8.3740e-02, -4.7321e-02,  3.6804e-02, -2.2407e-04,
        -4.9211e-02,  1.7846e-02, -1.1811e-02,  6.2905e-03, -4.5017e-01,
         1.4091e-02, -3.8040e-01, -7.0325e-02, -3.1633e-01, -8.2141e-01,
         3.0979e-02,  4.1052e-02, -2.1730e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5021e-02, -1.1282e+00, -6.4511e-01, -1.4410e-01, -8.5385e-03,
        -2.0640e-01, -1.1677e-01, -4.5712e-02,  2.6929e-02, -1.7506e-01,
        -6.3749e-02, -8.3180e-02, -1.2522e-01, -3.8875e-03, -2.0019e-02,
         1.9265e-02, -9.6169e-04,  2.8954e-02, -9.0125e-02,  2.4203e-02,
        -7.5838e-03,  6.6437e-03,  5.0943e-02, -1.2248e-02,  3.5007e-02,
         2.9584e-02,  3.2534e-02, -2.3107e-01, -2.2798e-02,  8.2147e-03,
         3.2316e-02, -7.7614e-03, -1.4859e-01, -2.5400e-01, -6.7505e-03,
         1.2615e-02, -1.0371e-01,  5.4781e-02,  1.5425e-03,  1.3856e-02,
         1.0016e-02,  4.4633e-02, -6.0482e-03, -2.2349e-01,  3.1733e-02,
         3.6234e-02, -3.1709e-02,  3.5507e-03, -2.5593e-03,  2.1884e-02,
         3.2237e-02, -1.7553e-01, -9.5383e-02, -1.3257e-01, -4.9857e-02,
         1.6155e-02, -9.0766e-03,  4.2533e-02,  2.7609e-02, -1.4958e-01,
        -1.8356e-02, -8.7446e-02, -3.3812e-01,  1.4204e-02, -1.5091e-03,
        -6.6773e-03, -3.8486e-02,  3.0604e-03,  1.2743e-03, -2.6319e-02,
        -3.1085e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6404, -0.1059, -0.2312,  0.0424, -0.5173,  0.0152, -0.9518,  0.0276,
         0.0250,  0.0652,  0.1228,  0.0455,  0.0855, -0.0135,  0.0472,  0.0773,
         0.0047,  0.1539, -0.1251, -0.8917,  0.0201,  0.0500,  0.1219, -0.5187,
         0.1234, -0.3205, -0.1058,  0.0206,  0.0085,  0.0089, -0.0491,  0.0700,
        -0.8601, -0.7130, -0.1224, -0.0867, -0.6687, -0.0834,  0.0785,  0.0013,
        -0.1136, -0.0958,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4708e-01,  3.5634e+00,  1.8049e-01,  3.1797e-01,  1.6818e-01,
        -1.3138e-01, -1.3654e-01, -1.1970e-02,  3.8369e-01,  6.0642e-01,
         2.1572e-01,  6.5616e-01,  7.4353e-01, -4.1005e-04,  2.0142e-02,
         9.6686e-02,  1.4449e-01,  5.6083e-01,  1.6764e+00, -1.0584e-01,
         1.4870e-01,  2.3453e-01, -7.5526e-02, -2.0351e-01,  4.0917e-01,
         5.0447e-01,  3.5651e-02,  6.0963e-03,  1.1739e+00, -4.6640e-02,
        -7.4711e-02,  1.4669e-01, -1.3024e-02, -4.3602e-02,  1.2600e-01,
         4.2212e-01,  1.9285e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3717, -3.3302,  0.0852, -0.5138, -0.0701,  0.0353, -0.0783,  0.0060,
        -0.0520,  0.0073, -0.5566,  0.2324, -0.2866, -0.1245,  0.0939,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3844, -0.0959,  0.1705,  0.0386, -1.0612, -1.7004, -0.0413, -0.9532,
        -1.1011, -0.1726,  0.1606,  0.0507, -0.4632,  0.0487,  0.2682, -0.1203,
         0.0593,  0.3207,  0.0507,  0.4227,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1703,  0.0088,  0.0060,  0.0711, -0.8536, -0.0371,  0.0316, -0.1492,
        -0.0390,  0.0165, -0.5083, -0.5203, -0.0614, -0.1144, -0.2538, -0.1719,
        -0.0479, -0.1622, -0.0438, -0.1321, -0.0373,  0.0501, -0.0781, -0.0197,
         0.1288, -0.3048, -0.5367, -0.0447,  0.0342, -0.1199, -0.2464,  0.0152,
         0.0193, -0.0314, -0.2374, -0.3177,  0.0367, -0.1470, -0.0494,  0.0148,
         0.1584,  0.1386,  0.3633,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0606, -0.0850, -0.1411,  0.0520, -0.3040, -0.0839, -0.3384, -0.8129,
        -0.2556,  0.0303, -0.0566,  0.0425, -0.0524, -0.4123, -0.5750, -0.0080,
        -0.0117, -0.0158,  0.0165,  0.0944, -0.2440, -0.1052,  0.0448, -0.0125,
        -0.0141, -0.0099, -0.0030, -0.0343, -0.5414, -0.0484, -0.0550, -0.1953,
        -0.0350, -0.2001, -0.2364, -0.0952,  0.1304,  0.0458,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1006, -1.7539, -0.8898, -1.2102,  0.1281, -0.5099,  0.0989,  0.1325,
        -0.1163, -0.4946, -0.0479, -0.0824, -0.2034,  0.0213,  0.0178, -0.0259,
         0.1446, -0.4580,  0.0167,  0.1113,  0.1412,  0.1776,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1112, -1.4490, -0.1723,  0.0602,  0.0302, -0.1591, -0.1791, -0.0059,
        -0.1481, -0.6002, -0.1304, -0.0073, -0.0184, -0.0098, -0.3166, -0.3796,
        -0.4793, -0.1281,  0.0038, -0.2909,  0.0024, -0.0353, -0.2890, -0.1696,
        -0.0553,  0.0162, -0.0816, -0.0089, -0.0400, -0.3861,  0.1070, -0.0498,
         0.1252, -0.0493, -0.1483, -0.3466,  0.0662, -0.0257,  0.0058, -0.1278,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2211e-02, -5.0479e-02,  6.7204e-03,  2.7197e-02, -9.0759e-02,
        -1.8116e-01, -6.7023e-02, -8.7035e-03, -2.3288e-01,  3.4532e-02,
         2.7876e-02, -4.2488e-04, -3.3342e-02, -8.1181e-01, -6.5298e-01,
        -6.9284e-02,  9.8195e-02, -1.4927e-03, -4.1601e-01, -3.9137e-02,
         1.2955e-01,  2.2023e-02,  9.0868e-02, -1.0946e-01, -4.8152e-02,
        -3.5520e-01, -1.4619e-02, -2.0350e-01, -5.8546e-01, -9.4966e-02,
        -1.6167e-01,  9.5564e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1943, -2.3221, -0.0292, -0.0161, -0.0855,  0.0815,  0.0305, -0.3899,
        -1.1699, -0.0512, -0.1430, -0.4240,  0.0439,  0.0717,  0.0049, -0.0235,
        -0.4872, -0.0935,  0.0534,  0.0070,  0.0060,  0.0252, -0.1099, -1.0323,
        -0.0936, -0.2095, -0.3235, -0.0177, -0.1265, -0.0683, -0.0663,  0.0094,
        -0.0063, -0.1273, -0.0305,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 1.0788, -0.3369, -1.0234, -0.7288,  0.1823, -0.2304, -1.0493, -1.0916,
         0.2214, -1.1643,  0.0840, -0.1770, -0.0284, -0.6418, -0.1168, -0.1496,
        -0.5223, -0.1121,  0.1226, -0.2690,  0.6926,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0658, -2.7906,  0.0903, -0.3597, -0.8377, -0.5125, -0.2873, -0.3404,
        -0.8648,  0.3844,  0.0032, -0.0057,  0.0703, -0.0365, -0.5789,  0.0946,
        -0.2306, -0.0814, -0.0197,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2272, -0.1937, -0.5162,  0.1538, -0.3333, -0.0497, -0.0835,  0.2024,
        -0.0153, -0.3335, -0.7080, -0.0946, -0.3434, -0.7003,  0.0710, -0.2827,
        -0.0863, -0.2990, -0.2627, -0.1519,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5669, -2.5339,  0.0901, -0.3151,  0.0776,  0.0171,  0.0212, -0.3503,
        -0.0559, -0.0714,  0.0066, -0.1826,  0.0634, -0.3515, -0.0275, -0.4586,
        -0.6707, -0.1599, -0.2232, -0.3626,  0.0485,  0.3193,  0.0517,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4372, -1.0872, -0.3415, -0.2816, -0.0136, -0.4755, -0.1570, -0.7023,
        -0.6838, -1.0218, -0.0826, -0.4412, -0.2087, -0.7487,  0.2417, -0.0037,
         0.0803, -0.0113, -0.2458, -0.2948, -0.3181,  0.0172,  0.1375, -0.0278,
         0.0390,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7779e-01, -3.2494e+00, -4.9643e-02, -6.5822e-02,  2.8327e-02,
        -6.6360e-03,  2.6771e-01, -1.9680e-02, -3.3374e-02, -4.2853e-03,
        -2.0467e-01,  1.8017e-02, -9.3214e-02,  1.5213e-01, -5.1071e-02,
         1.1695e-01, -7.8647e-04,  9.6987e-03,  2.0973e-01, -2.1172e-01,
        -5.7776e-01, -1.1944e+00, -7.4603e-01, -1.5281e-01, -3.6412e-02,
        -1.9915e-01, -2.0224e-01, -1.2352e-01, -1.7686e-01, -4.1399e-01,
        -1.8954e-01,  7.1205e-02, -1.6067e-01,  4.0007e-02,  6.9035e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1018, -0.1232,  0.0932, -0.7448, -0.0743,  0.1165, -0.4652, -0.0880,
        -0.8705,  0.0384, -0.0392, -0.1566, -0.0919, -0.5014, -0.6142, -1.1110,
         0.0012, -0.6379,  0.0392,  0.0885, -0.2934, -0.0729,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2661, -0.2817,  0.0710, -0.1103, -0.0866, -0.2341, -0.1629, -0.0856,
         0.0076, -1.9774, -0.1515, -0.1650, -0.1912, -0.1330,  0.0986, -0.3734,
         0.2881, -0.1404,  0.1222,  0.1466, -0.4338,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3851e-01,  4.9943e+00, -1.1532e-03, -4.2967e-02,  4.4730e-02,
        -4.6793e-03,  1.0655e+00,  1.1764e+00, -3.2683e-01, -1.3231e-01,
         1.8924e-02, -2.2835e-01,  5.5889e-03,  7.2346e-01, -3.9913e-02,
         5.6003e-01, -7.0470e-02, -3.0865e-01, -2.1664e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3595,  0.0643, -0.2101,  0.0476,  0.0873, -0.2713,  0.0270,  0.0189,
         0.0570,  0.0051,  0.0575, -0.0941, -0.0449, -0.1555,  0.0206, -0.0583,
        -0.2352, -0.3126,  0.0855, -0.2407, -0.0041,  0.0023, -0.2513, -0.0042,
        -0.5035, -0.0599, -0.1094, -0.1576, -0.0019, -0.7515, -0.0037,  0.0194,
        -0.3594, -0.4196,  0.0530, -0.0242, -0.0021, -0.0194,  0.0056, -0.1321,
        -0.0126,  0.0171,  0.0064, -0.0371, -0.0296,  0.0391, -0.0276],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0529, -2.0080, -1.1128, -0.0336,  0.0943, -0.0362,  0.0183, -0.0652,
        -0.3447, -0.0050,  0.0384, -0.0038,  0.1120,  0.1536,  0.0277,  0.0314,
        -0.4312, -0.0452,  0.0202, -0.0886,  0.0492,  0.1680, -0.6637, -0.6227,
         0.1376,  0.0887, -0.0556,  0.0311,  0.0238,  0.0463,  0.0022, -0.0066,
         0.1962,  0.0194,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6684, -3.3784, -0.0091,  0.1888,  0.1910, -0.4293,  0.0936,  0.0424,
         0.0621, -0.3024,  0.2195,  0.0909, -0.3525, -1.0973, -0.0047,  0.0063,
        -0.0498, -0.1175, -0.1245, -0.6326, -0.1082,  0.0559, -0.1519,  0.1473,
        -0.0597,  0.0804,  0.0249, -0.0329, -0.0412, -0.3144,  0.0337,  0.1610,
         0.0337,  0.0643,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.3861,  4.4065,  0.1152,  0.1782,  0.1798,  0.6815, -0.3736, -0.0100,
        -0.1683,  0.8021,  0.1776,  0.0321,  0.2721,  0.9624,  0.0714,  0.2466,
        -0.3467,  0.1515,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8185e-01,  4.1637e-02,  9.4975e-02, -3.1139e-01, -3.6860e-02,
        -4.6110e-02,  2.2486e-01, -1.1325e-01, -7.5017e-01, -2.3748e+00,
        -3.7552e-01, -1.0753e-01, -1.5755e-01, -1.9390e-01,  5.7316e-02,
        -1.1289e-01, -1.7024e-01, -2.5381e-01, -1.9665e-02, -1.6707e+00,
         1.7263e-03,  4.2652e-01,  1.2572e-01,  1.5300e-01,  2.9236e-02,
        -2.1613e-01,  9.8684e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5400e-02,  6.8817e-02,  3.9656e-03, -3.7185e-02,  4.7218e-04,
        -2.6894e-02, -5.2629e-02, -1.0795e-01, -7.6096e-01,  1.9213e-01,
         6.1252e-02, -1.9317e-01, -8.8694e-01, -9.0943e-02,  6.3488e-03,
        -1.8561e-02, -7.2718e-02, -1.3121e-01,  7.9588e-02, -1.1349e+00,
         2.7736e-02,  1.4330e-01, -5.2125e-01, -3.7025e-02, -3.3336e-02,
        -6.4698e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4492, -2.1322, -0.0276, -0.7508, -1.2462, -0.1691, -0.1005,  0.2735,
        -0.6012, -0.3419, -0.7202, -0.2405,  0.1863, -0.1368,  0.0892, -0.6273,
        -0.3005, -0.2792, -0.1652,  0.3853,  0.3730,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1998, -1.6044, -1.4919, -1.5421, -0.0540, -0.2819, -0.8771,  0.0870,
        -0.3916, -0.0269, -0.1554, -0.3692,  0.2939, -0.0667,  0.1258,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3506,  0.0334, -0.0143, -0.2451,  0.3496,  0.1039, -0.2373,  0.2925,
        -0.2144, -0.0633, -0.2569, -0.4382,  2.7078,  0.5504,  0.1405,  0.3010,
         0.0719,  0.4933,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2799, -1.5784, -0.6733, -0.8394, -0.0070,  0.1822, -0.4165, -0.5913,
        -0.1477, -0.4878, -0.3940, -0.2654, -0.0482, -0.4741, -0.0151, -0.0184,
         0.1163, -0.1922,  0.2318,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8624e-01, -1.1363e+00, -7.9343e-01, -3.1502e-01, -6.3445e-02,
        -2.1623e-01,  5.4293e-02, -4.5004e-02, -1.2218e-01, -4.4338e-02,
        -5.8582e-04, -6.3241e-02, -1.7258e-01,  5.5424e-02,  5.9440e-03,
        -5.3872e-02, -2.7625e-01,  3.3357e-02,  6.5079e-02,  9.4917e-02,
        -3.3510e-01,  2.9309e-02, -3.7220e-01, -2.4740e-02, -3.5873e-01,
        -4.2353e-03, -3.0403e-01,  1.2037e-01, -3.1907e-02, -1.7346e-01,
         3.5509e-02,  6.6127e-02, -3.5008e-01,  8.2694e-04, -1.7180e-01,
        -9.1088e-02, -4.1209e-01,  1.1572e-01, -9.0619e-02, -7.1402e-02,
        -2.6737e-02, -1.7801e-02, -1.2241e-01, -1.6851e-01], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9311e-01, -2.0955e+00, -1.5854e-01, -7.3964e-01, -7.8974e-01,
         1.4773e-01, -2.6020e-01, -1.5826e-01, -7.1817e-01, -3.3387e-02,
        -3.1361e-01, -4.9622e-02, -2.9059e-02, -1.6575e-02,  3.6129e-02,
        -2.0096e-03, -6.1199e-02, -6.8500e-01, -1.3113e-02, -1.7688e-01,
        -3.7756e-01, -4.6330e-02, -3.3781e-02,  2.7275e-02,  2.8984e-02,
         8.1752e-03,  3.0966e-02, -4.8027e-01,  4.3844e-03, -6.1473e-03,
        -2.9309e-02, -2.1145e-01,  6.8755e-02, -1.8174e-01,  7.5751e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3528,  4.6202,  0.1773,  0.9419,  0.0276, -0.2542,  1.4228,  0.6060,
         0.4522, -0.0097, -0.0871,  0.4944,  0.9399,  0.0329, -0.1750,  0.5130,
         0.0281, -0.0231, -0.1629, -0.0239, -0.0395, -0.0312,  0.4006,  0.1321,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1201, -1.5511, -0.0160, -0.6223,  0.0845, -0.1640,  0.1474, -0.2707,
        -0.8914, -0.0024, -0.2443,  0.1328,  0.0350, -0.3019, -0.7833, -0.0977,
        -0.0150, -0.0524, -0.4112, -0.0149, -0.0485, -0.3439,  0.0498, -0.3623,
        -0.0689, -0.3497, -0.3661, -0.2539, -0.1522, -0.1652, -0.1897, -0.0561,
         0.0898, -0.0869,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0477, -0.0104, -0.0761, -0.1724,  0.0058,  0.1002, -0.6445, -0.0982,
         0.0706,  0.0079,  0.0053, -0.0130, -0.3390, -0.0734, -0.5360, -0.7854,
        -0.0225, -0.0368, -0.0060,  0.0990, -0.0139, -0.0110, -0.0362, -0.2497,
        -0.2479, -0.0736, -0.1504,  0.0352,  0.0385,  0.0940,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 0.2583,  0.0613, -0.1820, -0.9120, -0.2028,  0.0475,  0.0030,  0.0308,
        -0.7280, -2.0391, -0.4944,  0.2030,  0.3561,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1642,  0.0273,  0.0241,  0.2144, -0.0506, -0.2350, -0.1957, -0.7379,
        -1.0721,  0.1811,  0.0294, -0.5709,  0.0582, -0.2967, -0.8520, -0.0044,
        -0.0052, -0.1326,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9169,  3.3148,  2.0903, -0.6407,  1.2140, -0.0905,  0.1255,  0.4002,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0305, -1.9726, -0.1693, -0.3197, -0.1721,  0.0633, -0.1276,  0.0114,
        -0.0552, -0.0983, -0.0355,  0.0948, -0.1156,  0.0872,  0.0225, -0.1125,
         0.0909,  0.0449, -0.0064,  0.0266,  0.0448, -0.1415, -0.4148, -0.1985,
         0.0411, -0.2822,  0.0174, -0.0632, -0.5625, -0.1693,  0.0482, -0.2485,
        -0.0729, -0.8445,  0.0203, -0.2713, -0.4407, -0.0710,  0.1036,  0.0330,
        -0.1638,  0.0149, -0.1645,  0.0951,  0.1218, -0.2069,  0.1764],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0771, -1.0415,  0.3697, -2.1636, -0.0815, -0.1147, -0.4832,  0.0619,
         0.1402,  0.1034,  0.3284,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0281e-01, -2.6925e+00, -5.7247e-01, -1.6362e+00, -5.5529e-01,
         2.0773e-01, -4.7933e-01, -1.4592e+00, -5.9846e-02, -3.5959e-01,
        -5.6114e-02,  1.2151e-01, -4.1325e-01, -1.0037e-01,  6.5150e-02,
        -2.0938e-04,  7.3047e-02,  8.0194e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.8516, -0.6007, -1.9269, -0.2339,  0.1984,  0.1137, -1.0345, -0.0788,
        -0.5607, -0.2209,  0.0262, -0.1384,  0.0959, -0.0400, -0.2422,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3996, -3.3469, -0.1586, -0.1703,  0.2611, -1.2351, -1.4077, -0.3287,
        -0.2771, -0.6908, -1.0969,  0.0628,  0.0706, -0.5346, -0.1865,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3633,  0.2262, -0.0705,  0.3875, -0.0145, -0.0672, -0.0655, -0.1119,
        -0.3249, -0.1377, -0.0856, -0.5662, -1.3251,  0.0142,  0.1518, -0.2112,
        -0.3468, -0.1633,  0.1299,  0.0164, -0.8581, -1.4243,  0.2609,  0.1586,
        -0.1622, -0.1309,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6229, -0.0635,  0.1374,  0.3442, -0.0330,  0.2794,  0.1440, -1.6528,
        -1.9696, -0.2270, -0.0443,  0.1063,  0.2396,  0.3050, -0.2514,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1206e-01, -3.3482e+00, -5.1904e-02, -4.5167e-01, -7.2543e-01,
        -1.7423e-01, -3.3425e-01, -1.0681e-01, -4.3119e-02, -1.2367e+00,
        -3.2840e-02,  1.4338e-01, -2.6139e-01, -1.0135e-01,  6.6297e-02,
         2.1259e-02, -2.3799e-01, -3.1916e-03, -2.1440e-01,  4.6858e-02,
        -2.2644e-01,  4.2032e-02,  8.4779e-03, -1.3821e-01,  4.4638e-01,
         4.0429e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1423,  0.1006,  0.0681,  0.0771,  0.0308, -0.2265, -0.0446,  0.0427,
        -0.2040, -0.2500, -0.6437, -0.0671, -0.0793,  0.0335, -0.6195, -0.5312,
         0.0362, -0.2106,  0.0481, -0.2745, -0.0482, -0.1718, -0.0215, -0.1079,
         0.0404, -0.0284,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-0.4861, -3.2092, -0.0339, -0.0096, -0.1850, -0.0110, -0.2734, -0.6779,
        -0.0407, -0.2413,  0.0060, -0.2500, -0.6043,  0.0107, -0.1862,  0.0275,
         0.0150,  0.0896, -0.1409, -0.0034,  0.0768, -0.1654, -0.0691, -0.3515,
        -0.5355,  0.1150, -0.2711, -0.0333,  0.1310,  0.0050,  0.0351, -0.2452,
        -0.0046,  0.0491,  0.0838, -0.0810,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0489, -0.3830, -0.3440,  0.1102, -0.5115, -0.0741, -0.4195, -0.5871,
        -0.0189,  0.0292, -0.2651, -0.0128, -0.0071,  0.0413, -0.0162, -0.1365,
         0.0564, -0.0485,  0.0106, -0.0048, -0.0482, -0.3821, -0.0513, -0.0727,
        -0.3298, -0.5058,  0.0198, -0.2659, -0.2651, -0.0434, -0.0698, -0.2934,
        -0.4211,  0.1173, -0.3369,  0.1646,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0316,  0.1111, -0.1167, -0.1903,  0.0724, -1.0524, -0.3026,  0.0751,
         0.0446,  0.0904, -0.0211, -1.1327, -1.3727, -0.2138,  0.0698, -0.1446,
         0.0167,  0.0440, -0.1217,  0.5738,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2686e-01, -8.4048e-01, -1.4425e-01, -1.9010e-01,  1.2175e-01,
        -2.3809e-02,  1.1793e-01, -2.1468e-01, -2.1128e-02, -1.1089e-01,
         1.5984e-02, -4.9118e-02,  2.1195e-02,  1.0433e-01, -1.7831e-01,
        -4.5324e-01, -8.5033e-02, -3.1214e-01,  2.3898e-02, -4.4630e-03,
         6.9789e-02,  3.6859e-02,  3.5493e-02,  1.5974e-01,  4.0882e-02,
         6.5262e-02,  4.6484e-02, -2.3638e-02,  9.9938e-02, -2.3725e-01,
        -5.2843e-01, -8.1177e-02, -8.2920e-02, -3.0704e-01,  6.1190e-02,
         4.1712e-02,  5.6467e-02, -6.1458e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1452, -1.8540, -0.8800, -0.4384, -0.0662,  0.0271, -0.0773,  0.0441,
         0.0371, -0.0416, -0.3225, -0.4515,  0.0045, -0.0226, -0.0656, -0.2922,
         0.0286, -0.6188, -0.0030,  0.0143,  0.0507,  0.0264,  0.1005,  0.0996,
         0.0585, -0.0378, -0.0589, -0.1436, -0.4104, -0.0319, -0.0617, -0.0815,
         0.0348,  0.0143, -0.3867,  0.0262, -0.1454, -0.0567, -0.0573, -0.0404,
         0.0256], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2896, -4.3202, -0.0982,  0.3719, -0.0177, -1.0969, -0.3780,  0.0077,
        -0.1255,  0.0259, -0.0145, -0.6016, -0.9229,  0.0628,  0.0791, -0.3849,
        -0.0049, -0.4417,  0.0709,  0.1602,  0.0365, -0.6799,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1216e-01,  5.3812e-02,  2.4841e-02, -7.8500e-03, -1.0561e-02,
        -5.7751e-02,  4.8517e-02,  1.4282e-02, -1.0355e-01, -4.9746e-03,
        -6.6640e-02, -4.5252e-01, -2.6622e-02, -2.4176e-01, -8.0485e-02,
        -4.6979e-01,  1.1003e-01, -1.8509e-01,  4.5412e-02, -4.5522e-01,
        -5.1420e-01,  6.0496e-02,  6.3624e-02, -3.2737e-01, -2.5121e-03,
        -3.4820e-02, -3.0750e-02, -1.4007e-01, -5.5339e-01,  5.4059e-02,
        -1.5851e-01, -2.5492e-01, -9.0569e-02,  4.3546e-04, -1.4580e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7075e-01, -9.6285e-02, -1.3931e+00, -1.1282e-01, -7.5093e-01,
        -8.4353e-02,  1.0582e-01,  2.7313e-02,  1.9601e-01, -2.9296e-02,
        -3.0065e-01, -2.4356e-02,  2.2527e-02,  5.1774e-02, -2.3730e-02,
         1.5570e-01,  1.6451e-01, -8.0728e-03, -3.5615e-01,  1.0728e-03,
        -2.0739e-01,  2.8383e-02, -2.5060e-01, -6.3249e-01, -7.6657e-02,
        -2.1899e-01, -4.6697e-01,  2.9029e-03, -1.4787e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9315e-02, -1.9309e+00, -9.5611e-01, -3.7997e-01, -1.0035e-01,
        -7.8341e-01, -7.7430e-04, -2.8368e-02, -1.9786e-01,  1.2050e-01,
        -1.9176e-01, -3.5678e-01,  1.1779e-01, -8.4721e-02,  9.8935e-02,
        -9.7759e-01, -4.5774e-01,  2.8163e-02, -4.3401e-01,  2.5616e-02,
        -2.9880e-01, -9.6183e-02, -4.0466e-01,  2.6150e-01,  1.6049e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0340,  0.0830,  0.0249, -0.0210, -1.0570, -0.0691, -0.1252, -0.6513,
        -0.7395, -0.0900, -0.0328, -0.0017,  0.1229,  0.1081, -0.5323, -0.8296,
         0.1658, -0.1592, -0.0182,  0.0425,  0.0014,  0.1300,  0.0993,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0635, -0.1782,  0.0057,  0.0287,  0.0021, -0.0413,  0.0533, -0.0456,
         0.0298,  0.0018, -0.0127, -0.0296, -0.5549, -1.0330,  0.0060, -0.0690,
        -0.0668, -0.6962, -0.0515,  0.0479, -0.0984,  0.0216,  0.0030,  0.0565,
         0.0817, -0.0459, -0.9672, -0.0940, -0.6540, -0.1099, -0.4351, -0.0144,
        -0.1698, -0.2128, -0.2761, -0.1124, -0.0514, -0.1875, -0.0112,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1637, -0.0932,  0.1605, -1.0230, -2.0422,  0.0423, -0.3987, -1.4593,
        -0.9912,  0.5157, -0.2684, -0.0550,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.1363, -0.9399, -1.2849, -0.0745, -0.0773, -0.0789, -0.4232, -0.0690,
         0.0242, -0.3472, -0.0224, -0.0504,  0.0323, -0.1859, -0.8128,  0.0027,
        -0.0582, -0.1979, -0.0237,  0.0088, -0.1000, -0.1474,  0.0513,  0.0612,
        -0.0360,  0.0672,  0.0494, -0.0188, -0.1037,  0.0071,  0.0596,  0.0569,
        -0.1914,  0.0104,  0.0887,  0.1077, -0.3465, -0.0464,  0.0074,  0.0148,
         0.0646,  0.0843,  0.0703, -0.0510, -0.0634,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3773, -6.9852, -0.7150, -1.1155, -0.1476,  0.0280,  0.2747, -0.1312,
        -1.4571, -0.2511, -0.2459,  0.1269,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1581, -4.6333, -0.0131, -1.3358, -0.3152,  0.0557,  0.0482, -0.3857,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4730e-02, -1.6766e+00,  4.7865e-03, -6.6793e-02, -5.5286e-01,
        -3.6871e-02, -3.0013e-02, -1.2128e-01, -2.0202e-01, -5.7503e-01,
        -5.9634e-02, -7.9127e-04, -3.6761e-02,  1.0219e-02,  4.0644e-02,
        -4.1534e-03, -8.3330e-02,  1.4823e-02,  3.8252e-02,  1.2636e-02,
         1.6880e-02,  2.2462e-02, -4.3511e-02, -4.9425e-02, -6.4796e-02,
         1.6455e-02,  8.0856e-03, -2.5279e-02, -2.1996e-01,  5.8881e-02,
         1.9832e-04, -7.8557e-02,  1.5066e-02,  2.4412e-02, -2.6963e-02,
         1.1974e-02, -7.1026e-03, -1.2998e-02,  4.7911e-03,  2.3088e-02,
        -2.4355e-02, -9.1955e-03, -3.8982e-02, -4.1474e-02,  3.9497e-02,
         6.8858e-02, -5.9645e-01, -1.5577e-02, -4.1961e-01, -7.8935e-01,
        -2.1974e-01,  4.9451e-02, -3.7609e-01,  2.8214e-02, -7.0253e-02,
        -1.7423e-02, -1.0209e-01, -4.5668e-02,  3.1012e-02,  1.7382e-01,
         2.2619e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1253, -0.0163,  0.0545, -0.1809, -0.6150,  0.1245, -0.2600,  0.0385,
        -0.0820,  0.0832, -0.0730, -0.5013, -0.0867, -0.3986, -0.7982, -0.0056,
        -0.1187,  0.0700, -0.2052, -0.0501, -0.3312, -0.4577, -0.0826,  0.0658,
        -0.2097, -0.1226, -0.0173, -0.1062, -0.0531, -0.2144,  0.0392,  0.0876,
        -0.0277,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2153, -0.2608, -1.3522, -2.0613, -0.1680,  0.0456, -0.5889,  0.1296,
         0.0570, -0.0225, -0.0146, -0.0247, -0.4547, -0.0770, -0.2350,  0.8517,
        -0.0237, -0.5023, -1.4862,  0.1552, -0.0640, -0.2453,  0.6808,  0.3548,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0920, -0.9466, -0.5368, -0.3657, -0.1428, -0.7371,  0.1357, -0.2977,
        -0.0128, -0.5337, -0.5224, -0.4237, -0.6359, -0.0426, -0.0211,  0.0312,
         0.0944, -0.0414,  0.1071, -0.0034, -0.2088, -0.0557,  0.0583, -0.3337,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0724, -0.0860, -0.4202, -0.7432, -0.1054,  0.0738,  0.0450, -0.4782,
         0.0238,  0.1090,  0.0147, -0.0175,  0.1006,  0.0284, -0.3268, -0.3536,
        -0.4211,  0.0850,  0.1225, -0.1341, -0.7082, -0.0445, -0.4422, -0.7349,
         0.0600,  0.0677, -0.3366,  0.0657, -0.0334, -0.2627, -0.1249,  0.0020,
        -0.0527,  0.0254,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2430,  0.0387, -0.0594,  0.0859, -0.6734,  0.0087,  0.0786,  0.0213,
         0.0537,  0.1491,  0.0036,  0.0828,  0.0228, -1.4208, -0.0801, -0.0540,
        -1.3312,  0.0742, -0.6799,  0.0660, -0.5375, -1.0119,  0.2094,  0.0648,
        -0.0968,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2446,  0.2891, -0.2215, -0.2856, -0.1683, -0.0244, -0.6237, -0.5287,
        -0.8390, -2.0184,  0.0858, -0.1856, -0.4344, -0.0811, -0.4939, -0.0732,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1431, -1.6644, -0.2104, -0.7087, -0.3739, -0.4788, -0.9515, -0.0773,
         0.1016, -0.0531, -0.0501, -0.1118, -0.3175,  0.0490, -0.0258,  0.0031,
        -0.3431, -0.9592, -0.4177, -0.1018,  0.0065, -0.1018, -0.2923,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6226,  0.4016,  0.2399, -1.2856, -3.0866,  0.3746,  0.3509, -0.0504,
        -0.1954,  0.2881,  0.1454,  0.0968, -0.0608, -0.4759,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([-0.2579, -1.4078, -0.5990, -1.7036, -0.0738, -0.0875, -0.5022, -0.0824,
        -0.0324,  0.0377, -0.8389, -0.8371, -0.1005, -0.1795, -0.0339, -0.0112,
        -0.0323, -0.7763, -0.0781, -0.6010, -0.1356,  0.0512, -0.1302,  0.2524,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0235, -0.0181, -0.4727,  0.0047, -1.3862, -0.2162, -1.1749, -0.0329,
        -0.1142, -0.0129,  0.0397,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0357,  0.2067,  0.1008,  0.1141,  0.0981, -0.6242, -0.9190, -0.0143,
        -0.5857,  0.0223, -0.3238,  0.1853, -0.1863,  0.2050, -0.3842, -0.1144,
        -0.2179, -0.5684,  0.0296,  0.1607, -0.2828,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1236,  0.1144,  0.2140,  0.0922,  0.3727, -0.0042, -0.2472, -2.0403,
        -0.9430, -1.6059, -0.0261, -0.0620,  0.0616, -0.1631,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2759,  0.1373, -0.0127, -0.0338, -0.1260,  0.1045,  0.0252, -0.5672,
        -0.3210, -0.1503,  0.2378,  0.5672,  4.1470,  0.3631,  0.1741, -0.0215,
         0.5344,  0.1974, -0.0229,  0.0905,  0.6849,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1609, -2.4486,  0.0796, -1.1438, -0.1043, -1.4500, -0.1434, -1.7679,
        -0.1946, -0.5922, -0.1801, -0.0043, -0.4933,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6351e-01, -1.0079e+00, -6.6721e-02, -3.9740e-02, -4.9539e-02,
        -1.6297e-02, -4.2710e-01, -5.5683e-01, -8.6332e-02,  7.2401e-02,
        -2.1765e-02,  3.0777e-02, -3.5727e-03, -4.6104e-02, -3.6531e-02,
        -1.7156e-01, -1.9443e-02, -1.9741e-02,  2.5704e-02,  9.9523e-02,
        -4.8850e-02,  9.8946e-02,  2.9466e-02, -4.5309e-02, -1.0954e+00,
         3.6115e-02, -4.0386e-01, -5.0950e-01, -5.3184e-01, -8.4040e-02,
        -7.7090e-04,  1.3487e-02, -2.8105e-02, -3.5915e-02,  4.4810e-02,
        -3.7615e-02, -2.9069e-01,  2.1722e-03, -3.0602e-03, -2.1210e-01,
        -2.1793e-01, -4.8107e-02,  6.2799e-02, -9.1271e-02, -2.3403e-01],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2444, -0.2296, -0.7727, -0.0779, -0.0068,  0.1255, -0.0053, -0.1644,
        -0.0196, -0.0395, -0.5552,  0.0589,  0.0962,  0.0534, -0.3386, -0.0324,
        -0.8878,  0.0159,  0.0235,  0.0294, -0.1563, -0.5474, -0.0380, -0.4738,
         0.0592, -0.0832, -0.1393, -0.1398, -0.8457, -0.0527,  0.0318,  0.0981,
        -0.0881,  0.2025,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3548, -0.0530,  0.2486,  0.1448, -0.0228, -0.0873, -0.0343,  0.2119,
         0.1153,  0.0306, -0.0311,  0.0397, -0.0245,  0.0364,  0.0828,  0.1820,
        -0.0090, -0.1142,  0.1469, -1.9823, -2.1709, -0.0318, -0.2239, -0.2230,
         0.1588, -0.1986,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8158e-02, -2.6716e+00,  9.2960e-02, -5.4623e-01,  1.5689e-01,
         8.3866e-02, -1.5964e-01, -4.7240e-01,  2.3590e-01,  1.7674e-02,
         1.5318e-04,  2.4922e-02,  5.1678e-02, -2.5676e-03, -3.4932e-03,
        -7.6749e-01, -1.3022e+00, -1.5531e-02, -2.1240e-01, -4.7696e-03,
        -1.8554e-01, -1.0184e-01, -1.6460e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2164, -1.5676, -0.2426, -0.4437,  0.0023, -0.0099, -0.1458, -0.3382,
        -0.5502, -0.1188, -0.0405, -0.0392, -0.0099,  0.1056, -1.0185,  0.0675,
        -0.0378, -0.2507, -0.4153, -0.0844, -0.0531, -0.0673,  0.0197,  0.0257,
         0.0122,  0.0143, -0.4552, -0.6884,  0.0408,  0.0164, -0.0297,  0.1495,
        -0.0192,  0.0628, -0.0818,  0.0382, -0.2744, -0.0159,  0.1868, -0.1308,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0765,  0.0037,  0.0137, -0.2364, -0.4380, -0.1206, -0.0460, -0.2872,
         0.0330, -0.0831,  0.0263, -0.0489, -0.2917, -0.0285, -0.2171, -0.4202,
        -0.4585, -0.0895, -0.0850, -0.2146, -0.0540, -0.4090, -0.4109, -0.0267,
        -0.0849, -0.0292, -0.0344, -0.1888,  0.0059, -0.1035, -0.3936, -0.0120,
        -0.1142, -0.0484,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 0.2029, -0.1322, -0.0879, -0.0893, -0.6789,  0.0191,  0.1329, -0.0674,
        -0.0113,  0.0231,  0.0305, -0.1501,  0.2177, -0.0197, -0.7804, -0.0644,
         0.0444, -0.0128, -0.3218, -0.0018,  0.0485,  0.0070, -0.1014,  0.1159,
        -0.2783, -0.4483,  0.0772, -0.0167,  0.0120,  0.0675, -0.0931,  0.0089,
        -0.0075,  0.0763, -0.1832, -0.0065, -0.0266, -0.2175, -0.0119, -0.2971,
        -0.3026, -0.0597, -0.1837, -0.0218,  0.0038,  0.0447,  0.0323, -0.0082,
        -0.0807, -0.0090, -0.2098,  0.0204], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0380, -0.0077, -0.0230, -0.0861, -1.0006,  0.1101, -0.1181,  0.1752,
        -0.0190, -0.1203,  0.0264, -0.0427, -0.0672, -0.9132, -0.1423, -0.7370,
        -1.1673, -0.5159, -0.7312,  0.0347, -0.1887, -0.1411, -0.2914, -0.5585,
         0.0578,  0.0585,  0.0141,  0.0713,  0.0984,  0.0229, -0.0487,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2612, -2.0498,  0.0697, -0.1198,  0.2513,  0.1223, -0.1845,  0.1084,
        -0.0497,  0.3924, -0.0046, -0.4579,  0.3687,  0.0108, -0.5276, -0.6918,
         0.3248, -0.5709, -0.9210, -0.6668, -0.0336,  0.0309, -0.2136,  0.1961,
         0.1439,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0960,  0.0223, -0.0154, -0.2900,  0.0932, -1.7207, -3.3786, -0.5271,
        -0.1962, -0.1732,  0.0680, -0.2362,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9103e-01, -3.0224e+00, -2.2228e-01, -1.6131e-01, -9.3434e-01,
        -2.8118e-01, -2.9243e-02, -5.8275e-02,  1.2024e-02, -8.6271e-02,
         7.1091e-02, -1.3674e-01, -6.3078e-01, -7.5334e-01,  3.6606e-02,
        -2.4892e-01, -1.5397e-01, -1.7263e-01, -2.2015e-03,  6.0582e-03,
        -3.6868e-01,  1.4115e-02, -1.6099e-01,  1.0153e-01, -2.3880e-04,
        -3.7685e-01,  1.3587e-01,  6.0638e-03,  2.9274e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5280,  3.2637,  0.1445, -0.4719,  0.2035,  2.1653,  2.3008, -0.4000,
        -0.1318,  0.0733,  0.0583,  0.4446,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3871, -3.0293, -0.1810, -0.4841,  0.2640, -0.9217, -1.1857,  0.0244,
        -0.6609, -0.2078,  0.0648, -0.1716,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6967,  0.3323, -1.2583,  0.0976, -0.4532, -0.0175, -0.0326,  0.0198,
        -1.1444, -0.1159,  0.0670, -0.5761, -0.0597,  0.0428, -0.0114, -0.7583,
         0.0418,  0.2550, -0.1751, -0.2404,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2020e-01, -2.1589e+00, -1.0886e-01, -7.6985e-01, -9.3602e-01,
        -1.1294e-01, -2.8732e-01, -1.3798e-01, -2.9454e-01, -2.1534e-02,
        -5.7643e-02, -2.7189e-01, -3.1335e-01, -1.4684e-01, -5.6362e-02,
        -6.9433e-02, -2.6733e-01,  1.2104e-01, -7.4288e-02,  2.5090e-02,
        -3.5766e-01,  1.0220e-02,  5.9041e-04, -3.4841e-02,  6.8945e-02,
         3.0390e-02,  6.5175e-02, -1.6691e-01, -4.1547e-01,  1.1210e-02,
         3.6922e-02,  1.6901e-02, -1.1476e-01, -3.8329e-01, -5.2234e-02,
        -1.3420e-01,  2.5293e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6089, -6.6515, -0.6195,  0.1071, -0.2593, -0.0358,  0.1664, -0.5341,
        -0.0250,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0432,  0.0193,  0.0740, -0.9095,  0.0644, -0.0959, -0.4560, -0.5446,
         0.1737,  0.0587, -0.1838, -0.2949, -1.0055, -1.0211,  0.2550, -0.1109,
         0.0547,  0.1167, -0.7509, -0.9154,  0.2322,  0.1001,  0.0627,  0.1954,
        -0.0126, -0.3390, -0.1359,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0330, -2.5541,  0.1694, -0.5687, -0.1041, -0.3079, -0.0339,  0.0459,
        -0.0036, -0.1182,  0.2295, -0.3047, -0.1306,  0.0264,  0.0292,  0.1275,
        -0.7795,  0.0318,  0.1059,  0.1145, -0.0077, -0.7937, -1.1388, -0.1511,
        -0.0823, -0.2805, -0.0544, -0.2695, -0.6917, -0.0035, -0.0364,  0.0096,
         0.3586, -0.0042,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.0363, -0.0034, -0.0123,  0.0470, -0.4544, -0.1547,  0.0189, -0.0391,
         0.0377,  0.0507,  0.0052, -0.6529,  0.1599,  0.0080,  0.0657, -0.2546,
        -0.5701, -0.0231, -0.1925,  0.0810,  0.1477,  0.0843, -0.1291,  0.0163,
        -0.0830, -0.0136,  0.0275,  0.0958, -0.0284,  0.0168,  0.1157, -0.4098,
        -0.9327,  0.1175, -0.1433, -0.2283, -0.4654, -0.0595,  0.0167, -0.0242,
        -0.1271, -0.1117, -0.0562,  0.2468,  0.0378,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0860, -0.0500, -0.0391, -0.0379, -0.2583, -0.7461, -1.3135, -0.0125,
         0.0214, -0.7993,  0.0450, -0.0950, -0.2367,  0.1786, -0.1247, -0.1978,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1165,  0.0523,  0.0324, -0.0254,  0.0419, -0.2641, -0.1179, -0.1006,
        -1.1623, -1.6332, -0.4608, -2.2569,  0.3516, -0.5708, -1.0705,  0.0419,
        -0.0662,  0.0439,  0.0975, -0.0199, -0.0779,  0.0919,  0.0631,  0.1084,
         0.1382,  0.1341,  0.5459,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0783e-01,  9.1227e-02, -4.3087e-01, -4.5603e-02, -2.3179e-01,
        -5.7176e-02, -1.5945e-01, -6.1774e-02, -1.9181e-01,  5.7603e-03,
         2.2687e-04, -3.1503e-02, -2.0115e-01,  6.4330e-03, -7.7862e-02,
         8.0791e-03, -2.0229e-01,  3.0674e-02, -1.0371e-01, -3.3638e-01,
         1.3384e-02, -2.3877e-01, -9.4971e-02, -4.7218e-01, -6.2303e-02,
        -1.9006e-03, -1.7672e-01, -4.7170e-01,  4.8755e-02, -2.7512e-01,
        -2.4547e-02, -2.0921e-01, -3.2245e-01, -6.4898e-02, -7.9984e-02,
        -3.3279e-01, -2.1373e-01, -5.3646e-01,  1.0755e-01,  7.8415e-02,
        -1.9944e-01, -3.2556e-01, -1.1175e-02,  1.2598e-01, -2.3036e-01,
        -2.7338e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0464,  0.1136, -0.0391, -0.0287, -0.1359, -0.0923,  0.0127, -1.0351,
         0.0210, -0.1103, -0.1497, -0.1843,  0.0605, -0.0948, -0.1386,  0.0087,
         0.0476,  0.0364, -0.8672, -1.5565,  0.0551, -0.0138,  0.0202, -0.0923,
         0.0130,  0.0905, -0.5221,  0.0212, -0.0771, -0.0751,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3908, -0.1744,  0.3650,  0.1450, -0.0698, -0.1156, -0.0343,  2.5019,
        -0.2083, -0.1641,  1.0979,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3928, -1.6014,  0.0306, -0.0581, -0.4933, -0.0035, -0.3835, -0.5966,
        -0.0727,  0.1031, -0.0133,  0.0454,  0.0528, -0.0535,  0.0904, -0.3846,
        -0.0646, -0.1312, -0.1002, -0.0364,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2618, -2.3773, -1.6311, -0.7116, -0.1758, -0.4214, -0.7700,  0.0754,
         0.1515, -0.0934,  0.1218,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3164,  3.2728,  1.7658,  2.0821,  0.3159,  0.3509,  0.3377,  0.8006,
         1.4274, -0.1532,  0.1260,  0.2088,  0.1661,  0.2912, -0.1536,  0.3991,
        -0.1807,  0.0630, -0.0958, -0.1349, -0.2407,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5818, -2.0438, -1.2192,  0.0947, -0.1491, -1.5677, -0.0217, -0.2503,
        -0.1368, -0.0793, -0.0376, -0.8559, -0.0417, -0.0530, -0.0590, -0.0061,
        -0.2448, -0.4310,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1587, -0.9106, -1.1054,  0.0127, -0.0995,  0.0247, -0.0172,  0.0105,
        -0.0244, -0.0913, -0.0084,  0.0832,  0.0357, -0.2861, -0.3117, -0.0082,
        -0.1073, -0.4043, -0.2395,  0.0737, -0.1466, -0.3344, -0.5594, -0.0454,
        -0.2964, -0.0363, -0.2726, -0.1091, -0.2107, -0.3258,  0.0209,  0.0197,
         0.0767,  0.0466, -0.0594,  0.0089,  0.0295, -0.0555, -0.0174,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.6252, -5.3450, -0.2337, -0.7834, -0.0457, -0.0770, -0.1091, -0.0298,
        -0.0489, -0.2079, -0.1904, -0.7462, -0.1250,  0.0346,  0.0231, -0.0169,
        -0.5143, -0.0450, -0.0878, -0.0703, -0.1095,  0.0775,  0.0241,  0.0549,
         0.0153,  0.0093, -0.1627, -0.1144, -0.4322, -0.7588, -0.0206, -0.0654,
        -0.0849,  0.0121,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 0.2565, -0.3179, -2.0547, -2.0193,  0.3187, -1.1540, -0.4174, -0.5375,
        -0.0330,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0632, -1.6914, -0.0660, -0.8084, -0.1859, -0.7066, -0.0053, -0.1787,
        -0.3257, -0.0142, -0.2541, -0.1682, -0.0017, -0.0701, -0.0932, -0.4620,
         0.0085, -0.8600, -0.0673,  0.0133, -0.0442,  0.0274,  0.0932, -0.0453,
        -0.2951, -0.0022, -0.0321, -0.0757,  0.0363, -0.0043, -0.0704, -0.3314,
         0.0286, -0.3345,  0.0739, -0.0220, -0.0548,  0.1579,  0.0454,  0.0513,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9930e-01, -7.6161e-01, -1.2083e+00,  2.1206e-02, -3.4145e-01,
        -4.9497e-02, -1.8207e-04, -4.6903e-02, -2.7563e-01, -3.9389e-01,
         2.4901e-02, -1.9069e-02,  6.8809e-03, -1.6255e-02, -2.1218e-02,
        -2.7145e-02,  4.9740e-02,  8.9347e-02, -3.1539e-01,  7.5121e-02,
        -2.2628e-01, -2.7666e-02,  3.2884e-02, -2.7312e-02,  5.2832e-02,
        -9.2188e-02, -2.4795e-01, -4.2706e-02, -3.0644e-01, -4.4878e-01,
        -2.1995e-02, -2.7408e-02, -2.1827e-02,  1.4132e-02,  3.6563e-02,
         7.2415e-03, -2.3591e-01, -3.0209e-01, -4.7782e-03, -1.7876e-02,
         9.9673e-03, -5.6483e-02,  1.6759e-02,  2.9222e-02,  1.4558e-02,
         1.8027e-02,  4.0805e-03, -1.4438e-02, -5.7014e-03,  1.0592e-01,
         2.2050e-02,  1.6829e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.4334, -2.1150, -0.0870, -0.5281, -0.1809, -0.0520, -0.7097, -0.2471,
        -0.2800,  0.1020,  0.0347, -0.0682, -0.5510, -0.1013, -0.2179, -0.5956,
        -0.4122, -0.2043, -0.4438,  0.0172,  0.0510, -0.2353,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6781, -3.3274,  0.1984, -0.2982,  0.0460, -0.2636,  0.0110, -0.3821,
         0.1198, -0.3272,  0.0291, -0.3639, -0.1326, -0.4101, -0.0099, -0.8103,
        -0.0662, -0.2601,  0.1964, -0.2150, -0.1169,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6050e-02, -1.6118e+00, -3.2948e-01, -8.6500e-02, -2.5316e-02,
        -5.6057e-02, -3.0966e-01,  4.0361e-02, -1.5408e-01,  2.9438e-02,
         3.0311e-02, -5.0738e-02,  2.3163e-02, -9.2426e-03, -2.5817e-03,
         6.6021e-03,  7.5592e-02,  1.0992e-01, -3.0811e-01,  3.5767e-02,
         4.8807e-02,  5.6827e-02, -3.9163e-02, -1.1177e-03,  1.9687e-02,
        -2.1215e-02, -8.9325e-02, -2.7055e-01,  4.3435e-02, -2.5882e-01,
        -3.8219e-01, -3.4858e-01, -1.8271e-02, -2.7954e-01, -1.9306e-01,
        -5.8421e-02, -4.1036e-02, -9.8513e-02, -3.0542e-03, -5.1236e-02,
        -4.2694e-01, -4.8346e-02,  4.7733e-04,  6.1068e-02, -2.3067e-01,
         2.0911e-02, -2.3351e-01, -2.0879e-01, -3.5181e-02, -2.0375e-02,
        -1.9102e-01, -4.8497e-02, -1.0979e-01, -5.1598e-02,  9.1855e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3622e-02,  4.8208e-02,  1.4855e-01, -8.2788e-02,  9.4826e-03,
        -1.7543e-01, -5.4942e-01,  3.4425e-02, -2.7823e-02,  1.9044e-03,
        -2.7983e-02,  4.4865e-02,  6.5917e-05, -1.0990e-01, -2.8571e-01,
        -1.6734e-02, -2.9557e-02, -2.2993e-01, -5.8167e-02, -4.4002e-01,
        -8.7711e-02, -1.1878e-01, -2.7792e-01,  5.7036e-02,  3.9784e-02,
         4.1038e-02, -2.7370e-02, -2.8607e-01,  7.1341e-02,  3.7322e-02,
         4.8833e-02,  8.4652e-02, -9.7416e-02, -4.1945e-01, -2.3375e-03,
        -1.1877e-01, -2.2828e-02, -4.3327e-01, -3.1260e-01, -2.5577e-01,
        -3.2932e-02, -2.8329e-02,  4.5436e-02, -1.0507e-01, -1.4830e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0564, -1.6575, -0.8214, -0.7538, -0.0539, -0.1193, -0.1632,  0.1275,
        -0.4285,  0.1274,  0.1577, -0.3066,  0.0735, -0.4913, -0.4064,  0.0761,
        -0.0414, -0.4369, -0.0654, -0.3205, -0.4398,  0.0472, -0.0395,  0.0082,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1047,  0.0436,  0.0400, -0.0154, -0.1393, -0.0699, -0.2163, -0.2697,
         0.0257, -0.0792,  0.0211, -0.0045, -0.1582, -0.1405,  0.0215,  0.1613,
        -0.2829, -0.2756, -0.2055, -0.4101, -0.0600, -0.0895, -0.3440, -0.3817,
        -0.1093, -0.1799, -0.2538, -0.0653, -0.1054, -0.4446, -0.0734, -0.1784,
         0.0606,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2524,  4.1128,  0.0437,  0.8919, -0.0921,  0.0612, -0.1698,  0.1037,
         1.0351,  0.0593, -0.1533, -0.0250, -0.0961,  0.7535,  0.1515,  0.5377,
         0.3332,  0.3101,  0.2048,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0148, -0.0065, -0.0372, -0.0913, -0.6030,  0.1021, -0.5253, -0.4908,
        -1.1116, -0.0575, -0.0136,  0.0535, -0.0446,  0.1427,  0.0297,  0.0081,
        -0.1481, -0.3884, -0.2650, -0.0520, -0.2673, -0.6466, -0.0386,  0.0967,
         0.1588,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1029, -2.0895, -0.9857,  0.0243, -0.3023, -0.7062, -2.3087,  0.2393,
        -0.0663, -0.3122, -0.1239,  0.0679, -0.5378,  0.1551, -0.0125, -0.0653,
         0.0380,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 0.5519,  0.0733, -0.1607,  0.0333,  0.0550, -0.1033,  0.0529,  0.0655,
         0.0419, -0.7709, -0.1097,  0.0953,  0.1806,  0.0416,  0.0121, -0.2409,
        -1.0508, -1.2642,  0.1038, -0.7180,  0.0275, -1.0008, -1.1464,  0.0957,
        -0.0831,  0.0533,  0.0045, -0.0065,  0.0678, -0.2491, -0.3404,  0.1452,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8987e-01, -5.6099e-01, -7.1920e-01,  6.3051e-02,  5.3427e-02,
        -1.8778e-02, -3.5188e-01, -6.7530e-01, -2.8433e-02,  7.6684e-02,
         4.0959e-03,  1.2570e-03, -4.3312e-02,  5.3122e-04, -1.1223e-01,
         8.8025e-02, -8.8113e-02,  5.0893e-02, -3.3862e-01, -1.2655e-01,
        -3.3943e-01, -6.3820e-03, -4.9061e-01, -5.7575e-01, -1.3475e-01,
        -1.5666e-01, -1.7462e-01,  9.9594e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0452, -0.2212,  0.0034,  0.1292, -0.2135, -1.0707, -1.3421, -0.3415,
         0.0213, -0.1762, -0.1005, -0.6196, -0.0611,  0.0055, -0.0525, -0.5355,
        -0.1081, -0.6434,  0.0077,  0.0979, -0.0687,  0.0190,  0.0026, -0.0168,
        -0.3462,  0.0381,  0.1251,  0.0598, -0.1504,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2146, -1.2160, -0.0259, -0.4834, -1.1917, -0.0687, -0.0772,  0.0280,
        -0.0290,  0.0992, -0.0323, -0.0857, -0.5235, -0.6478,  0.1717, -0.1900,
        -0.0966, -0.7287, -0.0204, -0.2249,  0.0883,  0.0317, -0.2971,  0.0415,
        -0.0388, -0.4273, -0.2669,  0.0026,  0.0056, -0.0132, -0.0468,  0.0244,
        -0.0212,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0719, -1.5093, -0.0972, -0.7685, -0.0813, -0.3061,  0.0534, -0.3901,
        -0.0718, -0.5163, -0.4728,  0.0426,  0.0535,  0.0317, -0.0772,  0.0422,
        -0.3008,  0.0229, -0.6967,  0.0678, -0.3650, -0.0682,  0.2939,  0.0932,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5919, -0.9355, -1.0874, -0.0408, -0.3245,  0.0353, -0.1006, -0.3569,
         0.1118, -0.3847, -0.0590, -0.4983, -0.7897, -0.0111, -0.0561, -0.0474,
         0.0162, -0.3255, -0.0414, -1.0043, -0.4385,  0.4958,  0.2119,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4921e-01, -1.4064e+00, -7.6844e-02, -2.5306e-02, -3.1948e-01,
        -3.5811e-01, -1.5993e-02, -3.2369e-02,  4.0081e-02,  3.4867e-02,
         2.4245e-02,  7.5707e-03, -1.1607e-01,  2.9845e-03,  5.2228e-03,
        -1.3226e-01, -5.4832e-01, -4.2040e-01, -2.7366e-01,  1.5605e-01,
         3.1807e-04, -3.6678e-01, -5.0944e-02, -6.8269e-02, -3.9210e-01,
        -1.2099e-01, -3.8957e-02, -1.5504e-02, -7.5970e-02, -4.1642e-01,
        -3.4190e-01, -1.1196e-02, -1.2117e-01, -1.1716e-01, -3.1620e-01,
        -4.5396e-01, -2.0647e-02, -1.0513e-01, -8.3190e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1509, -0.3070, -0.0286, -0.1452, -0.6839, -0.0637,  0.0496,  0.0459,
        -0.0574,  0.1564,  0.0376,  0.0095,  0.0160,  0.0306,  0.0339, -0.3730,
        -0.0422, -0.0656,  0.0218, -0.0378, -0.4349,  0.0044, -0.0250, -0.1349,
        -0.6679, -0.4729, -0.1065, -0.3153, -0.0796, -0.5113, -0.1020, -0.0105,
        -0.0505, -0.0286, -0.0441,  0.0287, -0.1696, -0.4420,  0.0524,  0.0146,
        -0.0279, -0.1225,  0.0317,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5368e-01, -1.7546e+00, -1.2365e-01,  1.7961e-02, -1.8826e-01,
         9.2074e-03,  8.0747e-04,  8.2580e-03, -5.1418e-01,  1.5907e-01,
         3.5103e-02, -1.2249e-02, -2.0569e-01, -2.0610e-02,  4.1481e-03,
         2.7542e-02, -2.4446e-01, -3.7618e-01,  2.1924e-02, -3.1718e-02,
         1.0737e-01,  5.1500e-03,  3.4388e-02,  1.3755e-02,  4.9282e-02,
         4.5956e-03,  1.1778e-01, -4.5643e-03,  1.0672e-02,  3.0361e-02,
        -5.2972e-02, -3.9510e-01, -5.5303e-01, -3.4962e-02, -3.8987e-02,
        -5.6158e-02,  2.6217e-02, -3.7016e-01, -3.1025e-01, -5.3335e-02,
        -3.9339e-01, -2.9936e-01, -3.4719e-02, -7.8649e-03,  2.7111e-02,
         8.4447e-03, -1.4229e-02,  9.9225e-03,  1.1276e-02, -9.8590e-03,
         3.1949e-02,  9.2609e-03, -1.1258e-02,  2.6282e-02, -1.6742e-02,
        -5.8773e-02, -9.8903e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2612, -5.4844,  0.2398, -0.1896, -0.6971,  0.0416, -1.1136, -0.9919,
         0.0266, -0.2556, -0.7437, -0.0997, -0.7272,  0.7608,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0204, -1.0759, -0.1957,  0.0644, -0.0529, -0.3480,  0.0015,  0.0053,
        -0.2265,  0.0442, -0.2807, -0.4280, -0.0300, -0.2028, -0.3595, -0.0165,
        -0.1510, -0.1984,  0.1008, -0.3713, -0.7497, -0.1477, -0.2554,  0.0341,
         0.0558,  0.0208, -0.0231,  0.0352, -0.1367, -0.0187,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3210, -1.0735,  0.6312, -2.0593, -1.5730, -0.3153,  0.0472,  0.0992,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 0.1418, -1.2735, -0.1393, -0.2749, -0.5548, -1.1196,  0.0113,  0.0166,
         0.0120, -0.0147, -0.0296, -0.0362, -0.6526, -0.6547, -0.0731, -0.1182,
        -0.0598,  0.0488, -0.0750, -0.0084,  0.0448,  0.0670,  0.0249,  0.0258,
        -0.2910,  0.0600, -0.2983, -0.0142, -0.3847, -0.7544, -0.1118,  0.0760,
         0.1686,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0093, -1.5638, -0.6150, -0.2406, -0.1183, -0.2986, -0.1338,  0.0248,
         0.1260, -0.2163, -0.0615, -0.1492, -0.1176,  0.0396,  0.0689,  0.0530,
         0.0028,  0.0448, -0.1852,  0.0352,  0.0097, -0.0024,  0.0219,  0.0240,
         0.0082,  0.0971,  0.0367, -0.5146,  0.0476,  0.0060, -0.0218,  0.0858,
        -0.1586, -0.1496, -0.0481, -0.0892, -0.2072, -0.0121, -0.0119, -0.0095,
        -0.0112,  0.0302,  0.0067, -0.4852, -0.0604,  0.0346, -0.0147,  0.0040,
        -0.0233, -0.0470, -0.0695, -0.1215, -0.0562, -0.1098, -0.0275,  0.0401,
        -0.0067, -0.0876, -0.0262, -0.3164, -0.0552, -0.1029, -0.3195,  0.0131,
         0.0410, -0.0101, -0.0837,  0.0554,  0.0094, -0.0551, -0.0835],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2223, -0.0676, -0.2695,  0.0051, -0.3267, -0.1070, -0.7193, -0.0271,
         0.0422,  0.0755,  0.0572, -0.0208, -0.0283,  0.0307,  0.0399, -0.0063,
         0.0075,  0.0245,  0.0707, -0.6022, -0.0796,  0.0420,  0.0266, -0.3281,
         0.0025, -0.4174, -0.0402,  0.0363, -0.0294,  0.0039, -0.0048, -0.0291,
        -0.5443, -0.4147, -0.1812, -0.0522, -0.3786, -0.3491,  0.0253,  0.0106,
         0.1520,  0.0551,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4596e-01, -2.5678e+00,  3.8187e-02,  4.9788e-03, -4.6617e-02,
        -4.3892e-02,  7.6921e-02,  1.8803e-01, -1.3613e-01, -3.2943e-01,
        -1.3931e-01, -4.1694e-01, -5.8687e-01, -2.3586e-02,  5.0121e-03,
         5.5916e-03, -2.9756e-02, -5.5176e-01, -4.4131e-01,  2.5646e-02,
        -1.4437e-01, -1.9190e-01, -1.9765e-02, -4.3100e-02, -6.7514e-02,
        -3.4308e-01,  5.6789e-04,  5.0322e-02, -5.5960e-01, -3.2308e-02,
         4.0730e-02, -5.6903e-02,  5.1756e-02,  9.2723e-02,  3.2314e-02,
         7.8156e-02,  2.9617e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2706, -3.1019, -0.4640, -1.6883,  0.0809,  0.0373,  0.0316,  0.0859,
         0.3691,  0.0633, -0.7106,  0.3328, -0.6057, -0.0963, -0.0614,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0621, -0.2829, -0.0926, -0.0202, -0.7980, -1.1364, -0.0443, -0.9239,
        -1.2707, -0.1647, -0.1576,  0.0459, -0.1933,  0.0395,  0.2911,  0.0049,
         0.0488,  0.0594, -0.0180,  0.4194,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0381, -0.0330, -0.0738,  0.0179, -0.4820,  0.0030, -0.0040, -0.0798,
         0.0666,  0.0184, -0.2805, -0.3128,  0.0166, -0.0197, -0.1503, -0.1489,
         0.0028, -0.1113,  0.0468, -0.1377,  0.0147,  0.0059,  0.0137, -0.0091,
         0.0334, -0.3778, -0.6242,  0.0881,  0.0069, -0.2715, -0.4231, -0.0274,
         0.0602,  0.0407, -0.2123, -0.3944,  0.0063, -0.1333,  0.0228, -0.0085,
         0.0214,  0.0518, -0.0028,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3579, -0.0087, -0.0424, -0.1106, -0.3830, -0.1174, -0.3103, -0.6417,
        -0.3540, -0.1018,  0.0791, -0.1154, -0.0912, -0.4252, -0.7420, -0.0306,
         0.0923,  0.0548,  0.0136, -0.0301, -0.5709, -0.0699,  0.0362,  0.0576,
        -0.0222, -0.0848, -0.1695, -0.0564, -0.5280, -0.0138,  0.0749, -0.3874,
        -0.0563, -0.2764, -0.4054, -0.1007, -0.0094, -0.1825,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0389, -1.9224, -0.9689, -0.6984, -0.0117, -0.3293,  0.0304,  0.1324,
        -0.3395, -0.6454, -0.0845, -0.1064, -0.4771,  0.0159,  0.0399, -0.0176,
         0.0447, -0.3849, -0.1482,  0.1709,  0.3892,  0.2032,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1740, -1.3302, -0.1715, -0.0243,  0.0934, -0.1066, -0.0923,  0.1772,
         0.1685, -0.8024,  0.0656,  0.0352, -0.0196, -0.0519, -0.3395, -0.4463,
        -0.6158, -0.0688, -0.0762, -0.4980, -0.0787,  0.0149, -0.3163, -0.0698,
        -0.1269,  0.0769,  0.0260,  0.0683, -0.0754, -0.4661,  0.0052, -0.2272,
         0.0280,  0.1068, -0.2759, -0.5151,  0.0629, -0.0834,  0.3034,  0.2532,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4040, -0.0060, -0.0220, -0.0897, -0.0090, -0.2408, -0.2445,  0.0784,
        -0.3632, -0.0212,  0.0200, -0.0612, -0.1928, -1.0260, -0.5370, -0.0522,
         0.0751, -0.0252, -0.4297,  0.0311,  0.0100,  0.0531,  0.0750,  0.0224,
        -0.0444, -0.2850,  0.1120, -0.3545, -0.3417, -0.0316, -0.1037, -0.0484,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4678e-02, -3.5445e+00, -1.7849e-01, -8.2337e-04, -6.0988e-02,
         5.1916e-02, -1.4427e-01, -8.8735e-01, -1.1770e+00,  2.2944e-02,
        -4.3616e-02, -2.9286e-01,  9.9148e-02,  3.1623e-02,  1.9719e-01,
         8.1930e-02, -5.2807e-01,  2.4000e-02,  7.5738e-02,  1.1014e-01,
        -3.8521e-02, -2.8807e-02, -3.5290e-02, -5.9051e-01, -3.0637e-02,
        -4.6334e-02, -3.8918e-01, -3.1249e-02, -2.8290e-02, -1.3222e-01,
         8.7673e-02,  1.4057e-01, -4.6939e-02,  2.3923e-02, -3.1055e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.0292, -0.0443, -0.7127, -0.7699, -0.1533,  0.0050, -0.8515, -0.8015,
        -0.0522, -1.1448,  0.0256, -0.2303, -0.0318, -0.3663, -0.0630, -0.0462,
        -0.1938, -0.1409,  0.0249, -0.0651,  0.0183,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8019e-02, -2.8888e+00, -2.0848e-03, -5.4323e-01, -9.6320e-01,
        -5.2662e-01, -3.1523e-01, -5.0424e-01, -7.4787e-01,  2.0509e-01,
         1.7932e-02, -1.3953e-02,  9.9078e-02,  3.5568e-02, -6.1708e-01,
        -6.2087e-02, -1.8505e-01, -2.2894e-03, -1.3168e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1538,  0.0160, -0.9132, -0.0271, -0.4112, -0.1019,  0.0277,  0.1566,
        -0.0298, -0.3471, -0.9360,  0.0183, -0.9640, -1.5460, -0.0173, -0.4189,
        -0.2157, -0.2917,  0.2936, -0.1199,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5134e-02, -3.1065e+00,  7.0545e-02, -5.1939e-01, -9.7775e-02,
        -2.6635e-03,  2.1899e-01, -4.4468e-01, -8.1764e-02,  2.6442e-02,
        -6.1466e-02, -2.0710e-01, -1.0113e-02, -1.0531e+00, -1.8938e-01,
        -8.6831e-01, -7.6586e-01, -2.2607e-02, -3.5064e-01, -4.7102e-01,
         5.5938e-03, -7.6682e-02, -6.6865e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0840, -1.3558, -0.2499,  0.0019, -0.1452, -0.4517, -0.0982, -0.2897,
        -0.7252, -0.5983, -0.0347, -0.4161,  0.1452, -0.3864, -0.0355,  0.0324,
        -0.0029, -0.0548, -0.1230, -0.0734, -0.2507, -0.0558,  0.1070,  0.0066,
         0.0900,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.7143, -3.4967, -0.0691, -0.1621,  0.1204, -0.0819, -0.0220,  0.1163,
         0.1295, -0.1086, -0.5803,  0.0368,  0.0422, -0.0335,  0.0769, -0.0147,
         0.0271,  0.0098,  0.0386, -0.1441, -0.9924, -0.9707, -0.2485, -0.2364,
        -0.1959, -0.2287,  0.0201,  0.0171, -0.1176, -0.1230, -0.3204,  0.1390,
        -0.2657,  0.0079, -0.2710,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3977, -0.0699,  0.1534, -1.0905, -0.0973, -0.1440, -0.7226, -0.1802,
        -0.6860, -0.0575, -0.0419, -0.2000, -0.0981, -0.5755, -0.4506, -0.8350,
        -0.0413, -0.6370,  0.2969,  0.0765, -0.1921, -0.0253,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0947, -0.0702, -0.3510,  0.1373, -0.2512, -0.3507, -0.1398, -0.0255,
        -0.0427, -2.4900,  0.1189,  0.1991,  0.3132,  0.1913, -0.4763, -0.6836,
        -0.2703, -0.0536, -0.1873, -0.1960, -0.2037,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8525e-01, -2.8865e+00, -6.3826e-02,  6.0926e-02,  1.2385e-03,
         9.0792e-02, -6.1364e-01, -5.4744e-01,  1.0236e-01, -1.2413e-02,
         1.7823e-01,  2.5731e-01, -2.1574e-02, -1.0837e+00,  4.7264e-01,
        -4.4255e-01,  4.1901e-02, -3.3843e-01,  1.4754e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1196e-02, -3.0073e-02, -1.5163e-01,  2.8613e-02, -5.7479e-02,
        -4.5514e-01,  3.2592e-02,  8.8729e-03,  2.6255e-03, -9.9649e-03,
        -1.4024e-02, -5.7044e-02,  3.2145e-03, -2.2273e-01,  8.4449e-03,
        -9.9076e-02, -3.3156e-01, -3.6767e-01,  3.9795e-02, -1.7238e-01,
        -5.2238e-02, -1.3309e-01, -1.6173e-01,  5.6133e-04, -3.7941e-01,
        -3.7796e-02, -2.5659e-02, -9.9077e-02,  5.1467e-02, -6.9645e-01,
        -7.1060e-02,  2.2791e-02, -3.0451e-01, -7.2305e-01,  3.5472e-03,
        -2.8707e-02,  7.6314e-04,  1.3041e-02, -7.0854e-02, -2.1843e-01,
        -1.7970e-02,  3.2017e-02, -7.2175e-03,  6.4006e-03, -1.5145e-02,
        -8.2720e-02, -2.6250e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0503, -1.7231, -0.8585, -0.0863,  0.0098, -0.0058,  0.0047,  0.0030,
        -0.2848,  0.0690,  0.0543,  0.0417,  0.0090,  0.0688,  0.0648, -0.0032,
        -0.5208, -0.0161,  0.0329, -0.0184,  0.0078,  0.0685, -0.3853, -0.7839,
         0.0650,  0.0946, -0.0733,  0.0085,  0.0146,  0.0592,  0.0479,  0.1008,
        -0.1698,  0.0930,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2440e-01, -2.5692e+00,  1.8997e-01,  6.2032e-03, -3.5268e-01,
        -5.2073e-01,  1.3095e-01,  1.0026e-01,  5.3354e-02, -3.4915e-01,
         1.1291e-01,  7.7006e-02, -5.5940e-01, -9.2332e-01,  7.1108e-02,
        -1.2074e-01,  7.4517e-02,  1.8536e-03, -3.3138e-02, -9.3305e-01,
        -1.7015e-01, -1.7231e-01, -5.5405e-02, -1.0881e-01, -4.0749e-02,
         1.1150e-01,  1.6506e-02, -1.4452e-01,  4.8559e-02, -3.9660e-01,
         1.1130e-01,  2.0661e-01, -4.8891e-01, -1.6513e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 0.1878, -4.7979, -0.1685,  0.0120, -0.2492, -0.8731,  0.0546, -0.0586,
        -0.1527, -0.9787,  0.0557, -0.0605, -0.4499, -1.1943, -0.1009, -0.1961,
         0.0061, -0.2184,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5699, -0.0167, -0.0783, -0.0217, -0.0550,  0.0941, -0.0484, -0.0814,
        -1.3531, -2.3484,  0.2002, -0.0942, -0.3058, -0.1187, -0.0117, -0.0037,
        -0.2394,  0.3695, -0.1209, -1.4239, -0.1587, -0.0051, -0.2132,  0.1100,
         0.0562, -0.0180,  0.5245,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5515e-01,  7.4839e-02,  7.9602e-02, -7.9970e-03,  3.2023e-01,
        -4.3623e-03,  1.4017e-02, -1.6703e-01, -8.2975e-01, -8.7335e-02,
        -1.2867e-02, -3.0116e-02, -1.1936e+00, -1.2201e-01,  1.5111e-02,
        -5.7934e-03, -1.0686e-01, -1.3386e-01, -1.0599e-01, -1.0678e+00,
         3.3714e-02, -9.2203e-02, -2.7531e-01, -8.7538e-04, -1.8717e-01,
        -2.2877e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0255, -2.7671, -0.0470, -0.8319, -0.8435,  0.0671,  0.0254,  0.1518,
        -0.5970, -0.3521, -1.0232, -0.7291,  0.1053,  0.0350,  0.0552, -0.4801,
        -0.1135, -0.2242, -0.1413, -0.0226,  0.3097,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.7293,  2.6413,  1.7566,  1.8108,  0.6309,  0.4200,  1.5293,  0.1100,
         0.1156, -0.0058,  0.0205,  0.6708,  0.2623,  0.0938, -0.1339,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3732, -0.0794, -0.1076, -0.1213,  0.2562, -0.0841, -0.0466, -0.1483,
         0.0191,  0.0971, -0.2292, -0.2332,  3.4423,  1.4224,  0.0728, -0.0513,
        -0.0385, -0.3560,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3912, -2.7301, -0.8711, -0.8167, -0.2048, -0.2037, -0.2886, -0.9020,
        -0.1026, -0.7254, -1.0258, -0.4891,  0.3668, -0.5548, -0.0945,  0.1490,
        -0.0446, -0.0290,  0.0479,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0204, -0.8098, -1.1031, -0.0188,  0.0013, -0.1441,  0.1320, -0.0078,
        -0.1729,  0.0079,  0.0329,  0.0054, -0.1741,  0.0358,  0.0371,  0.1007,
        -0.2409,  0.0519,  0.0705, -0.0401, -0.4455,  0.0509, -0.2826, -0.0310,
        -0.3687, -0.0413, -0.2395,  0.0994,  0.0389, -0.1675,  0.0292,  0.0642,
        -0.2571, -0.0482, -0.2813, -0.1808, -0.4145,  0.0838, -0.1792, -0.0044,
         0.0229, -0.0011,  0.0554,  0.1015], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1650, -2.9109, -0.2415, -0.8733, -0.5098, -0.1216, -0.1656, -0.0974,
        -0.5873,  0.0070, -0.3332,  0.0979,  0.1378,  0.0255,  0.0932, -0.0092,
        -0.0524, -0.7645, -0.0357, -0.2279, -0.3417,  0.0944,  0.0469,  0.0505,
         0.1268,  0.0387, -0.0535, -0.3367, -0.0059, -0.0494,  0.0621, -0.1575,
         0.1503, -0.1076, -0.0513,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3891, -4.6181, -0.0338, -0.5628, -0.0244,  0.2811, -1.3395, -0.7207,
        -0.3298,  0.1009,  0.1333, -0.2899, -0.7322, -0.0213,  0.1140, -0.4406,
         0.0276, -0.0646,  0.0795,  0.0200,  0.0146, -0.2205,  0.2597,  0.1411,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2202, -3.0279, -0.2073, -0.5463, -0.2780, -0.3980,  0.1211, -0.1192,
        -0.8870, -0.0145, -0.3288,  0.1069,  0.2766, -0.2529, -0.5300,  0.0680,
         0.2953, -0.0235, -0.6239, -0.2861, -0.0197, -0.5821,  0.0406, -0.2520,
         0.1146, -0.4244, -0.7566, -0.3463,  0.0377, -0.1946, -0.3005,  0.0729,
        -0.5262,  0.1381,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1480,  0.1410,  0.0824, -0.0145,  0.0920,  0.1061, -0.4787,  0.0088,
         0.1928,  0.0018,  0.0860, -0.0540, -0.6813,  0.0482, -0.9879, -0.8647,
        -0.0673, -0.0822, -0.0693,  0.0228, -0.0184,  0.0614,  0.0354, -0.3245,
        -0.2201, -0.0691, -0.5243, -0.0267, -0.0135, -0.1223,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.6014e-01,  1.3076e-02, -2.9679e-01, -1.3198e+00, -5.7836e-02,
        -1.1922e-03, -1.0657e-02, -5.6407e-02, -8.0138e-01, -1.7135e+00,
        -3.0843e-02,  7.9070e-02, -9.3523e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2397,  0.0064,  0.4364, -0.3108,  0.1032, -0.2421, -0.2473, -1.0103,
        -1.2764, -0.0453, -0.1244, -0.5523,  0.0497, -0.2857, -0.6265,  0.1624,
         0.3689,  0.0988,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1732, -1.1006, -1.7323, -0.0691, -1.4127,  0.4229, -0.2482, -0.2201,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6180e-04, -1.3809e+00, -9.2921e-02, -3.1511e-01, -2.4326e-01,
        -2.1048e-02, -6.3698e-02, -1.1703e-02,  1.9325e-02, -1.5095e-01,
         1.0231e-01,  1.1490e-01, -1.7425e-01,  3.9260e-02,  1.1383e-02,
        -6.0939e-02,  3.6870e-03, -7.4308e-03,  2.2130e-02,  6.2943e-05,
         8.2441e-02,  8.0641e-02, -2.3873e-01, -2.3398e-01,  7.3083e-02,
        -2.3972e-01, -7.2104e-02, -2.9600e-02, -8.2713e-01, -2.7761e-01,
        -1.2992e-02, -2.6734e-01, -1.0032e-01, -7.6811e-01, -8.9061e-02,
        -1.8105e-01, -3.9923e-01, -1.2955e-01,  2.2852e-02,  5.7521e-03,
        -1.1071e-01, -9.3422e-02, -1.0153e-01,  6.6082e-02,  4.7909e-02,
        -7.9482e-02,  2.6119e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5141, -0.0713,  0.0243, -4.2114, -0.3361, -0.0611,  0.4334,  0.1298,
        -0.4293, -0.1871,  0.2044,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1861, -2.8375, -0.4806, -0.4357, -0.1149,  0.1383, -0.4695, -1.3130,
        -0.1443, -0.4817, -0.1119,  0.1382, -0.3941, -0.0039,  0.0942,  0.1008,
        -0.0116, -0.1742,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5799, -1.1179, -1.8403,  0.0541,  0.0323,  0.1036, -0.9358,  0.0234,
        -0.4603, -0.1300,  0.0215,  0.0278,  0.0325,  0.1558, -0.0313,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2977, -3.5488,  0.4687, -0.2411, -0.2184, -0.7470, -0.8128,  0.0864,
        -0.2277, -0.7460, -1.0306, -0.0509,  0.0239, -0.1989, -0.2067,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1555,  0.0182,  0.0879,  0.0289, -0.0154, -0.0874, -0.0961, -0.1206,
        -0.7148, -0.1837, -0.0254, -0.7556, -1.0169,  0.0044, -0.0076,  0.0095,
        -0.3711, -0.3629,  0.1792,  0.0134, -0.7949, -1.0229,  0.1873, -0.1839,
        -0.0993, -0.0410,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7819e-01, -1.7869e-01,  1.0014e-01,  3.0468e-03,  8.2754e-02,
        -7.3025e-03, -3.3702e-02, -1.4338e+00, -3.4984e+00, -1.1292e-01,
         4.7506e-02, -1.9146e-02,  2.1282e-01,  1.6075e-01, -3.9629e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5418, -3.6575, -0.0443, -0.6309, -1.0002, -0.1603, -0.7108, -0.1631,
        -0.1222, -1.6906,  0.1225,  0.0609, -0.4957,  0.0533, -0.0038, -0.1153,
        -0.3187,  0.0592, -0.3077,  0.1308, -0.3154,  0.1966,  0.1123,  0.0961,
         0.1841,  0.2910,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1694,  0.0375,  0.0324,  0.0749,  0.0554, -0.1511,  0.0179,  0.0419,
        -0.4425, -0.2759, -0.7469, -0.1402, -0.1107, -0.0169, -0.5555, -0.3971,
         0.0710, -0.2517, -0.0035, -0.3514, -0.1103, -0.0909, -0.0113, -0.0994,
         0.0975, -0.0242,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-9.5616e-03, -3.6411e+00,  6.2532e-02, -1.4320e-02, -1.1010e-01,
        -1.0562e-01, -3.4467e-01, -1.0448e+00, -7.2096e-02, -4.2669e-01,
        -3.2039e-01, -6.3645e-01, -8.2541e-01, -1.2301e-02, -4.1720e-01,
        -1.2928e-01, -4.9185e-02, -2.3980e-02, -1.7598e-01, -1.4884e-01,
         1.5244e-01, -2.2395e-01,  1.4141e-01, -3.9996e-01, -6.5537e-01,
         1.3811e-01, -1.1138e-01,  8.7862e-02,  2.2028e-01, -2.8410e-02,
        -6.9825e-02, -6.3271e-02,  1.0769e-03, -6.4170e-02,  2.1338e-01,
         8.8537e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7968e-01, -7.9071e-01, -3.0631e-01, -3.1837e-02, -2.3099e-01,
        -4.8825e-02, -4.9387e-01, -4.5125e-01, -1.1102e-02, -5.4682e-02,
        -3.0599e-01, -8.4289e-03,  1.3685e-03,  5.4757e-02,  2.7397e-02,
        -2.2855e-01, -2.3823e-02, -8.8497e-03,  2.1677e-02, -9.1409e-02,
         1.1126e-01, -3.5931e-01,  3.1972e-04, -3.5144e-04, -1.4272e-01,
        -3.6960e-01, -8.1766e-03, -2.6345e-01, -3.9216e-01, -3.2416e-02,
        -5.0788e-02, -1.8202e-01, -3.3900e-01,  3.6116e-02,  2.4985e-01,
        -1.5713e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0108, -0.1958, -0.0345, -0.0862,  0.1196, -1.0479, -0.2813, -0.0168,
        -0.0912,  0.1046, -0.0036, -0.6624, -1.5530,  0.4151,  0.0827, -0.1299,
         0.0457,  0.1123, -0.0578, -0.1186,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1123, -1.0927,  0.0901, -0.0303,  0.0769,  0.0681, -0.0105, -0.2847,
        -0.0573, -0.1722,  0.0244, -0.0326, -0.0282, -0.0325, -0.4843, -0.3201,
        -0.1016, -0.3932, -0.0292, -0.0206,  0.0948, -0.0357,  0.0277,  0.0544,
         0.1923,  0.0971, -0.0029, -0.0371,  0.0253, -0.2706, -0.3596, -0.1278,
        -0.0888, -0.3167, -0.0178,  0.1587,  0.1205, -0.0404,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1279e-01, -1.9679e+00, -7.3660e-01, -5.0834e-01, -4.6552e-02,
         1.1054e-01, -6.2765e-03,  2.1092e-01, -6.4013e-02,  2.4113e-02,
        -1.4885e-01, -5.5607e-01, -1.2777e-02,  1.3950e-02, -4.3661e-02,
        -3.7679e-01, -3.7689e-03, -4.7769e-01, -9.8772e-02, -1.7405e-03,
         4.5974e-02,  1.0274e-01,  2.7299e-02,  4.9822e-02,  1.7672e-02,
         7.5459e-02, -2.4375e-01, -3.6824e-01, -5.4405e-01, -2.9469e-02,
        -6.0377e-02, -3.9359e-02,  4.8928e-02,  9.7077e-03, -1.9135e-01,
         3.8561e-02, -5.3248e-02,  5.2373e-02,  9.9307e-02, -6.7957e-02,
         5.7369e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.5688, -3.7997,  0.1245,  0.0679,  0.0366, -0.7257, -0.0759, -0.0610,
        -0.2731, -0.0192,  0.1358, -0.6275, -1.2743, -0.2919,  0.1015, -0.9019,
         0.1754, -0.6881, -0.0592,  0.0606,  0.0194, -0.0706,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0681,  0.0205, -0.0036,  0.0078,  0.0979, -0.3244,  0.0374,  0.0578,
        -0.0867, -0.0244, -0.0111, -0.5617, -0.0262, -0.3620, -0.0101, -0.4195,
         0.0251, -0.3546,  0.0247, -0.2441, -0.7033,  0.1193,  0.0322, -0.3203,
        -0.0272,  0.0453, -0.0361, -0.2617, -0.6863, -0.1439, -0.2830, -0.2404,
         0.0918,  0.0581, -0.2582,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1174, -0.2517, -1.0327, -0.1312, -0.6523, -0.0354,  0.0538,  0.1681,
         0.2302,  0.0101, -0.4658,  0.0772,  0.0207,  0.0338,  0.0912,  0.0953,
         0.0687, -0.0967, -0.3827,  0.0745, -0.2961,  0.0493,  0.0474, -0.0407,
        -0.0308,  0.0099, -0.3512,  0.0265, -0.0585,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2960, -1.5933, -0.7262, -0.4138, -0.1817, -0.3518, -0.0868, -0.0868,
        -0.1897,  0.0146, -0.2437, -0.3351, -0.1824, -0.0508,  0.0149, -0.3613,
        -0.3797, -0.0501, -0.1788,  0.0893, -0.0895, -0.0039, -0.2660,  0.0969,
         0.0945,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0318, -0.0322, -0.0218,  0.1604, -1.3444,  0.0222,  0.0299, -0.9946,
        -1.4329, -0.1163, -0.0929,  0.2899,  0.0321,  0.4761, -0.8138, -1.0674,
         0.0540, -0.3718, -0.0046, -0.0209, -0.0413, -0.1317, -0.1245,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3439, -0.0503,  0.0343,  0.1475,  0.0299,  0.0768,  0.0558,  0.0191,
        -0.0099, -0.0212, -0.0333, -0.0345, -0.5533, -0.9371, -0.0128, -0.0808,
         0.0026, -0.2192,  0.0539,  0.0344,  0.0239,  0.0494,  0.0734, -0.0271,
        -0.0960,  0.0162, -0.4877, -0.0228, -0.7008,  0.0108, -0.1588,  0.0667,
        -0.0445, -0.1194, -0.2657, -0.0048, -0.0480,  0.0459,  0.0230,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5435, -0.1424, -0.3996, -0.9742, -1.4612,  0.3092, -0.1055, -0.8581,
        -0.9172,  0.1529,  0.0914, -0.0728,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 0.1362, -0.8841, -1.6307, -0.1008, -0.0271, -0.0668, -0.3642,  0.1005,
        -0.0299, -0.2470,  0.0266,  0.1111, -0.1201, -0.0157, -0.3440,  0.0153,
        -0.0866, -0.1494,  0.0090, -0.0675, -0.0162, -0.0824,  0.0891, -0.0122,
        -0.0149,  0.1174,  0.0076,  0.1153,  0.0377, -0.3435, -0.1024,  0.2000,
        -0.5199, -0.0035,  0.0927, -0.0045, -0.2602, -0.0357,  0.0392, -0.0062,
         0.0101,  0.0281, -0.0139,  0.2684,  0.1657,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1730, -4.7932, -0.5066, -1.5526, -0.8381,  0.0172,  0.0668, -0.5886,
        -1.9789, -0.4420, -0.8735,  0.3770,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5948, -5.4108, -0.2669, -1.6235, -0.0822,  0.2486, -0.0621,  0.0464,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7225e-01, -9.4920e-01, -3.2571e-02, -2.0497e-01, -2.8302e-01,
         3.0121e-02,  3.4815e-02, -4.1997e-02, -2.1067e-01, -6.8541e-01,
        -4.1054e-02, -6.4900e-02, -2.2508e-02, -1.7455e-02,  1.2925e-02,
         4.8446e-02, -1.5563e-02, -8.0544e-03,  2.4266e-02,  1.4012e-02,
         8.8935e-03,  3.7295e-02, -1.8722e-02, -3.7463e-02, -2.4778e-02,
        -5.3597e-02, -5.2368e-03, -6.8758e-02, -1.5473e-01, -1.5409e-02,
        -2.1285e-02, -3.1122e-02, -2.1192e-04, -1.2028e-03, -2.3499e-02,
         1.7941e-02,  2.4534e-02,  4.0978e-02,  3.1813e-02,  1.7490e-02,
        -1.9734e-02, -1.2404e-02, -5.2614e-02,  5.0741e-03, -2.7405e-02,
         4.5805e-02, -6.4125e-01, -5.4988e-02, -6.3011e-01, -1.2113e+00,
        -1.8669e-01, -9.1743e-02, -2.4601e-01, -4.0448e-02, -1.0824e-01,
        -2.4263e-02, -5.5337e-02, -7.6689e-02, -6.0574e-03, -3.1351e-02,
         1.5286e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0388,  0.0421,  0.0210, -0.1031, -0.3813, -0.0110, -0.2840,  0.0125,
        -0.0112, -0.0231, -0.1175, -0.5000,  0.0534, -0.4745, -0.3912,  0.0381,
        -0.0463,  0.0319, -0.1530, -0.1363, -0.3616, -0.5927, -0.0784, -0.0971,
        -0.1736, -0.1629, -0.1180, -0.1036, -0.1646, -0.1224, -0.0133, -0.0318,
        -0.1210,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2067,  0.0435, -1.0643, -0.7112, -0.1516,  0.1784, -0.4789,  0.0381,
         0.0440,  0.0349, -0.0559,  0.0051, -0.4155, -0.1205, -0.1860,  0.2345,
         0.1495, -0.9602, -1.9078,  0.2365, -0.1294, -0.0831,  0.3735, -0.5320,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2247, -0.6173, -0.4803, -0.2569,  0.0814, -0.5387,  0.1631, -0.4220,
        -0.0948, -0.4891, -0.4729, -0.5092, -0.9016,  0.0181,  0.0633, -0.0201,
         0.0168, -0.1378,  0.1548,  0.0149, -0.2299, -0.0556, -0.0225,  0.0952,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1252,  0.0075, -0.4886, -0.3582,  0.0492,  0.0204,  0.0108, -0.3129,
         0.0244,  0.1056,  0.0389,  0.0267,  0.0372,  0.0175, -0.2910, -0.3642,
        -0.4123,  0.1045, -0.0060,  0.0202, -0.3495,  0.0603, -0.1938, -0.3676,
         0.0140,  0.0172, -0.1492, -0.0033,  0.0283, -0.1322, -0.0998, -0.0308,
         0.2731, -0.1230,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0326,  0.1486,  0.0888, -0.1472, -0.7594, -0.0167, -0.1026, -0.0048,
        -0.0655, -0.0434,  0.1441,  0.3653, -0.0517, -1.9834, -0.0487,  0.0039,
        -0.6250,  0.1949, -0.7676, -0.1463, -0.4792, -0.6299,  0.2449, -0.1436,
        -0.3923,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2566,  0.2236, -0.2027, -0.1673, -0.2082, -0.0372, -1.4334, -0.9114,
        -0.8846, -2.4035, -0.7079, -0.1809, -0.4247, -0.1815,  0.1318,  0.0653,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0908, -2.0736,  0.0075, -0.8476, -0.0135, -0.5735, -1.1820, -0.0670,
         0.1944,  0.0201,  0.0347,  0.1937, -0.3725, -0.0621, -0.0268, -0.1560,
        -0.3530, -1.1505, -0.4661, -0.1037, -0.0737,  0.5027, -0.0673,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4180, -0.3987,  0.0655, -1.2373, -2.0874, -0.2415,  0.1616, -0.0260,
        -0.2236,  0.0468, -0.3062, -0.0054, -0.0634,  0.1978,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 0.6217, -0.6175, -0.7572, -1.2854, -0.2075, -0.1122, -0.5822,  0.0562,
        -0.2369, -0.0209, -0.5687, -0.7333, -0.1428,  0.0338, -0.0470,  0.1344,
         0.1094, -0.6825, -0.0237, -0.6323, -0.1123, -0.0296,  0.0522,  0.2502,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0276,  0.1398, -0.7993,  0.0441, -1.4378,  0.0357, -1.5722,  0.0641,
         0.2049, -0.2476,  0.0017,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0163,  0.0834, -0.0029, -0.0242,  0.0613, -0.4369, -0.6470, -0.0802,
        -0.3736, -0.0290, -0.2809,  0.0375, -0.2999, -0.0245, -0.1700, -0.0336,
        -0.1766, -0.2698, -0.1058,  0.0457,  0.0659,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4017,  0.0506,  0.1079,  0.3152,  0.2793,  0.0442, -0.0640, -2.5861,
        -0.5819, -1.6342, -0.1573, -0.2060,  0.1706, -0.1081,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6495, -0.3545, -0.2039, -0.0123,  0.0313, -0.0278, -0.0878, -0.4003,
         0.0231,  0.0263, -0.5457,  1.7046, -3.4919, -0.3840, -0.1296, -0.0403,
        -0.4724,  0.2017, -0.0737,  0.1074, -0.3529,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6110, -3.3852,  0.0374, -1.4031,  0.0698, -0.6029,  0.1175, -1.0723,
         0.1070, -0.4840, -0.1977, -0.3988,  0.2787,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0265, -0.9393, -0.0261, -0.0092, -0.0105, -0.0414, -0.1952, -0.3431,
        -0.0032, -0.0086,  0.0475,  0.0273, -0.1016,  0.0023, -0.0408, -0.1830,
        -0.0306, -0.0143,  0.0278, -0.0571, -0.0429,  0.0253,  0.0246,  0.0342,
        -0.8784,  0.2522, -0.5856, -0.5733, -0.8526, -0.0937,  0.0036, -0.0457,
         0.0407, -0.0470,  0.0493, -0.0178, -0.3167, -0.0209,  0.0242, -0.0485,
        -0.1852,  0.0068,  0.0199,  0.0038, -0.2300], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5235, -0.2826, -0.7353, -0.0463,  0.1198,  0.1694, -0.0562, -0.2908,
        -0.0568, -0.1784, -0.8118, -0.1294,  0.1031, -0.0194, -0.5158, -0.1348,
        -0.6470,  0.0853,  0.0478,  0.0151, -0.0935, -0.7834, -0.0124, -0.9458,
         0.0799,  0.0351, -0.0793, -0.1532, -0.8292, -0.3232,  0.1201, -0.0062,
        -0.2434, -0.0997,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0543,  0.6850,  0.0375, -0.3431, -0.2455, -0.1729,  0.0280,  0.0194,
         0.3335,  0.1831, -0.2636, -0.0726, -0.0784,  0.0035,  0.1505,  0.1271,
         0.1643,  0.1782, -0.4369, -1.8812, -3.0479,  0.2576,  0.0239, -0.1568,
         0.0387, -0.1586,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9473e-02, -2.8219e+00,  6.1196e-02, -5.7748e-01,  1.2337e-02,
        -1.4329e-01,  1.3836e-02, -2.4261e-01,  2.4409e-03,  7.0496e-02,
         2.6227e-02,  2.0172e-02,  5.2513e-02,  8.4805e-02,  1.0901e-01,
        -5.8171e-01, -1.2420e+00,  9.5643e-02, -3.2302e-01, -3.8845e-02,
        -2.9842e-01,  1.3551e-01, -6.7154e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1219, -1.2408, -0.0764, -0.5860, -0.0362,  0.0068, -0.0432, -0.2841,
        -0.5167, -0.0820, -0.0495, -0.0669,  0.0212,  0.0231, -1.1987,  0.0273,
        -0.0493, -0.3345, -0.3564, -0.0659, -0.0275,  0.0156, -0.0042, -0.0231,
         0.0051,  0.0474, -0.4927, -0.7338, -0.0700, -0.1144,  0.0027,  0.1705,
        -0.0241,  0.1539, -0.0187,  0.0936, -0.1041,  0.0265,  0.1082, -0.0675,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2114,  0.1729,  0.0056, -0.2044, -0.1877,  0.0650, -0.1222, -0.4033,
         0.0176, -0.0557, -0.0025, -0.0011, -0.1493, -0.0271, -0.1704, -0.4705,
        -0.3397,  0.0358,  0.0023, -0.1993, -0.1224, -0.4144, -0.2490, -0.0489,
        -0.1587, -0.0526, -0.0563, -0.2571, -0.0792, -0.3503, -0.3480,  0.0453,
        -0.0137,  0.1736,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.6605e-01, -1.5054e-01, -3.7286e-01, -2.2835e-01, -3.2076e-01,
        -8.9036e-02,  6.6048e-02,  3.6600e-03,  8.2687e-03,  1.1601e-02,
         1.6372e-02, -2.3713e-01,  2.0234e-02, -4.0971e-02, -9.9234e-01,
        -7.1663e-02,  3.4306e-02, -2.9207e-02, -3.8338e-01, -3.7445e-04,
        -1.6912e-02,  1.7867e-02,  4.0777e-02,  5.2418e-04, -3.6055e-01,
        -7.7401e-01,  2.0798e-02, -2.5961e-02,  5.7292e-02,  3.7673e-02,
         1.0168e-02,  1.5360e-03,  3.2247e-02, -4.3272e-04, -2.3013e-01,
        -2.2764e-03, -1.4853e-01, -2.7392e-01, -5.5526e-02, -1.4889e-01,
        -1.5707e-01, -5.3389e-02, -1.4114e-01, -6.9561e-02,  1.7452e-02,
        -9.5082e-03,  7.0409e-03, -1.5706e-02, -9.4959e-02, -3.2700e-02,
        -4.9565e-04,  1.0748e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0335,  0.0315, -0.0470, -0.0274, -1.0383, -0.1334, -0.1882,  0.3832,
         0.0445,  0.0614,  0.0991,  0.1120, -0.0619, -1.2521, -0.3335, -0.3828,
        -0.8461, -0.1719, -0.3733, -0.0560, -0.1731,  0.0309, -0.5326, -1.2141,
        -0.0229, -0.0458, -0.0109,  0.0290, -0.0177, -0.0215,  0.4261,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1844, -1.4908,  0.1857, -0.3091,  0.0699,  0.0454, -0.2967,  0.0847,
        -0.0633,  0.2750,  0.0810, -0.6221,  0.2183, -0.1266, -0.8867, -0.5079,
         0.0500, -0.6443, -0.6424, -0.7541, -0.2412,  0.0540,  0.0340, -0.0373,
         0.2792,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2618, -0.0889,  0.1547,  0.0918,  0.1371, -1.7772, -2.7438, -0.0444,
        -0.4567,  0.2455, -0.2416, -0.3259,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1755e-01, -3.5368e+00,  2.0764e-01, -7.2248e-01, -7.0147e-01,
        -3.3346e-01,  1.6912e-02, -3.0880e-02, -2.1702e-03, -1.0334e-01,
        -1.2762e-01, -1.3559e-01, -7.5164e-01, -1.2256e+00, -1.6687e-01,
        -4.9296e-01,  3.5077e-02,  2.1529e-01,  2.1081e-02, -1.0601e-01,
        -1.8047e-01, -3.5183e-03, -1.0591e-01,  3.8401e-02, -6.0527e-02,
        -1.2598e-01, -3.4940e-03, -5.4914e-01,  5.4276e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1875, -5.9543, -0.0985,  0.3696,  0.2723, -1.9656, -2.3493, -0.0954,
         0.2358,  0.0360,  0.8251, -0.4199,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1150, -4.4364, -0.1385, -1.1797, -0.3604, -1.4895, -1.9610,  0.2968,
        -0.7885,  0.1217, -0.1170,  0.5195,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1211,  0.3272, -0.8635, -0.0237, -0.6479,  0.0451, -0.0510,  0.0370,
        -0.7440,  0.1013,  0.1168, -1.2486, -0.0944,  0.1070, -0.0055, -0.5834,
        -0.1505,  0.1513, -0.0437,  0.2922,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0316, -1.5436, -0.0119, -0.4611, -0.6680, -0.0993, -0.2445, -0.0322,
        -0.2251, -0.0552, -0.0514, -0.3107, -0.1932, -0.2222, -0.0268, -0.0643,
        -0.2297, -0.0134, -0.0464, -0.0161, -0.2589,  0.0794,  0.0451,  0.0511,
         0.0741,  0.0211,  0.0557, -0.3368, -0.2118, -0.0623, -0.0136, -0.0188,
        -0.1609, -0.3876, -0.1331,  0.2593, -0.0408,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5130,  7.1965, -0.5599,  0.6257, -0.2241, -0.0119,  0.1071,  0.2231,
         0.6727,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0692, -0.0674,  0.1829, -0.9774,  0.0145,  0.1001, -0.4245, -0.5031,
         0.0881,  0.0435, -0.0347, -0.1242, -1.1244, -0.7773,  0.0175, -0.0388,
         0.0813,  0.0126, -0.2012, -0.3420,  0.0181, -0.0090, -0.0103,  0.0405,
        -0.0379,  0.0660, -0.2737,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3028, -2.3970,  0.0808, -0.7837, -0.1295, -0.3447, -0.0228,  0.0493,
        -0.0505, -0.0537,  0.0345, -0.8883, -0.1960,  0.0705,  0.0128, -0.0032,
        -0.4387,  0.0678, -0.0631, -0.0121, -0.0387, -0.3320, -0.8558, -0.0677,
        -0.0762,  0.0503,  0.0376, -0.4154, -1.2262, -0.1597,  0.0540,  0.0151,
         0.1135, -0.2661,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-0.1047, -0.0031,  0.0675,  0.0505, -0.4004, -0.0762,  0.0023,  0.0015,
         0.0799,  0.0760,  0.0306, -0.3820,  0.0901,  0.0276,  0.1207, -0.3883,
        -0.6842, -0.0827, -0.1641,  0.0243,  0.1096,  0.0207, -0.2729, -0.0079,
         0.0368,  0.0331,  0.0754, -0.1282,  0.0121,  0.0426,  0.1591, -0.5174,
        -0.8786, -0.0247, -0.0856, -0.3460, -0.4649, -0.1336, -0.0080, -0.0109,
        -0.1759, -0.1775, -0.0507,  0.1438, -0.2575,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.1443, -0.0567, -0.0276, -0.0019, -0.1293, -1.0604, -1.5303, -0.1547,
         0.0206, -1.0763, -0.1690, -0.1373, -0.3327, -0.0432,  0.0491, -0.1827,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0721, -0.0266,  0.0985,  0.0838, -0.0603, -0.0428,  0.0299, -0.0827,
        -0.9497, -0.8993, -0.1945, -0.8649,  0.0740, -0.3878, -0.7471, -0.0049,
        -0.0386, -0.0286,  0.0453, -0.0924,  0.0373,  0.3300,  0.0407, -0.0368,
        -0.0045,  0.2002,  0.4812,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0175,  0.0417, -0.2284,  0.0152, -0.2326, -0.0371, -0.1026, -0.0040,
        -0.0527, -0.0124, -0.0144, -0.0660, -0.2259,  0.0156, -0.1751,  0.0260,
        -0.1041,  0.0252,  0.0064, -0.1517,  0.0448, -0.1259, -0.0799, -0.2918,
        -0.0101,  0.0159, -0.1069, -0.2541,  0.0609, -0.2295, -0.0196, -0.0992,
        -0.2328, -0.0493,  0.0026, -0.1657, -0.1240, -0.3717,  0.0053,  0.0221,
        -0.1353, -0.2576, -0.0496, -0.0063,  0.1174, -0.1387], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.5066,  0.0658,  0.1219,  0.0797,  0.0102, -0.0182,  0.0203, -1.5843,
         0.2586, -0.1189,  0.0991,  0.1044, -0.0150, -0.0173,  0.0992,  0.0730,
         0.0666,  0.1026, -0.9080, -1.6034, -0.1460, -0.0670,  0.0375,  0.0904,
         0.0721,  0.0081, -0.5733,  0.0638,  0.1616, -0.0825,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1832, -8.2973,  0.8163,  0.3847, -0.0213, -0.0243, -0.3373,  3.8659,
         0.5029, -0.2958,  1.4178,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.4411, -1.8367,  0.0570, -0.2931, -0.4296, -0.2056, -0.5285, -1.0134,
        -0.0615, -0.0869, -0.1179, -0.0415, -0.0369, -0.2186,  0.0088, -0.4020,
        -0.2546, -0.1360,  0.0192,  0.2331,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3917, -2.0844, -0.9704, -0.6814,  0.0228, -0.3732, -0.4334, -0.1831,
        -0.0877,  0.2425, -0.0321,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0307, -2.5823, -1.0342, -0.9372,  0.0960, -0.1833, -0.1584, -0.3464,
        -0.7287, -0.1267,  0.0309, -0.1842, -0.0222, -0.3728,  0.0415, -0.3314,
        -0.0716,  0.0672, -0.1678,  0.0314,  0.3108,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0970, -2.3783, -1.1355, -0.0335,  0.1523, -1.5489, -0.0343,  0.0263,
        -0.1337,  0.0816,  0.0028, -0.3911, -0.0417, -0.0479, -0.1261,  0.1082,
        -0.0139, -0.1393,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0991, -0.9140, -0.9662, -0.0659, -0.1510,  0.0035,  0.0340, -0.0035,
        -0.0316, -0.1004, -0.0099, -0.0199, -0.0529, -0.5083, -0.4131,  0.0446,
        -0.3662, -0.3082, -0.2982, -0.1982, -0.0261, -0.3090, -0.5366, -0.0288,
        -0.2839,  0.0070, -0.2085, -0.0989, -0.0583, -0.3090, -0.0432, -0.0188,
         0.0253,  0.0413, -0.1346, -0.0213,  0.0247,  0.0150, -0.0064,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6583e-01, -5.0045e+00,  2.5171e-02, -7.1014e-01, -1.9180e-02,
        -4.7531e-02,  1.2356e-01,  1.1352e-01, -7.7667e-02, -1.1947e-01,
        -1.7529e-01, -7.6997e-01, -1.5771e-01,  3.7244e-02,  4.0599e-03,
         3.0707e-02, -6.3435e-01,  4.4203e-02, -4.8337e-02, -2.4786e-02,
        -1.8324e-02,  4.0395e-02,  1.7751e-02,  2.5019e-02, -3.5020e-02,
         1.3182e-02, -3.1609e-01,  9.7801e-03, -4.2740e-01, -5.7886e-01,
        -2.7255e-02, -5.0578e-02, -3.3319e-02,  2.3128e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-0.2604,  0.1851, -0.9165, -0.9519,  0.3875, -0.5906,  0.2102, -0.3089,
        -0.2592,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0663, -1.0214,  0.2304, -0.5063, -0.2227, -0.7034,  0.0054, -0.2047,
        -0.2437, -0.0972, -0.3029, -0.1223, -0.0374, -0.0301,  0.0057, -0.5080,
        -0.1369, -0.8289,  0.0766,  0.0456, -0.0157,  0.0723,  0.0409,  0.0143,
        -0.1269,  0.0509,  0.0461, -0.0225, -0.0598,  0.0011, -0.0164, -0.3250,
         0.0124, -0.3678,  0.0023, -0.0423, -0.0554,  0.0817,  0.1159, -0.1465,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1089, -0.7582, -1.0692, -0.0577, -0.3881, -0.0557,  0.0046, -0.0705,
        -0.2180, -0.5343, -0.0174, -0.0087,  0.0026,  0.0014, -0.1210, -0.0408,
         0.0658,  0.0393, -0.2526, -0.0322, -0.3552,  0.0150, -0.0517, -0.1283,
        -0.0222, -0.2289, -0.2372,  0.0147, -0.5246, -0.6453, -0.0175,  0.0047,
         0.0248,  0.0013,  0.0605,  0.0281, -0.2126, -0.3469, -0.0258,  0.0071,
         0.0213,  0.0828, -0.0309,  0.0432,  0.0491,  0.0125, -0.0230,  0.0093,
        -0.0144, -0.0297, -0.0241, -0.1639,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1089, -2.0904,  0.0258, -0.7083, -0.1307, -0.1301, -0.7974, -0.0922,
        -0.4433,  0.0393,  0.0744,  0.0042, -0.6657,  0.0476, -0.3276, -0.4374,
        -0.2957,  0.0180, -0.5366,  0.0234, -0.0249,  0.3003,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1906, -5.5128,  0.3576,  0.2399,  0.1845, -0.0595,  0.0620, -0.7219,
        -0.0294, -0.0705, -0.1252, -0.5233, -0.2445, -0.1481, -0.2927, -0.7793,
        -0.1963, -0.1375,  0.1209, -0.1607, -0.2008,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1390e-01, -1.1046e+00, -1.6891e-01, -2.4520e-01, -6.8657e-02,
         7.2528e-02, -1.3167e-01,  2.6504e-02, -2.1703e-01, -4.4874e-02,
         2.6879e-02, -5.0817e-02,  5.0945e-03,  2.4412e-02,  7.8582e-02,
         2.3959e-02,  7.1607e-02,  1.2486e-02, -1.1559e-01,  1.3860e-02,
         3.3218e-02, -6.3210e-03,  2.3285e-02,  5.3327e-02,  6.2968e-03,
         3.8784e-02, -3.4671e-02, -2.6455e-01, -5.6363e-03, -1.9443e-01,
        -4.1834e-01, -3.5911e-01,  4.0637e-02, -1.9550e-01, -9.6650e-02,
         1.1961e-04, -3.5593e-02, -1.4231e-02,  9.4611e-03, -1.8330e-02,
        -3.7753e-01,  1.3507e-02,  1.4625e-01,  1.0918e-01, -2.2786e-01,
         4.0858e-02, -2.4174e-01, -1.3374e-01,  9.4259e-03,  9.6908e-02,
        -7.9974e-02, -3.9888e-02, -5.5677e-02,  9.7666e-02, -5.5408e-02],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0177,  0.0259,  0.0399, -0.1387, -0.0476, -0.0692, -0.3156,  0.0275,
        -0.0024,  0.0193, -0.0249,  0.0791, -0.0124, -0.1382, -0.3239, -0.0126,
        -0.0170, -0.1379,  0.0238, -0.4714, -0.0759, -0.1413, -0.2239,  0.0054,
         0.0367,  0.0742,  0.0871, -0.3454,  0.0657,  0.0620,  0.0469, -0.0010,
        -0.0601, -0.2971, -0.0496, -0.2032, -0.0384, -0.3223, -0.2703, -0.1807,
        -0.0263, -0.0177,  0.0039, -0.0298, -0.0106,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0149, -1.4533, -0.4486, -0.6719,  0.0393, -0.2074, -0.2746,  0.0137,
        -0.5408, -0.2007,  0.0473, -0.3136,  0.0285, -0.5951, -0.5289, -0.0883,
        -0.1022, -0.6664, -0.0486, -0.4244, -0.6976,  0.0610, -0.1465,  0.1668,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1236,  0.1096, -0.0285,  0.0375, -0.0111, -0.0546, -0.3265, -0.2892,
         0.1470,  0.0390,  0.0567,  0.0493, -0.1388, -0.0704,  0.0254,  0.0681,
        -0.2025, -0.2713, -0.3437, -0.5275, -0.0664,  0.0461, -0.3271, -0.4629,
        -0.0218, -0.2622, -0.3294,  0.0189, -0.1790, -0.4294, -0.1615,  0.1130,
        -0.1450,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3868, -2.8708, -0.2014, -0.8966,  0.1607,  0.0101, -0.0303, -0.2266,
        -1.3853,  0.0640, -0.0540, -0.0598,  0.2467, -0.8647,  0.0281, -0.6115,
        -0.2478, -0.3302,  0.2222,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0282e-01,  6.4828e-02,  1.3915e-01, -7.6106e-02, -5.2094e-01,
         1.2856e-01, -5.9566e-01, -7.0792e-01, -8.8187e-01, -3.8128e-02,
         4.7493e-04, -6.1177e-02, -4.1213e-02,  6.4529e-02,  1.6712e-02,
         5.7564e-03, -7.8072e-02, -3.5145e-01, -5.8674e-01,  1.3325e-01,
        -4.0722e-01, -3.1178e-01, -1.9536e-02, -1.2443e-01, -1.3196e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3036, -1.0330, -0.8492,  0.2569,  0.0185, -0.9286, -1.2950, -0.1700,
        -0.1228, -0.2280,  0.0343, -0.0467, -0.2820, -0.0064,  0.0435,  0.0212,
        -0.2388,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.6331e-01, -5.9112e-02, -8.8563e-02,  1.3720e-03, -9.1028e-02,
         1.2589e-01, -6.0870e-03, -1.9488e-01, -2.7678e-02, -5.5706e-01,
        -1.1963e-01,  2.7196e-02,  6.4960e-02,  3.3708e-02,  8.2602e-02,
        -1.5916e-02, -1.1659e+00, -1.5584e+00, -1.3773e-01, -6.9291e-01,
        -2.6786e-04, -6.4533e-01, -1.5377e+00, -8.1812e-02, -1.0053e-01,
        -8.3575e-02,  1.2140e-01,  1.2627e-01,  3.7867e-02, -1.1240e-01,
         1.7382e-01,  2.6649e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1678, -0.8307, -0.7268, -0.0011,  0.0868, -0.0521, -0.3225, -0.6730,
         0.0318,  0.0583,  0.0751, -0.0758, -0.1044,  0.0455,  0.0151, -0.0286,
        -0.0477, -0.0252, -0.2799, -0.0377, -0.4060,  0.0126, -0.3033, -0.6131,
        -0.1835, -0.0770, -0.1363, -0.0794,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0355e-01, -3.9508e-01, -1.2431e-01, -9.5210e-02, -2.4474e-01,
        -8.2420e-01, -1.4799e+00, -3.4970e-01, -1.5224e-01, -3.1476e-02,
        -2.2423e-02, -7.2848e-01, -3.2492e-02, -1.2643e-02,  5.1714e-02,
        -3.8252e-01, -8.3180e-02, -2.0724e-01, -1.3775e-01,  6.3637e-03,
        -8.7449e-02,  9.2793e-03, -9.4991e-02, -5.5254e-02, -3.0813e-01,
        -2.0548e-04,  2.4966e-01,  6.4659e-02,  6.7980e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0333, -0.7905, -0.0220, -0.4948, -0.8107, -0.0172,  0.0103,  0.0255,
        -0.0138,  0.0482,  0.0604,  0.0354, -0.3038, -1.0188,  0.2604, -0.2603,
         0.0161, -0.5539, -0.0494, -0.1014,  0.1047,  0.0377, -0.2652, -0.0140,
        -0.1647, -0.5529, -0.4329,  0.0357, -0.0422,  0.0072,  0.0215, -0.0731,
         0.1037,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0527, -1.8408, -0.0491, -0.3372,  0.0386, -0.2168,  0.0760, -0.1428,
        -0.0199, -0.3906, -0.4675, -0.0903,  0.0062, -0.0195, -0.0028,  0.0540,
        -0.1620, -0.0296, -0.5825,  0.0856, -0.3712,  0.0436,  0.1018, -0.0921,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3492, -1.2727, -0.8291,  0.0152, -0.3660, -0.0277,  0.0025, -0.2794,
         0.1020, -0.3114, -0.0395, -0.2368, -0.3262, -0.0665, -0.0571, -0.0568,
         0.0187, -0.2014, -0.0581, -0.4281, -0.2127,  0.3596,  0.2044,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6457e-02, -1.2573e+00, -4.3539e-02,  7.1440e-03, -1.6508e-01,
        -2.0054e-01,  2.1956e-02,  4.0080e-02, -3.1036e-02,  1.9911e-02,
        -5.0020e-02, -1.0876e-02, -7.9813e-02,  8.6979e-04, -4.4248e-02,
         7.9966e-02, -5.1536e-01, -2.2704e-01, -2.6943e-01,  1.1801e-02,
         7.1115e-02, -1.2231e-01,  5.4492e-03,  2.8908e-02, -2.1702e-01,
        -8.3336e-02, -1.1575e-02,  5.2739e-03, -7.3930e-02, -1.6013e-01,
        -3.3008e-01, -2.6375e-02, -1.4109e-01, -5.0534e-03, -1.5633e-01,
        -2.6960e-01, -1.0217e-02,  1.4343e-01,  9.4857e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0170, -0.6394, -0.1184, -0.2920, -1.1662, -0.1208,  0.0618, -0.0295,
        -0.0944,  0.0570, -0.0462, -0.0413,  0.0499, -0.0599,  0.0262, -0.5409,
        -0.1460, -0.0566,  0.1064, -0.1205, -0.5036,  0.0312,  0.1427, -0.0157,
        -0.6215, -0.5347,  0.0415, -0.1537, -0.0384, -0.3370, -0.0872, -0.0292,
         0.1070, -0.0292, -0.0616, -0.0587, -0.1194, -0.5184,  0.0514,  0.0412,
         0.0227,  0.2073,  0.0325,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2811, -2.1483, -0.1348,  0.0037,  0.0568,  0.0316, -0.1711,  0.0804,
        -0.4120,  0.0195,  0.0542, -0.0092, -0.1567,  0.0211, -0.0545, -0.0674,
        -0.4542, -0.4682,  0.0328, -0.0123,  0.0469,  0.0340, -0.0487,  0.0516,
         0.0572,  0.0250,  0.0606,  0.0533,  0.0947,  0.0173,  0.0969, -0.3330,
        -0.6664,  0.0143,  0.0034,  0.0098, -0.0351, -0.5477, -0.3195, -0.0307,
        -0.2176, -0.3564,  0.0023,  0.0588, -0.0265,  0.0042,  0.0211,  0.0350,
         0.0200, -0.0033,  0.1480,  0.0039,  0.0156,  0.0188,  0.0092, -0.0997,
         0.0611], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6160,  6.4458, -0.2546,  0.1057,  0.3716, -0.1171,  1.0814,  0.7916,
         0.1616,  0.0531,  0.5844,  0.0377, -0.4223, -0.0397,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1466, -1.1444, -0.2408,  0.0116, -0.1007, -0.2480,  0.0431,  0.0922,
        -0.1009,  0.0730, -0.4067, -0.8966, -0.1865,  0.0981, -0.3734, -0.0215,
        -0.1725, -0.2366,  0.0363, -0.4147, -0.6667, -0.1135, -0.1208, -0.0037,
         0.0770,  0.0179, -0.0048,  0.0338,  0.0655, -0.0543,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0603, 2.7491, 0.0344, 2.0890, 2.3729, 0.1625, 0.4836, 0.6919, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 8.8947e-02, -1.2032e+00, -1.1008e-01, -1.6107e-01, -2.6253e-01,
        -5.5686e-01,  9.7898e-03,  4.9513e-02, -3.3828e-02, -1.0549e-01,
         8.7110e-02,  4.2005e-02, -3.3650e-01, -6.6648e-01,  3.8704e-02,
        -6.4701e-02, -4.6449e-02, -1.4227e-02, -9.6176e-03, -2.9728e-04,
         1.9237e-02,  1.3706e-02, -6.6139e-03, -1.9311e-02, -4.3218e-01,
        -1.2187e-01, -2.0113e-01, -6.5844e-03, -2.6615e-01, -5.0190e-01,
        -5.9931e-02,  6.2076e-02, -6.4591e-02,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.8061e-02, -1.7528e+00, -1.1396e+00, -2.8391e-01, -6.5934e-02,
        -2.4222e-01, -2.3387e-01, -9.9331e-02,  8.3660e-02, -3.5325e-01,
        -4.0547e-02, -6.9221e-02, -9.9230e-02, -4.2811e-02,  1.4943e-01,
         9.9566e-03, -4.6681e-03,  9.9109e-02, -1.9892e-01,  2.5136e-02,
        -3.5552e-03, -3.8261e-02, -4.4574e-02,  1.6030e-02,  3.8674e-02,
        -1.6171e-02, -2.7400e-02, -2.1111e-01,  2.7742e-02, -2.5170e-03,
        -1.0288e-02, -8.3501e-04, -1.7394e-01, -2.5709e-01,  7.4728e-02,
        -8.3420e-02, -1.3674e-01,  4.1029e-02, -6.6783e-02,  2.1151e-02,
        -1.6936e-02, -4.2597e-02, -6.3714e-02, -3.9573e-01, -1.0545e-01,
        -4.7798e-02, -2.8336e-02, -2.1775e-02, -4.0259e-02,  4.8263e-03,
         2.1013e-02, -1.2833e-01, -3.8118e-02, -8.4044e-02,  3.7910e-04,
         2.8201e-02, -3.7880e-03, -1.4790e-02,  3.2168e-03, -2.9563e-01,
        -2.5631e-02, -9.1292e-02, -1.9572e-01,  5.2319e-03,  3.7164e-03,
        -3.0903e-02, -9.4089e-02,  2.3070e-01,  7.5825e-02, -6.7549e-02,
         3.4878e-02], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1684,  0.0926, -0.1640, -0.0151, -0.6000, -0.1566, -0.9264, -0.2133,
        -0.0480, -0.0097, -0.0720, -0.0515, -0.0124,  0.1023,  0.0265,  0.0967,
         0.0056,  0.0842, -0.1031, -0.7066, -0.1252,  0.1218,  0.1894, -0.3942,
        -0.0182, -0.5776, -0.0662,  0.0995, -0.0613, -0.0466,  0.1476,  0.0081,
        -0.5703, -0.6271, -0.1559,  0.0386, -0.5698, -0.3378,  0.0341,  0.0158,
        -0.0556, -0.1423,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8114e-01, -2.5034e+00,  9.5922e-02, -2.1281e-02, -2.8750e-02,
         6.3332e-02,  3.5076e-02,  3.0100e-02, -9.3929e-02, -3.7432e-01,
         1.9592e-03, -2.9854e-01, -6.5960e-01, -1.1442e-01, -8.2740e-03,
        -2.4738e-02, -7.4234e-02, -5.7804e-01, -5.0451e-01, -1.1714e-01,
        -4.6150e-02, -2.1425e-01, -1.2504e-02,  6.5714e-05, -1.6617e-01,
        -5.4516e-01, -5.4063e-02, -2.3495e-02, -6.0604e-01,  1.0929e-01,
         1.5496e-01, -1.3880e-01,  2.7192e-02, -7.3315e-02, -4.8105e-02,
        -4.2594e-02,  1.4773e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2547, -3.2824, -0.3301, -0.5106,  0.0720,  0.0492, -0.1707, -0.0069,
        -0.0585, -0.0921, -0.4487,  0.1608, -0.6253, -0.3425,  0.3695,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4087, -0.3853,  0.0442, -0.1990, -1.3105, -1.2291, -0.1270, -0.5980,
        -1.2985, -0.0036, -0.0491, -0.0695, -0.3145, -0.2067,  0.2789,  0.0069,
        -0.0230, -0.3357,  0.0824,  0.2187,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1446,  0.0254,  0.0369, -0.0113, -0.6447, -0.0467,  0.0296, -0.0851,
         0.0080,  0.0199, -0.1893, -0.2733, -0.0220, -0.0139, -0.2470, -0.1931,
         0.0343, -0.1998,  0.0876, -0.1030, -0.0117, -0.0043, -0.0250,  0.0077,
         0.0117, -0.1779, -0.2860,  0.0320,  0.0415, -0.1934, -0.2937,  0.0384,
         0.0040, -0.0562, -0.2580, -0.4146,  0.0379, -0.2516,  0.0204,  0.0314,
         0.0064,  0.0392,  0.1216,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1112, -0.0379, -0.1929, -0.0566, -0.5521, -0.0341, -0.1999, -0.6494,
        -0.2269, -0.0820,  0.0370,  0.0249,  0.0666, -0.4745, -0.4720,  0.0093,
         0.0016, -0.0221,  0.0131, -0.1236, -0.5385, -0.1466,  0.0844, -0.0647,
        -0.1693, -0.0745, -0.0531, -0.1556, -0.4845,  0.0488,  0.0299, -0.3262,
        -0.0621, -0.1564, -0.2904, -0.0109, -0.0815, -0.2254,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0491, -2.0007, -0.8968, -0.6182,  0.0382, -0.6084, -0.0202,  0.2988,
        -0.3221, -0.8123,  0.0425, -0.1071, -0.3509, -0.0916,  0.0704, -0.0406,
         0.0092, -0.4950, -0.0492,  0.0701,  0.0318, -0.1795,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0354e-01, -1.0285e+00,  1.6662e-01,  2.3793e-04,  1.1026e-02,
        -3.4445e-02,  1.0813e-01,  2.0373e-01,  3.9056e-03, -7.9578e-01,
         1.1451e-03, -6.9058e-02, -4.2496e-02,  3.9041e-02, -5.1598e-01,
        -6.1627e-01, -7.1578e-01, -6.3544e-02, -1.0449e-01, -5.9490e-01,
         7.9764e-02, -1.0665e-01, -3.1722e-01,  1.1665e-01, -6.2657e-04,
        -4.5747e-02, -1.4737e-01, -9.8164e-02, -5.2170e-02, -1.3719e-01,
         1.0568e-01,  3.6895e-01,  1.6782e-01, -1.8914e-01, -4.1390e-01,
        -3.7043e-01, -2.6982e-02, -3.2212e-01,  1.0823e-01, -2.9441e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1861, -0.1537, -0.0670,  0.0468, -0.1105, -0.3157,  0.0254, -0.1099,
        -0.2717, -0.1032, -0.0691, -0.1051, -0.0354, -0.9564, -0.5695, -0.2323,
         0.1111, -0.0244, -0.6651, -0.0895,  0.0119,  0.0059, -0.0307,  0.0362,
        -0.0065, -0.4488, -0.0031, -0.4666, -0.5836, -0.0699,  0.0148,  0.0461,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0745, -1.5569, -0.0407, -0.0124, -0.0697, -0.0438, -0.1821, -0.3965,
        -0.8636,  0.0347, -0.0536, -0.4294,  0.0496, -0.0787,  0.0921,  0.0187,
        -0.6474, -0.1107,  0.0030,  0.0057, -0.0030, -0.0573,  0.0265, -0.4848,
        -0.0488,  0.0692, -0.2137,  0.0121, -0.0584, -0.0911, -0.0794,  0.0502,
         0.0607, -0.3180,  0.3920,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 3.3334e-01, -1.9546e-01, -1.2063e+00, -8.3147e-01, -9.6080e-02,
        -3.4174e-02, -1.0179e+00, -7.4410e-01, -2.5222e-01, -9.4887e-01,
        -1.1267e-01, -3.2544e-01,  9.2620e-02, -1.9991e-01,  6.9086e-06,
        -4.6353e-02, -1.0783e-01, -3.3406e-02, -2.3130e-02,  1.4248e-01,
         2.3222e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7609e-01, -4.3860e+00,  4.6933e-01, -6.7819e-01, -1.3784e+00,
        -5.3195e-01,  2.7467e-02, -5.3899e-01, -5.6433e-01, -6.9281e-02,
        -3.0805e-02,  5.4973e-02,  6.5439e-02,  6.1063e-02, -4.6917e-01,
        -3.2248e-03, -3.2515e-01, -3.6178e-01,  3.5209e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2167,  0.0144, -0.5520, -0.0067, -0.4445, -0.0812,  0.1627,  0.2465,
        -0.0392, -0.5924, -1.2345,  0.1455, -0.4911, -0.9522,  0.0169, -0.4032,
         0.0440, -0.3601,  0.0420, -0.2034,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0782, -3.0287, -0.1421, -0.9354, -0.0093,  0.0110, -0.0719, -0.2719,
        -0.0310, -0.0328,  0.0058, -0.1642, -0.0267, -0.4158, -0.2394, -0.6194,
        -0.5396, -0.0908, -0.1513, -0.2698, -0.0290,  0.0684,  0.4747,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4097e-01, -1.6914e+00, -4.1269e-01, -2.4240e-01, -7.1539e-03,
        -3.1262e-01, -6.9348e-02, -4.7364e-01, -5.6829e-01, -5.1847e-01,
        -1.8736e-01, -3.0139e-01, -1.4793e-02, -2.6535e-01,  3.2395e-05,
        -3.9081e-02,  1.0009e-02,  7.7904e-03, -1.5483e-01, -1.7548e-01,
        -3.1682e-01, -9.8275e-02, -5.9826e-02, -6.1210e-02, -6.1449e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6605e-01, -3.6481e+00, -1.8199e-01, -1.0696e-01,  4.5588e-02,
        -1.8862e-01,  1.3567e-01, -1.1889e-01,  1.3995e-01, -4.8551e-02,
        -1.2193e+00,  1.0406e-01,  8.3886e-02, -3.7631e-02,  7.1064e-02,
        -7.9230e-02,  1.0075e-01,  5.6293e-02,  1.0583e-01, -2.1636e-02,
        -7.6626e-01, -6.8478e-01, -4.3417e-01, -1.6736e-01, -5.9942e-01,
        -2.1944e-01,  3.1628e-02, -1.1306e-03, -1.1632e-01, -3.3536e-01,
        -3.4731e-01,  1.8989e-01, -3.3698e-01, -4.7301e-02, -1.8712e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1509, -0.0970,  0.0842, -0.7028, -0.1885,  0.1169, -0.5358, -0.1548,
        -0.9999,  0.0336,  0.2081,  0.0316, -0.0278, -0.3764, -0.3940, -0.9923,
        -0.0967, -0.8695,  0.0303,  0.0160,  0.0016, -0.1278,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.7247, -0.4046,  0.3201, -0.1283,  0.0508, -0.3433, -0.0726, -0.2194,
        -0.3335, -2.1789,  0.0629, -0.3304,  0.1109,  0.0855,  0.1961, -0.6433,
         0.0110,  0.2590,  0.3416,  0.0440,  0.1028,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1988, -4.9694, -0.4853, -0.1932, -0.0772,  0.0978, -1.8011, -1.3878,
        -0.2335, -0.1249,  0.3222,  0.2117, -0.0838, -0.6522, -0.1029, -0.5447,
        -0.0444,  0.0477,  0.3548,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2856e-02,  2.7162e-02, -9.1085e-02,  1.0584e-01,  3.1966e-02,
        -3.2532e-01,  2.6158e-02,  2.0909e-02,  1.2402e-02, -1.9843e-02,
        -5.2597e-02, -1.1524e-01, -8.0757e-02, -2.3028e-01,  1.5681e-02,
        -7.5752e-02, -4.2472e-01, -4.3880e-01, -5.8326e-02, -3.3408e-01,
        -1.2024e-01,  2.4823e-02, -4.3171e-01, -2.3939e-02, -2.6895e-01,
        -6.2296e-02, -3.0558e-02, -1.8021e-01, -1.0239e-01, -6.3816e-01,
         1.3057e-02, -4.1607e-02, -2.0082e-01, -3.1537e-01,  2.4612e-02,
        -2.9665e-02, -1.0311e-01,  4.8478e-02, -8.4296e-03, -2.1282e-01,
        -1.9977e-02,  1.3939e-02,  2.4927e-02, -1.1629e-01, -5.6630e-02,
         6.7642e-02, -6.1577e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2002, -1.9339, -1.3655, -0.0162,  0.0921, -0.0902,  0.0216, -0.0748,
        -0.5600, -0.0313,  0.0880, -0.0062,  0.0761,  0.0648,  0.0703, -0.1093,
        -0.7098, -0.0866,  0.0524, -0.0500, -0.0192, -0.0868, -1.0268, -0.7574,
         0.0540, -0.0410, -0.1642, -0.0138,  0.0266,  0.0290,  0.0097,  0.1967,
         0.0558, -0.0146,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.2636, -5.6132,  0.0849,  0.3044,  0.1991, -0.4520,  0.0399,  0.1628,
        -0.0087, -0.2560,  0.0569, -0.0566, -0.4962, -0.9411, -0.0739,  0.0270,
        -0.2310,  0.1365,  0.2547, -0.7794, -0.0846,  0.2286, -0.1653, -0.1957,
        -0.0461, -0.0404,  0.0087, -0.0740, -0.1531, -0.5121, -0.2060,  0.0603,
        -0.2996, -0.1267,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.3918e-01,  4.0240e+00, -1.9162e-01,  2.3339e-02,  3.3738e-01,
         9.5956e-01, -1.7664e-01,  1.2608e-01, -3.2486e-03,  1.1562e+00,
         8.6557e-02, -8.6485e-02,  5.5809e-01,  1.0172e+00,  9.4942e-02,
        -7.3098e-02, -1.8594e-01, -1.7643e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0975, -0.1913, -0.0737, -0.3471,  0.0331, -0.1089, -0.0403, -0.0351,
        -1.4108, -2.1960, -0.1196,  0.2205, -0.0531, -0.1867,  0.1025, -0.0683,
        -0.3052,  0.0428, -0.0250, -1.7560, -0.0256,  0.2106, -0.0259,  0.0992,
         0.0131,  0.0310, -0.2799,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.3548,  0.0889,  0.1137, -0.0278,  0.0317, -0.0719,  0.1448,  0.0780,
        -1.0044, -0.0804,  0.0406, -0.2624, -1.5240, -0.1153, -0.0309,  0.0594,
        -0.1134, -0.0515,  0.2263, -1.3873,  0.0377,  0.1719, -0.2848, -0.0883,
        -0.0242, -0.1788,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2621, -1.6818, -0.0175, -0.9340, -0.5213, -0.1578, -0.1878, -0.0871,
        -0.6651, -0.1384, -0.5082, -0.3731, -0.0780, -0.0561,  0.0208, -0.5628,
        -0.3522, -0.3446, -0.0762, -0.0825, -0.3375,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4365, -1.8806, -1.8593, -1.5760,  0.0038, -0.0127, -0.7819, -0.0160,
        -0.3613, -0.0415, -0.0897, -0.5433,  0.0453,  0.1499, -0.2599,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4870e-04, -1.4631e-01, -2.6395e-02, -9.4058e-02, -3.6734e-01,
        -7.9052e-02,  3.4091e-01, -3.1959e-01, -6.7244e-02, -1.2698e-01,
         3.4267e-01,  2.7523e-01, -3.0914e+00, -4.2723e-01,  1.8290e-01,
        -2.8424e-01,  3.1711e-01, -2.8247e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.3333, -3.0719, -1.2014, -1.2538,  0.0628,  0.1266,  0.3827, -0.5422,
         0.0420, -0.6321, -1.0189, -0.4405,  0.4514, -0.8085, -0.0409, -0.0113,
         0.1090,  0.2578,  0.1071,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.1086, -1.3329, -0.7932, -0.0189, -0.0573, -0.0870,  0.0884,  0.0152,
        -0.1135, -0.0277,  0.0056,  0.0204, -0.1388,  0.0043,  0.0322, -0.1294,
        -0.3628, -0.0155, -0.0035,  0.0055, -0.4565,  0.0213, -0.2223,  0.0184,
        -0.2045,  0.0458, -0.3429,  0.0866, -0.0221, -0.1132, -0.0510, -0.0366,
        -0.2157, -0.0298, -0.0978, -0.1069, -0.3121,  0.0297, -0.1645, -0.0620,
        -0.0504, -0.0645,  0.0247,  0.0653], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4969e-02, -1.7647e+00, -8.4179e-02, -9.5464e-01, -7.6669e-01,
        -2.5458e-01, -3.6446e-01, -3.2443e-02, -5.9210e-01, -7.4567e-02,
        -2.2655e-01,  1.1688e-02,  1.0195e-01,  1.6620e-02,  9.2914e-02,
         2.1582e-02,  3.0415e-01, -6.7642e-01,  6.1524e-02, -1.6205e-01,
        -2.5785e-01, -1.0503e-02,  1.2796e-02,  1.8983e-02,  6.4285e-02,
        -5.2716e-03,  7.5600e-04, -3.4293e-01, -4.0257e-02,  4.1701e-02,
         6.3910e-04, -1.7442e-01,  3.5935e-02,  2.2098e-01,  2.7495e-01,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.2916, -4.4423, -0.0222, -0.5535,  0.0311,  0.0522, -0.5283, -0.6405,
         0.0664,  0.0668, -0.0667, -0.5456, -0.8160, -0.0209,  0.0784, -0.5379,
         0.0694, -0.0149,  0.0269, -0.0529,  0.0689, -0.0738,  0.0755,  0.7192,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5248e-01, -2.7905e+00, -4.9597e-02, -7.6732e-01, -6.8450e-02,
        -2.0758e-01,  1.0290e-01, -2.9053e-01, -7.6430e-01, -1.1652e-01,
        -3.5965e-01,  7.3601e-02, -3.0133e-02, -2.9126e-01, -4.2195e-01,
         2.8760e-02, -2.5086e-02, -4.9374e-02, -4.1089e-01, -5.3911e-02,
        -7.8092e-02, -3.3163e-01, -1.3010e-03, -2.2059e-01,  1.0181e-01,
        -3.6490e-01, -2.6869e-01, -2.7131e-01,  1.8534e-02,  2.8876e-02,
        -2.2899e-01,  3.3344e-02, -1.5729e-01, -9.1697e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2501e-01,  1.7735e-01,  3.0440e-02,  6.4607e-02,  6.3974e-02,
         2.1592e-02, -5.8903e-01, -7.5920e-02,  1.8259e-01,  9.0413e-02,
         2.4634e-02, -2.6744e-02, -3.5678e-01, -6.6509e-02, -7.5605e-01,
        -1.2356e+00, -8.7389e-02,  4.1346e-02,  3.9481e-02,  1.3610e-01,
         5.0271e-04,  5.3417e-02,  1.3317e-01, -5.0376e-01, -6.2068e-01,
         1.3093e-01, -3.8895e-01,  2.1994e-01,  2.6514e-03, -8.8900e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-0.3883, -0.1578,  0.0237, -2.1164,  0.1239, -0.0528, -0.1781, -0.0999,
        -0.4895, -2.7260, -0.0182,  0.0589, -0.3569,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.6742,  0.1065, -0.2915, -0.0070,  0.1021, -0.2868, -0.1281, -1.0457,
        -1.2672,  0.0139, -0.1732, -0.5117,  0.0144, -0.2862, -0.6289, -0.0214,
         0.2453, -0.2069,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0212,  2.0357,  3.4703,  0.0958,  1.8422, -0.2573,  0.8383,  0.0333,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0983, -2.3951, -0.1681, -0.3547, -0.3957,  0.0035, -0.1234,  0.0069,
         0.0303, -0.0961,  0.0757,  0.1075, -0.1154,  0.0383,  0.0397, -0.1560,
        -0.0253, -0.0202, -0.0034, -0.0352,  0.0497,  0.0521, -0.0356, -0.2038,
         0.0474, -0.2663, -0.0515,  0.0443, -0.5036, -0.2816,  0.0828, -0.3004,
        -0.2376, -0.8121, -0.2105, -0.3695, -0.3228, -0.0142,  0.0414,  0.0217,
        -0.1937, -0.0684, -0.1094, -0.0512,  0.0729,  0.2033,  0.2826],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.3310, -0.9829,  1.9167, -3.4590, -0.1570,  0.1551, -0.0291, -0.1662,
        -0.3280,  0.4239,  0.1256,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2877, -2.0005, -0.4674, -1.5702, -0.5728,  0.1205, -0.2168, -1.2873,
         0.0094, -0.6316, -0.0535,  0.0370, -0.6061,  0.1434, -0.1180, -0.2542,
        -0.2028,  0.2880,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5377, -0.7251, -2.0887,  0.0189, -0.1615, -0.2070, -0.9345,  0.2431,
        -0.7105, -0.1832,  0.0540,  0.1451, -0.1007, -0.3326,  0.1047,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.8561, -4.2810, -0.0131,  0.0578, -0.0411, -1.0071, -1.1054, -0.2573,
        -0.0622, -1.2776, -0.8026, -0.0390, -0.0965, -0.0069,  0.1310,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5276, -0.0074,  0.0799,  0.1817,  0.0189,  0.1581,  0.0749,  0.0055,
        -0.7489, -0.0206,  0.0236, -0.2596, -0.8963, -0.0162, -0.0097, -0.1820,
        -0.8926,  1.9004, -0.7488,  0.2973, -0.9948, -1.1990,  0.1077,  0.0327,
         0.1204, -0.3838,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1668, -0.2166,  0.2816,  0.0113, -0.0667,  0.0387,  0.0416, -1.2090,
        -1.6152, -0.1362,  0.2866, -0.0561, -0.0361,  0.2650, -0.3226,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0848, -2.0047,  0.0491, -0.7253, -0.7076, -0.1378, -0.5846, -0.1250,
        -0.0246, -0.8832,  0.1528,  0.1668, -0.1707, -0.0422,  0.0425, -0.0355,
        -0.1114,  0.1193, -0.2946,  0.0660, -0.1549,  0.0828,  0.1288,  0.0486,
         0.0510, -0.0103,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0207, -0.0323,  0.0135,  0.0456,  0.0013, -0.1304,  0.0234, -0.0371,
        -0.3117, -0.4033, -0.8354, -0.1091, -0.0609,  0.0384, -1.0967, -0.3238,
         0.0804, -0.1643, -0.0080, -0.4929, -0.1429, -0.2006, -0.0378, -0.2079,
         0.0166,  0.0866,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 0.2033,  4.6327,  0.5252, -0.1635,  0.3853,  0.1865,  0.8584,  0.8468,
         0.0463,  0.5264,  0.1489,  0.6166,  1.1232,  0.0562,  0.3578,  0.5185,
         0.0517,  0.0648,  0.5128,  0.0157, -0.0714,  0.2792, -0.0900,  0.3630,
         0.7588, -0.2095,  0.3852,  0.0826, -0.0545, -0.0100, -0.0142,  0.2813,
         0.0673, -0.0676, -0.1536, -0.1112,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0952, -0.9713, -0.6239,  0.0136, -0.5295,  0.0029, -0.3516, -0.3430,
        -0.0243,  0.1180, -0.3165, -0.0791, -0.0151,  0.1208,  0.0065, -0.1383,
        -0.0339, -0.0308, -0.0084, -0.0188, -0.0173, -0.6011, -0.1452, -0.0373,
        -0.3683, -0.4981,  0.0597, -0.4385, -0.2902, -0.0426, -0.0473, -0.3564,
        -0.3685, -0.0392, -0.1298,  0.0647,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1329,  0.1632, -0.0949,  0.0274, -0.1078, -0.8017, -0.2699, -0.0408,
         0.1341,  0.0735, -0.0475, -0.6156, -1.1977,  0.1625,  0.1379, -0.0154,
         0.1255,  0.0732, -0.0932,  0.0082,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5534e-01, -1.5510e+00, -4.8820e-01, -4.6369e-01,  7.8213e-02,
         2.4503e-01, -7.5640e-03, -2.5019e-01, -1.0310e-01, -3.2316e-01,
         1.3741e-02, -1.6438e-02, -1.2635e-03,  1.0972e-01, -4.8266e-01,
        -4.0436e-01, -4.3790e-02, -2.4249e-01, -3.2861e-02, -5.8110e-02,
        -1.4642e-02, -5.4126e-02,  1.3122e-02,  1.0223e-01,  3.2058e-02,
         5.5999e-02,  3.8623e-02,  3.2794e-02,  5.9777e-02, -2.4354e-01,
        -7.6256e-01, -1.7994e-01, -1.2852e-01, -4.6974e-01,  1.0020e-01,
         3.0476e-01, -1.6602e-01, -1.5663e-01,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0302e-01, -9.6927e-01, -1.0608e+00, -8.0544e-01, -1.0067e-01,
         1.0089e-02, -1.2181e-01,  1.2842e-03,  1.4396e-01, -2.0086e-02,
        -4.8146e-01, -6.0479e-01, -4.8491e-02,  8.9466e-02, -1.9343e-01,
        -5.5835e-01, -1.0387e-01, -5.1099e-01, -1.8391e-02,  1.8067e-02,
         2.0810e-02, -3.9645e-04,  1.0504e-01,  6.1806e-02,  2.8157e-01,
        -7.3929e-02, -3.8116e-01, -5.7494e-01, -5.0123e-01, -1.4164e-02,
        -7.5277e-02, -4.9458e-02,  4.1202e-03,  2.0389e-02, -2.5846e-01,
         8.6801e-03, -1.2095e-01, -1.0404e-01, -2.9243e-02,  2.2336e-01,
         4.2137e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.6981, -4.7028, -0.0093,  0.3454,  0.0117, -1.0262,  0.0359, -0.0203,
        -0.3630, -0.0204,  0.0934, -1.0980, -1.4988, -0.1918, -0.0503, -0.4950,
        -0.0365, -0.8093,  0.0369,  0.1566, -0.1102, -0.0373,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0210, -0.0010, -0.0070,  0.0047, -0.0727, -0.2208, -0.0376,  0.0173,
        -0.3116, -0.0720,  0.0097, -0.5858,  0.0288, -0.1585,  0.0336, -0.6280,
        -0.0226, -0.3156, -0.0812, -0.2915, -0.5431,  0.1024,  0.0612, -0.2027,
         0.0182, -0.0182,  0.0235, -0.2215, -0.3932,  0.0683, -0.1759, -0.3157,
         0.0077, -0.0687, -0.0732,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0372, -0.3107, -1.1295, -0.1077, -1.0939, -0.1760,  0.0159,  0.0611,
        -0.0613,  0.0086, -0.2806, -0.0512,  0.0483,  0.0310, -0.0609, -0.0262,
         0.2233, -0.1740, -0.5479, -0.0073, -0.5635,  0.0388,  0.0844, -0.3938,
        -0.1744, -0.0659, -0.4367, -0.0159, -0.0902,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.1652, -2.2587, -0.6063, -0.3847, -0.1508, -0.5977,  0.0730,  0.0538,
        -0.0265, -0.0811, -0.2390, -0.1277,  0.0429, -0.0486, -0.0912, -0.3360,
        -0.1456, -0.0719, -0.1703, -0.0634, -0.1536, -0.2529, -0.1608, -0.2054,
         0.2603,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.3747,  0.1162, -0.0033,  0.1128, -0.9942,  0.0252, -0.0699, -0.8829,
        -1.2699, -0.4166, -0.1406,  0.1125, -0.0267, -0.1911, -0.4864, -0.7308,
         0.2184, -0.1131,  0.0500,  0.1082, -0.0901,  0.1005, -0.0886,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.5106,  0.0234,  0.1222,  0.0937,  0.0516,  0.0892,  0.0620, -0.0028,
        -0.0051, -0.0264,  0.0306,  0.0173, -0.7751, -0.8725, -0.0624, -0.1829,
        -0.1326, -0.4527, -0.0168,  0.0867,  0.1008,  0.0180,  0.0685,  0.0476,
        -0.1807,  0.0289, -0.7288,  0.0329, -0.6924, -0.1858, -0.6357, -0.0196,
         0.0516, -0.1392, -0.3495,  0.0832,  0.0427, -0.0338,  0.1463,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0186, -0.0704,  0.2976, -1.0181, -2.1070, -0.0756, -0.3185, -1.4044,
        -1.2051,  0.1859, -0.1091, -0.1325,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.1764e-01, -9.7121e-01, -1.2117e+00,  2.1925e-02, -2.4529e-02,
         8.3279e-02, -3.9474e-01,  4.5486e-02, -1.6758e-02, -1.8870e-01,
         4.6282e-03,  4.5518e-02, -1.1698e-01, -6.0346e-02, -3.8040e-01,
         8.0201e-02, -2.2690e-02, -2.9417e-01, -4.1628e-02, -1.8371e-02,
        -1.3459e-01, -1.7946e-01,  2.6915e-02, -1.6081e-03, -1.0522e-01,
         2.6742e-02, -2.3326e-02, -7.1832e-02, -1.0242e-01, -7.4939e-02,
         1.1990e-02,  1.3779e-01, -8.9719e-02,  9.3590e-03, -1.4569e-02,
        -7.3664e-02, -1.5808e-01,  2.8110e-03,  3.3451e-02, -8.0233e-04,
         2.6101e-02,  2.0740e-02, -5.2212e-03, -5.0548e-02,  2.0635e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2350, -4.6991, -0.5877, -0.7487, -0.4376, -0.1748,  0.2104, -0.1292,
        -1.2877,  0.1520, -0.1258, -0.4505,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4924, -6.1283,  0.9401, -0.9745, -0.1137, -0.0523, -0.1575,  0.4803,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5380e-02, -2.3125e+00, -8.1673e-02, -1.0402e-01, -6.3953e-01,
        -1.2902e-01,  1.0956e-01,  6.3817e-02, -2.9752e-01, -5.3410e-01,
        -1.0594e-01, -2.1456e-02, -3.9875e-02,  1.4419e-02,  4.7411e-03,
         1.9377e-02, -9.4376e-03, -4.0221e-02,  2.0382e-02,  1.5751e-02,
        -1.7806e-03,  4.0289e-02, -1.7557e-02, -2.8383e-02, -2.2306e-02,
         2.3282e-02, -1.2567e-02, -2.7065e-02, -1.9517e-01,  2.5162e-02,
        -1.8746e-02, -6.7874e-02,  1.4585e-02,  6.7332e-03, -2.0139e-02,
        -9.0581e-03,  5.8520e-02,  1.0080e-02, -1.7074e-02,  4.8710e-03,
         4.2566e-02,  9.9892e-03, -1.8675e-02, -1.8744e-02,  3.5267e-02,
         1.5878e-01, -5.2587e-01,  5.0721e-03, -5.0666e-01, -1.2400e+00,
        -4.0026e-01, -5.8588e-03, -4.6677e-01, -1.5561e-01, -1.6794e-01,
         1.4099e-03, -2.6308e-01, -3.8438e-02, -1.8473e-02,  7.1250e-03,
         3.4766e-01], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.2844, -0.1276,  0.0289, -0.1106, -0.3366,  0.0226, -0.2440,  0.0216,
         0.0297, -0.0282, -0.0466, -0.3245, -0.0014, -0.3228, -0.4721, -0.0497,
        -0.0017, -0.0640, -0.1760, -0.0563, -0.3324, -0.3032, -0.0737,  0.0150,
        -0.1655, -0.2044, -0.0406, -0.1162, -0.1394, -0.0469,  0.0196,  0.0688,
        -0.1251,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6912e-01, -2.2369e-01, -8.3160e-01, -4.9202e-01,  2.0816e-02,
         9.6848e-03, -4.2936e-01,  1.6357e-02,  3.7834e-02, -2.9340e-02,
         8.5670e-02,  2.3198e-02, -4.4024e-01,  2.3277e-02, -2.5898e-02,
         1.9307e-01, -1.1133e-02, -6.3792e-01, -1.5196e+00,  7.4700e-04,
         1.1250e-01,  7.0439e-02,  9.7012e-02, -2.5866e-01,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4953e-02, -9.0439e-01, -3.3360e-01, -4.1215e-01,  1.0279e-01,
        -4.1804e-01,  3.6558e-02, -3.1829e-01, -2.2372e-04, -6.6878e-01,
        -7.9085e-01, -6.8372e-01, -8.0932e-01, -1.3128e-01,  8.9257e-02,
        -1.0304e-02,  2.7874e-02,  4.8170e-02,  4.3917e-02, -9.6813e-02,
        -1.9300e-01, -1.4772e-01,  2.1076e-01,  1.2039e-02,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1834, -0.0661, -0.2603, -0.6386,  0.0037,  0.0018,  0.0151, -0.2979,
         0.0138,  0.0024,  0.0058,  0.0210, -0.0568,  0.0173, -0.3501, -0.3256,
        -0.4399, -0.0773, -0.0507, -0.0338, -0.5686, -0.0064, -0.4821, -0.5076,
        -0.0730,  0.0399, -0.2872, -0.0416, -0.0357, -0.2841, -0.1409, -0.1127,
        -0.0135, -0.1838,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.2280, -0.1688, -0.0438,  0.0279, -0.4602,  0.0200, -0.0667,  0.0439,
        -0.1631, -0.1295, -0.0723,  0.0336, -0.0368, -1.0302,  0.0520, -0.0909,
        -0.5943, -0.0548, -0.4465,  0.0717, -0.4697, -0.6561,  0.1160, -0.1267,
         0.0079,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.4102, -0.0803, -0.2334, -0.1803, -0.0656,  0.0573, -0.8143, -0.7752,
        -0.6594, -2.3908,  0.0892, -0.2603, -0.4867, -0.1761,  0.2199, -0.5077,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.6203, -3.0067,  0.1315, -1.2669, -0.3363, -0.9190, -1.8108, -0.0240,
         0.3707, -0.0459,  0.1002,  0.0116, -0.3058,  0.1212,  0.2311, -0.1649,
        -0.6538, -0.9761, -0.3342, -0.1054, -0.0937,  0.3146,  0.0489,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.5427,  0.2392,  0.0801, -1.7598, -3.5331, -0.0665,  0.4596,  0.0498,
        -0.2746,  0.6734,  0.1159,  0.3304, -0.0895,  0.1443,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>)]
