Iter #50: [tensor([-0.0593, -0.0130, -0.0008, -0.0016, -0.0032, -0.0138, -0.0030, -0.0022,
        -0.0125, -0.0025, -0.0023, -0.0013, -0.0021, -0.0115, -0.0019, -0.0047,
        -0.0036, -0.0014, -0.0043, -0.0008, -0.0008, -0.0012,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0744, -0.0355, -0.0050, -0.0087, -0.0056, -0.0059, -0.0114, -0.0057,
        -0.0052, -0.0052, -0.0119, -0.0140, -0.0298, -0.0055, -0.0035, -0.0039,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0513, -0.0189, -0.0020, -0.0047, -0.0036, -0.0032, -0.0052, -0.0023,
        -0.0022, -0.0016, -0.0051, -0.0037, -0.0048, -0.0058, -0.0184, -0.0023,
        -0.0016, -0.0023,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0608, -0.0340, -0.0037, -0.0075, -0.0054, -0.0043, -0.0091, -0.0044,
        -0.0035, -0.0038, -0.0055, -0.0144, -0.0053, -0.0094, -0.0055, -0.0094,
        -0.0021, -0.0030,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0863, -0.0216, -0.0037, -0.0044, -0.0033, -0.0051, -0.0042, -0.0035,
        -0.0091, -0.0036, -0.0034, -0.0027, -0.0233, -0.0013, -0.0042, -0.0025,
        -0.0033, -0.0210, -0.0017, -0.0021, -0.0085, -0.0020, -0.0021, -0.0035,
        -0.0007, -0.0014], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0888, -0.0251, -0.0046, -0.0051, -0.0040, -0.0060, -0.0048, -0.0038,
        -0.0111, -0.0059, -0.0032, -0.0041, -0.0261, -0.0025, -0.0061, -0.0031,
        -0.0055, -0.0229, -0.0052, -0.0002, -0.0039, -0.0083, -0.0030, -0.0233,
        -0.0033, -0.0013], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1032, -0.0320, -0.0028, -0.0049, -0.0042, -0.0070, -0.0043, -0.0040,
        -0.0124, -0.0050, -0.0044, -0.0036, -0.0319, -0.0016, -0.0057, -0.0023,
        -0.0051, -0.0269, -0.0030, -0.0040, -0.0115,  0.0005, -0.0035, -0.0105,
         0.0006, -0.0011], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.1648, -0.0178, -0.0082, -0.0177, -0.0104, -0.0226, -0.0586, -0.0063,
        -0.0220, -0.0056, -0.0072, -0.0167, -0.0119, -0.0143, -0.0150, -0.0026,
        -0.0069,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0971, -0.0099, -0.0044, -0.0096, -0.0057, -0.0129, -0.0343, -0.0031,
        -0.0105, -0.0018, -0.0051, -0.0087, -0.0043, -0.0084, -0.0053, -0.0035,
        -0.0031, -0.0043,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0810, 0.0057, 0.0022, 0.0045, 0.0024, 0.0076, 0.0181, 0.0017, 0.0078,
        0.0017, 0.0022, 0.0077, 0.0041, 0.0031, 0.0059, 0.0024, 0.0043, 0.0010,
        0.0034, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0222,  0.0013,  0.0010,  0.0021,  0.0011,  0.0020,  0.0041,  0.0020,
         0.0005,  0.0012,  0.0013,  0.0004,  0.0002,  0.0034,  0.0014,  0.0016,
        -0.0023,  0.0004,  0.0010,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0435, 0.0018, 0.0009, 0.0020, 0.0007, 0.0007, 0.0010, 0.0017, 0.0017,
        0.0007, 0.0008, 0.0022, 0.0009, 0.0017, 0.0010, 0.0011, 0.0025, 0.0021,
        0.0011, 0.0002, 0.0008, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-0.0781, -0.0520, -0.0003, -0.0054, -0.0034, -0.0016, -0.0019, -0.0037,
        -0.0130, -0.0046, -0.0040, -0.0457, -0.0049, -0.0028, -0.0029, -0.0463,
        -0.0027, -0.0113, -0.0043, -0.0116, -0.0117, -0.0003, -0.0023,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([0.0355, 0.0222, 0.0011, 0.0056, 0.0055, 0.0012, 0.0015, 0.0013, 0.0025,
        0.0008, 0.0008, 0.0060, 0.0016, 0.0008, 0.0013, 0.0020, 0.0020, 0.0013,
        0.0014, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0616, 0.0299, 0.0011, 0.0080, 0.0051, 0.0017, 0.0016, 0.0017, 0.0029,
        0.0009, 0.0013, 0.0074, 0.0022, 0.0012, 0.0018, 0.0242, 0.0019, 0.0066,
        0.0003, 0.0036, 0.0017, 0.0039, 0.0013, 0.0012, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0506, 0.0382, 0.0017, 0.0105, 0.0082, 0.0023, 0.0019, 0.0023, 0.0048,
        0.0017, 0.0018, 0.0092, 0.0021, 0.0012, 0.0021, 0.0031, 0.0044, 0.0008,
        0.0020, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0272, 0.0193, 0.0019, 0.0014, 0.0006, 0.0034, 0.0019, 0.0005, 0.0004,
        0.0006, 0.0009, 0.0006, 0.0004, 0.0011, 0.0051, 0.0018, 0.0009, 0.0015,
        0.0008, 0.0009, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0509e-02, 1.0841e-02, 7.9600e-04, 8.0044e-04, 2.5741e-04, 1.7488e-03,
        1.1896e-03, 2.8981e-04, 2.5785e-04, 1.1100e-04, 4.7515e-04, 2.8576e-03,
        1.0638e-03, 6.4608e-04, 5.2029e-04, 5.9972e-04, 1.5789e-04, 4.5876e-04,
        5.4409e-04, 1.2999e-05, 3.0863e-04, 2.3583e-04, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0233, 0.0206, 0.0023, 0.0012, 0.0013, 0.0044, 0.0020, 0.0008, 0.0005,
        0.0005, 0.0010, 0.0059, 0.0009, 0.0052, 0.0007, 0.0016, 0.0012, 0.0010,
        0.0013, 0.0007, 0.0013, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0410, 0.0013, 0.0004, 0.0022, 0.0008, 0.0016, 0.0011, 0.0010, 0.0007,
        0.0005, 0.0002, 0.0007, 0.0028, 0.0008, 0.0006, 0.0012, 0.0007, 0.0007,
        0.0006, 0.0005, 0.0004, 0.0030, 0.0006, 0.0004, 0.0005, 0.0011, 0.0004,
        0.0005, 0.0009, 0.0003, 0.0008, 0.0013, 0.0011, 0.0020, 0.0036, 0.0007,
        0.0004, 0.0008], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0177, 0.0009, 0.0003, 0.0014, 0.0004, 0.0010, 0.0009, 0.0007, 0.0004,
        0.0006, 0.0002, 0.0005, 0.0024, 0.0006, 0.0005, 0.0009, 0.0004, 0.0004,
        0.0003, 0.0010, 0.0004, 0.0008, 0.0003, 0.0007, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7095e-02,  7.3016e-04,  1.0676e-04,  1.9221e-03,  5.3440e-04,
         5.9064e-04,  1.1103e-03,  9.4390e-04,  5.0034e-04,  3.9109e-04,
         1.8090e-04,  3.2746e-04,  1.2514e-03,  7.2666e-04,  5.3282e-04,
         3.7608e-04,  1.1212e-03,  3.2093e-04,  3.3261e-04,  1.3047e-04,
         5.3395e-04, -1.0330e-05,  6.4793e-04,  4.0237e-04,  4.2312e-04,
         2.4947e-04,  2.0813e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0704, -0.0606, -0.0039, -0.0041, -0.0031, -0.0427, -0.0051, -0.0051,
        -0.0019, -0.0017, -0.0024, -0.0348, -0.0042, -0.0035, -0.0036, -0.0051,
        -0.0021, -0.0022, -0.0030,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([0.0752, 0.0507, 0.0041, 0.0047, 0.0034, 0.0390, 0.0042, 0.0065, 0.0022,
        0.0017, 0.0035, 0.0434, 0.0041, 0.0043, 0.0057, 0.0012, 0.0019, 0.0037,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([4.5973e-04, 3.1254e-04, 1.2534e-05, 2.7394e-05, 1.2043e-05, 5.7890e-06,
        6.8411e-05, 1.0972e-05, 2.6541e-05, 3.7536e-04, 1.0032e-05, 9.1029e-06,
        1.0679e-05, 1.2933e-05, 3.9806e-06, 7.1681e-06, 2.0210e-04, 1.5148e-05,
        2.8947e-05, 2.6383e-05, 1.1676e-05, 3.5634e-06, 3.0481e-05, 1.4382e-05,
        1.6789e-05, 1.3214e-04, 1.0213e-05, 3.3654e-06, 1.2480e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.8375e-04, 4.0507e-04, 7.6841e-06, 2.3673e-05, 1.2933e-05, 1.6390e-05,
        1.4569e-05, 4.8706e-06, 1.8394e-05, 1.5917e-05, 2.0449e-06, 5.8339e-06,
        7.8499e-06, 9.8314e-06, 1.4088e-05, 4.3832e-06, 9.0880e-06, 3.1828e-04,
        4.4126e-06, 4.0763e-06, 3.1085e-06, 1.1149e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7203e-04, 5.5962e-04, 1.1381e-05, 2.0827e-05, 1.4875e-05, 1.9157e-05,
        1.8688e-05, 6.7857e-06, 1.6573e-05, 1.3648e-05, 4.3106e-06, 3.9996e-06,
        7.3980e-06, 1.3420e-05, 2.0324e-05, 2.9630e-05, 1.5423e-05, 1.8216e-05,
        1.1800e-04, 1.1557e-05, 5.9562e-06, 7.9355e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.3287e-04, 8.3803e-04, 7.7685e-06, 3.7296e-05, 2.4190e-05, 3.6706e-05,
        1.9551e-05, 2.0166e-05, 2.7088e-05, 2.1008e-05, 5.4365e-06, 4.4329e-06,
        9.7299e-06, 2.7546e-05, 3.6609e-05, 3.8959e-05, 1.5898e-05, 1.5894e-05,
        5.5453e-04, 3.9257e-06, 7.3100e-06, 4.1772e-06, 2.1164e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.4207e-04, 3.4584e-04, 3.2385e-05, 2.7189e-05, 6.3750e-04, 1.7063e-05,
        3.3237e-05, 1.2399e-05, 2.2510e-05, 3.7782e-04, 1.1617e-06, 3.3886e-05,
        2.7152e-05, 8.0069e-06, 1.3988e-05, 4.8382e-05, 1.0774e-04, 2.0219e-05,
        1.0786e-05, 2.8195e-05, 7.3096e-06, 1.6507e-05, 6.7059e-05, 1.2710e-05,
        1.9671e-05, 8.7852e-06, 1.9614e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1792e-03,  8.3091e-04,  5.2146e-05,  3.3041e-05,  7.3725e-04,
         2.2428e-05,  4.0411e-05,  1.8719e-05,  2.9777e-05,  6.3339e-04,
         7.2294e-06,  5.2902e-05,  2.1318e-05,  1.6914e-05,  2.2702e-05,
         1.7878e-04,  3.2936e-05,  1.6124e-04,  1.3578e-04,  2.4348e-05,
         4.6068e-05,  2.9086e-05,  1.2319e-05,  3.8219e-05,  5.3145e-05,
         2.9175e-06,  5.2607e-05,  1.3843e-04,  4.7736e-05, -3.7279e-07,
         6.3232e-04,  4.7899e-05,  1.6150e-05,  1.5729e-05,  1.0255e-05,
         2.0241e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.5612e-03, 3.1831e-03, 1.9666e-04, 7.0102e-05, 2.3669e-03, 1.0692e-04,
        1.2366e-04, 4.1206e-05, 1.0035e-04, 1.7638e-03, 2.3784e-05, 1.7989e-04,
        1.3521e-04, 6.6078e-05, 8.1565e-05, 5.4809e-04, 1.2339e-04, 4.8114e-04,
        1.5086e-04, 2.7399e-03, 7.6780e-05, 1.2264e-04, 8.6679e-05, 2.6397e-03,
        2.0432e-04, 7.1976e-05, 3.4521e-04, 6.9189e-05, 6.6784e-05, 5.3518e-05,
        7.2757e-05, 5.2077e-05, 8.0948e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.4172e-03, 1.7438e-03, 1.1883e-04, 3.9157e-05, 7.6216e-05, 4.4476e-05,
        4.9625e-04, 1.1394e-04, 6.7492e-05, 1.9065e-03, 6.2140e-05, 6.8420e-05,
        2.8339e-05, 4.7967e-05, 1.0978e-04, 5.1227e-04, 1.1802e-04, 3.9311e-05,
        4.6534e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.6188e-03, 1.5318e-03, 8.6979e-05, 4.4710e-05, 7.9240e-05, 5.6163e-05,
        5.3774e-04, 1.1700e-04, 9.7205e-05, 1.7060e-03, 4.8475e-05, 7.5278e-05,
        3.9371e-05, 4.9289e-05, 1.6252e-03, 3.2527e-05, 1.3541e-04, 3.9126e-05,
        6.4620e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.4408e-03, 1.0071e-03, 6.6827e-05, 2.7805e-05, 5.5037e-05, 3.9861e-05,
        2.8355e-04, 8.0174e-05, 4.4621e-05, 5.7740e-04, 3.6306e-05, 4.7009e-05,
        1.7856e-05, 3.4403e-05, 1.4234e-03, 4.2686e-05, 2.4316e-05, 7.4716e-05,
        2.0763e-05, 3.3588e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.5814e-03, 6.6001e-05, 6.9220e-07, 4.9536e-05, 7.6968e-05, 2.4373e-04,
        5.5292e-05, 3.2676e-05, 6.7702e-05, 3.9642e-05, 6.9693e-05, 5.1628e-05,
        7.2331e-05, 5.0105e-05, 1.3489e-03, 3.8252e-05, 2.4914e-05, 3.6351e-05,
        5.7234e-05, 3.8065e-04, 5.7145e-05, 4.9007e-04, 5.1442e-05, 5.0355e-05,
        1.5462e-03, 1.6244e-05, 3.9227e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2138e-04,  1.8143e-05, -1.1571e-06,  1.7334e-05,  2.5001e-05,
         7.5496e-05,  1.6364e-05,  1.2494e-05,  1.8071e-05,  1.4600e-05,
         3.9436e-05,  2.2976e-05,  1.5600e-05,  1.2621e-05,  4.2111e-04,
         9.5341e-06,  6.3244e-06,  7.6669e-06,  1.4187e-05,  1.2747e-04,
         1.1108e-05,  1.2660e-04,  2.5839e-04,  1.7375e-05,  9.0204e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([2.3616e-04, 4.2150e-05, 3.2616e-06, 6.3127e-06, 7.4629e-06, 1.5714e-06,
        1.9810e-06, 5.4513e-06, 4.9790e-06, 5.8290e-06, 5.3985e-07, 8.2519e-05,
        5.4392e-06, 1.5324e-06, 3.2512e-06, 9.8117e-05, 6.2452e-06, 7.4660e-06,
        5.8577e-06, 2.8318e-06, 7.2689e-06, 5.2035e-06, 9.3021e-07, 8.8324e-06,
        3.9906e-06, 3.1933e-06, 7.9056e-06, 3.3362e-05, 8.2298e-06, 7.1214e-06,
        1.0429e-04, 9.6896e-06, 2.4162e-06, 4.0806e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.6450e-04, 9.0111e-05, 2.9005e-06, 7.6750e-06, 9.5120e-06, 2.2537e-06,
        2.9162e-06, 4.4636e-06, 4.5433e-06, 7.0038e-06, 1.9659e-06, 6.4569e-05,
        4.2123e-06, 1.6484e-06, 3.2435e-06, 3.2864e-05, 5.2121e-06, 8.6601e-06,
        3.1091e-07, 1.6828e-06, 2.8220e-06, 3.5629e-06, 3.2902e-05, 1.2806e-05,
        8.5254e-06, 2.3458e-05, 3.7689e-05, 4.1967e-06, 7.4808e-06, 1.3813e-06,
        3.0873e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6137e-04, 5.2831e-06, 8.7849e-06, 4.2018e-07, 3.1320e-06, 2.2884e-05,
        1.8518e-06, 3.2346e-06, 5.5185e-06, 4.7942e-06, 1.4284e-06, 1.6249e-06,
        4.7937e-06, 2.8768e-05, 5.7768e-06, 1.2930e-06, 3.6934e-06, 4.0484e-06,
        3.7017e-06, 1.5304e-06, 3.8509e-06, 4.0412e-06, 5.5606e-06, 2.9836e-05,
        9.4393e-06, 2.9962e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0840e-04,  2.0014e-06,  5.9524e-06,  4.1477e-07,  4.5628e-06,
         2.6304e-05,  1.3229e-06,  3.2781e-06,  2.9324e-06,  1.9039e-06,
         1.3440e-06,  7.2216e-07,  5.0175e-06,  2.4702e-05,  3.6720e-06,
         7.7350e-07,  2.8759e-06,  4.1911e-06,  5.7039e-06,  8.1473e-07,
         3.8173e-06, -3.4539e-07,  4.3123e-06,  3.1831e-05,  8.2500e-06,
         1.6306e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.5536e-04, 4.0303e-06, 8.2893e-06, 9.1635e-07, 3.8061e-06, 2.9187e-05,
        1.9632e-06, 3.7426e-06, 3.8640e-06, 4.7002e-06, 7.4512e-07, 1.9692e-06,
        5.7775e-06, 2.8391e-05, 8.4243e-06, 1.0229e-06, 4.6456e-06, 5.3323e-06,
        5.4711e-06, 4.7944e-06, 3.2210e-06, 8.7812e-05, 6.5348e-06, 4.0313e-06,
        4.4520e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.3281e-04, 8.8993e-05, 7.1000e-06, 9.7755e-06, 1.1297e-04, 2.9851e-06,
        7.8374e-06, 4.2783e-06, 5.0403e-06, 7.8822e-06, 6.8682e-06, 4.9567e-05,
        6.0894e-06, 1.1059e-05, 7.6948e-06, 3.9911e-06, 4.6126e-06, 5.9299e-06,
        9.9660e-05, 8.3839e-07, 3.7143e-06, 2.8270e-06, 5.6535e-06, 1.6195e-04,
        6.3667e-06, 3.0601e-06, 5.5054e-06, 1.2569e-04, 5.5016e-06, 7.7465e-06,
        8.3974e-06, 1.7523e-04, 2.0951e-06, 3.6731e-06, 3.2949e-06, 7.5880e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4039e-04,  4.6330e-05,  4.1704e-06,  3.3929e-06,  7.6343e-05,
        -8.7359e-07,  4.6128e-06,  6.1959e-07,  5.1173e-06,  3.6370e-06,
         2.0575e-06,  2.3665e-05,  3.5573e-06,  6.2276e-06,  4.0334e-06,
         3.0658e-06,  4.0538e-06,  3.0774e-06,  3.9290e-05,  1.5614e-06,
         1.3867e-06,  1.7944e-06,  3.6706e-06,  7.2724e-05,  5.2140e-06,
        -1.2071e-07,  2.1774e-06,  5.1063e-05,  5.2135e-06,  3.1891e-06,
         7.2750e-05,  3.3838e-06,  3.8120e-06,  2.5204e-06,  1.1615e-06,
         1.8732e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7811e-04, 6.9300e-05, 5.0029e-06, 5.1715e-06, 7.4963e-05, 5.4490e-07,
        4.0416e-06, 2.6188e-06, 4.6716e-06, 3.3845e-06, 4.0742e-06, 2.1685e-05,
        4.6634e-06, 1.0338e-05, 4.8145e-06, 4.8409e-06, 5.1496e-06, 4.9496e-06,
        6.7407e-05, 1.9560e-06, 3.5794e-06, 1.2274e-06, 2.5056e-06, 9.0328e-05,
        5.5631e-06, 2.5561e-06, 2.7788e-06, 9.2822e-05, 4.0920e-06, 3.8615e-06,
        4.9740e-06, 1.0169e-04, 1.5915e-06, 2.2775e-06, 3.1321e-06, 2.9258e-06,
        2.2632e-06, 8.7647e-06, 2.2405e-06, 2.1301e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.3554e-04, 4.7068e-06, 3.8137e-05, 1.6964e-04, 5.2283e-06, 8.5478e-06,
        7.0810e-06, 8.8284e-06, 8.6220e-05, 1.2208e-05, 2.2342e-06, 4.6751e-06,
        7.1756e-06, 1.0527e-04, 5.0277e-06, 1.2220e-05, 9.5847e-06, 1.1117e-05,
        9.2398e-05, 1.0136e-05, 5.8448e-06, 7.2349e-06, 1.1357e-04, 7.2172e-06,
        2.5228e-06, 4.7376e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.2989e-04, 8.4295e-06, 4.7652e-05, 1.7555e-04, 9.0113e-06, 1.2415e-05,
        1.2355e-05, 1.1215e-05, 1.9214e-04, 2.0980e-05, 6.3973e-06, 4.3840e-06,
        8.4707e-06, 1.7291e-04, 5.8981e-06, 9.8645e-06, 1.2480e-05, 1.1075e-05,
        1.2971e-04, 1.3507e-05, 8.9283e-06, 8.3140e-06, 2.0043e-04, 8.1251e-06,
        3.1533e-06, 7.5147e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.2121e-04, 2.6144e-06, 2.5947e-05, 9.4497e-05, 2.8767e-06, 6.1104e-06,
        5.0332e-06, 4.0851e-06, 1.0026e-04, 6.7403e-06, 1.5867e-06, 1.4898e-06,
        4.8322e-06, 8.5428e-05, 3.5935e-06, 5.1966e-06, 4.1198e-05, 5.9065e-06,
        4.8185e-06, 4.6159e-06, 7.4266e-05, 2.6915e-06, 2.0734e-06, 4.2323e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4951e-04,  1.0873e-06,  8.3056e-06,  2.4402e-06,  4.1432e-06,
         2.0013e-06,  4.1503e-06,  4.3508e-06,  2.9035e-06,  5.5993e-05,
         2.4666e-06,  4.1902e-06,  8.9693e-06,  9.7284e-07,  2.9565e-06,
         1.9713e-06,  6.5599e-06,  3.6512e-06,  2.0950e-06,  1.2892e-06,
         3.5912e-06, -5.9777e-08,  1.2749e-06,  2.7095e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.8399e-04,  3.9764e-06,  1.3821e-05,  4.4276e-06,  2.0374e-06,
         1.8138e-06,  6.1786e-05,  2.9807e-06,  4.3877e-06,  7.7247e-06,
         1.4394e-06,  2.2038e-06,  2.5292e-05,  1.9763e-06,  1.8882e-06,
         6.4178e-06,  2.4055e-05,  4.8445e-06,  1.1585e-05,  3.4226e-05,
         1.1754e-06, -2.4748e-07,  3.4643e-06,  3.0685e-05,  2.4697e-05,
         5.7562e-06,  1.0461e-05,  4.1202e-06,  9.8173e-06,  6.2000e-06,
         2.8326e-06,  2.0367e-06,  2.4334e-05,  3.0457e-06,  6.9673e-06,
         1.2052e-05,  2.0550e-06,  3.3428e-06,  4.3494e-06,  2.4035e-05,
         2.9861e-06,  1.9847e-06,  4.3864e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6593e-04, 7.8098e-06, 1.7984e-05, 1.4238e-05, 5.0430e-06, 3.8000e-06,
        1.8698e-04, 3.5937e-06, 5.2016e-06, 1.0022e-05, 4.2400e-06, 2.7883e-06,
        4.3303e-05, 4.4429e-06, 5.3557e-06, 7.6603e-06, 3.8690e-05, 7.7701e-06,
        2.0635e-05, 4.7875e-05, 2.1083e-06, 8.9820e-07, 5.1376e-06, 4.6137e-05,
        2.5457e-05, 7.1150e-06, 5.0449e-06, 4.0112e-06, 2.5266e-06, 4.2330e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0631e-04,  5.0142e-06,  1.8278e-05,  8.9311e-06,  4.4380e-06,
         2.4260e-06,  1.2191e-04,  2.8261e-06,  4.7959e-06,  7.2299e-06,
         4.6584e-06,  1.7375e-06,  3.2679e-05,  2.8450e-06,  5.0498e-06,
         7.6944e-06,  2.0207e-05,  5.0937e-06,  1.3886e-05,  3.1084e-05,
         5.2679e-08, -3.2685e-07,  2.9592e-06,  3.9657e-05,  1.0217e-06,
         7.2819e-06,  8.1956e-06,  1.3787e-05,  8.8591e-05,  6.0624e-06,
         6.1965e-06,  1.4222e-06,  3.1916e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.7316e-04, 4.2921e-06, 1.7161e-05, 1.1661e-05, 4.7777e-06, 1.9718e-06,
        8.9620e-05, 2.4436e-06, 5.1469e-06, 8.1972e-06, 2.8075e-06, 1.8001e-06,
        2.6306e-05, 2.3736e-06, 3.8360e-06, 4.8173e-06, 2.0741e-05, 5.1755e-06,
        1.5189e-05, 2.8747e-05, 1.3570e-06, 9.1907e-07, 3.4161e-06, 4.7320e-05,
        2.0220e-05, 1.2302e-05, 2.4917e-05, 3.2014e-05, 3.7588e-06, 2.2440e-07,
        3.7610e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9848e-04, 4.0467e-06, 1.4343e-05, 8.2345e-06, 2.9233e-06, 3.1788e-06,
        1.2277e-04, 3.2371e-06, 3.6370e-06, 7.0607e-06, 4.2392e-06, 1.5185e-06,
        2.6417e-05, 4.2901e-06, 1.9521e-06, 5.8358e-06, 2.0944e-05, 4.6158e-06,
        1.2109e-05, 2.3151e-05, 1.3679e-06, 7.2812e-07, 2.4920e-06, 6.6981e-07,
        3.8502e-06, 3.7689e-06, 1.1100e-06, 3.2165e-07, 3.8075e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9637e-04, 4.3823e-06, 1.8042e-05, 3.5508e-06, 3.5857e-06, 3.6628e-06,
        9.4643e-05, 1.5048e-06, 4.7820e-06, 6.8411e-06, 2.2155e-06, 1.2028e-06,
        2.6713e-05, 1.6519e-06, 3.0876e-06, 7.0773e-06, 2.0762e-05, 5.2314e-06,
        1.3338e-05, 3.3625e-05, 1.3014e-06, 5.7441e-07, 2.2215e-06, 5.1259e-06,
        1.7586e-05, 6.7707e-06, 9.5508e-06, 7.0649e-06, 7.0214e-06, 1.7916e-06,
        1.7236e-06, 3.7300e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8220e-04, 7.9150e-06, 4.6611e-05, 1.2777e-05, 4.9616e-06, 4.5328e-06,
        1.7018e-04, 3.8604e-06, 7.2893e-06, 9.3262e-06, 5.1960e-06, 3.9571e-06,
        3.9790e-05, 4.9305e-06, 6.0653e-06, 1.2498e-05, 4.5441e-05, 8.1272e-06,
        1.3811e-05, 4.9674e-05, 5.6546e-07, 1.6740e-06, 4.4737e-06, 6.2061e-05,
        3.5679e-05, 1.4590e-05, 1.8972e-06, 1.2987e-05, 4.0618e-05, 2.9073e-06,
        4.2486e-06, 3.9907e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0502e-04, 3.0197e-06, 1.5419e-05, 4.3006e-06, 3.8622e-06, 2.2390e-06,
        5.4306e-05, 2.0304e-06, 2.9985e-06, 5.5312e-06, 2.1772e-06, 1.6617e-06,
        2.5099e-05, 1.4416e-06, 2.2941e-06, 5.3173e-06, 1.7428e-05, 3.9793e-06,
        1.2175e-05, 2.0513e-05, 3.5516e-07, 3.6432e-07, 3.4597e-06, 2.5534e-05,
        1.8310e-05, 9.4973e-06, 5.1180e-06, 2.8675e-06, 1.1168e-06, 5.7329e-07,
        2.8025e-06, 2.5413e-06, 7.0542e-06, 3.1382e-07, 3.2256e-06, 3.9880e-06,
        4.2122e-06, 3.2707e-06, 2.4169e-06, 2.3771e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5777e-04,  5.5587e-06,  1.7284e-05,  1.3019e-05,  2.8728e-06,
         2.4432e-06,  5.5725e-05,  2.6851e-06,  4.2823e-06,  6.3272e-06,
         2.4166e-06,  2.1334e-06,  2.6593e-05,  2.7875e-06,  2.8119e-06,
         7.1550e-06,  3.0462e-05,  4.2705e-06,  1.1879e-05,  2.4841e-05,
         1.4048e-06, -6.6115e-07,  3.1562e-06,  7.0962e-06,  2.1454e-05,
         8.7988e-06,  4.7326e-06,  1.4751e-06,  1.4295e-05,  3.3122e-05,
         7.5875e-07,  8.3206e-07,  2.3867e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7436e-04,  4.9046e-06,  2.2744e-05,  9.0085e-06,  4.4487e-06,
         2.6659e-06,  7.5243e-05,  2.7550e-06,  3.7891e-06,  7.3431e-06,
         3.9733e-06,  2.5338e-06,  3.2347e-05,  2.9307e-06,  2.4979e-06,
         8.4173e-06,  3.2249e-05,  5.7993e-06,  1.1486e-05,  3.3380e-05,
         6.2934e-07,  5.2353e-10,  3.8774e-06,  4.3349e-05,  3.6036e-05,
         8.5948e-06,  2.6762e-06,  3.6032e-06,  8.6725e-06, -4.1721e-07,
         1.2621e-06,  3.0907e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6423e-04,  5.6268e-06,  2.4920e-05,  7.7329e-06,  2.9337e-06,
         3.2756e-06,  1.0933e-04,  2.1279e-06,  5.1258e-06,  8.5558e-06,
         3.5327e-06,  1.4268e-06,  3.3998e-05,  4.5680e-06,  4.2604e-06,
         6.9837e-06,  2.1183e-05,  5.8619e-06,  1.0709e-05,  3.4716e-05,
         7.3096e-07, -1.0779e-07,  4.1896e-06,  3.8021e-05,  3.0602e-05,
         5.6038e-06,  4.1751e-06,  5.6145e-06,  1.5874e-05,  7.0381e-06,
         1.0014e-04,  3.6696e-06,  2.3380e-06,  3.3119e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.1433e-04, 7.4750e-06, 2.6931e-05, 4.1434e-06, 1.3883e-05, 4.8069e-06,
        6.0706e-06, 1.1865e-04, 6.2701e-06, 1.7784e-06, 3.2225e-06, 6.3282e-06,
        1.0175e-05, 6.6972e-06, 6.5571e-06, 6.8583e-06, 9.3847e-05, 6.7161e-06,
        3.5162e-06, 7.4503e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([2.3453e-04, 1.4397e-04, 2.9848e-07, 1.0102e-05, 2.7185e-06, 6.6639e-06,
        7.5548e-06, 6.3527e-06, 5.5328e-06, 5.8984e-06, 1.0860e-04, 6.6015e-06,
        9.7303e-06, 8.1932e-06, 7.8843e-06, 1.2224e-05, 1.5643e-04, 5.5232e-06,
        4.4310e-06, 7.9591e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.0488e-04, 1.0227e-04, 4.0030e-06, 1.0534e-05, 6.3409e-06, 1.0779e-05,
        1.0317e-05, 8.7455e-06, 7.8787e-06, 1.5200e-05, 9.0564e-06, 6.6370e-06,
        1.7399e-04, 7.9368e-06, 2.9734e-06, 7.2151e-06, 1.3486e-04, 3.0945e-06,
        1.1445e-05, 3.0424e-04, 6.8207e-06, 1.2298e-05, 9.4732e-06, 3.8302e-06,
        2.4146e-06, 6.3905e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8058e-04,  4.6058e-05,  1.5636e-06,  5.6118e-06,  5.0137e-06,
         6.0921e-06,  6.5414e-06,  8.2297e-06,  6.7409e-06,  1.1924e-05,
         4.3028e-06,  3.4193e-06,  1.2293e-04, -2.3181e-07,  1.1956e-06,
         5.5340e-06,  8.9927e-05,  2.5285e-06,  6.3171e-06,  5.1229e-05,
         2.0554e-06,  3.5962e-06,  2.4327e-06,  6.6303e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.8585e-04, 1.2142e-04, 1.5652e-06, 7.5957e-06, 5.0513e-06, 7.1333e-06,
        7.1523e-06, 6.0404e-06, 7.7949e-06, 7.6077e-06, 6.1267e-06, 4.0937e-06,
        7.7274e-05, 3.3293e-06, 1.3840e-06, 5.9758e-06, 7.7487e-05, 3.1099e-06,
        6.2053e-06, 4.9236e-06, 8.0128e-06, 7.6880e-06, 9.5178e-05, 5.0829e-06,
        3.4400e-07, 6.9722e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.1892e-04, 9.0734e-05, 1.0423e-05, 3.6109e-06, 1.8436e-06, 4.0492e-06,
        4.3136e-05, 1.8227e-07, 9.1692e-06, 4.4905e-06, 1.0183e-04, 4.5744e-06,
        1.3943e-05, 9.5573e-06, 8.2764e-06, 6.7506e-06, 3.1981e-06, 5.8839e-06,
        5.1957e-05, 2.3304e-06, 3.6398e-05, 6.1642e-06, 1.7976e-06, 5.7924e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7393e-04,  1.0968e-04,  4.8365e-06,  4.0731e-06,  1.1081e-06,
         2.5265e-06,  4.3031e-05,  1.3696e-06,  8.0710e-06,  3.8435e-06,
         1.2688e-04,  1.9610e-06,  7.1455e-06,  6.3284e-06,  9.7425e-06,
         5.5434e-06,  3.7812e-06,  6.0406e-06,  3.3648e-05,  6.5010e-07,
         7.7091e-06, -2.7025e-07,  1.0940e-06,  6.2231e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.2234e-04, 9.4548e-05, 5.9546e-06, 3.8301e-06, 6.4469e-07, 3.3772e-06,
        5.6371e-05, 1.4493e-06, 1.3282e-05, 4.7234e-06, 1.2806e-04, 3.1698e-06,
        1.0460e-05, 8.9042e-06, 6.1403e-06, 4.1892e-06, 3.4964e-06, 5.6041e-06,
        4.9507e-05, 5.2622e-06, 7.3179e-06, 4.1352e-06, 1.1734e-05, 8.1357e-06,
        7.6192e-06, 2.3606e-05, 2.4171e-06, 5.8571e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.2410e-04, 1.0811e-04, 1.0333e-05, 7.5312e-06, 7.5906e-07, 6.3912e-06,
        1.6720e-05, 1.3040e-05, 6.6699e-06, 3.6635e-06, 6.4529e-06, 4.3345e-06,
        7.2338e-06, 2.4201e-04, 4.3908e-06, 4.7070e-06, 8.5931e-06, 4.7261e-05,
        1.3317e-05, 1.3623e-05, 2.8976e-05, 6.5197e-05, 3.8050e-06, 1.5930e-05,
        2.7966e-06, 9.8312e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6203e-04, 7.1586e-05, 8.4825e-06, 7.2298e-06, 1.7131e-06, 7.4622e-06,
        9.9806e-06, 7.3048e-06, 3.3713e-06, 1.1445e-06, 6.6283e-06, 2.5672e-06,
        4.6399e-06, 1.1655e-04, 1.0607e-06, 4.3591e-07, 5.7454e-06, 4.9511e-05,
        5.9678e-06, 1.2310e-05, 4.8524e-06, 6.7472e-06, 4.3012e-05, 5.7947e-06,
        1.4166e-06, 5.0180e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5204e-04, 5.9068e-05, 4.6301e-06, 6.0173e-06, 1.1559e-06, 5.6150e-06,
        1.0010e-05, 8.8141e-06, 2.0040e-06, 1.9392e-06, 5.6647e-06, 1.7185e-06,
        2.4784e-06, 1.2857e-04, 1.1838e-06, 2.4365e-07, 5.5445e-06, 4.7030e-05,
        6.8622e-06, 7.5738e-06, 6.5000e-06, 1.0914e-05, 5.2715e-06, 9.1970e-06,
        1.9956e-06, 3.0085e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.3100e-04, 7.2166e-06, 2.8666e-05, 6.2587e-06, 3.9426e-06, 2.3454e-06,
        1.1317e-05, 3.3926e-06, 8.9859e-05, 3.5360e-06, 9.0916e-08, 3.0639e-06,
        4.4975e-05, 9.5223e-06, 6.8516e-06, 5.8836e-06, 1.5065e-05, 4.3011e-05,
        3.0379e-06, 5.8175e-06, 6.3002e-06, 6.7347e-06, 3.6118e-06, 1.7677e-06,
        1.4115e-06, 4.1564e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.7085e-04, 7.1581e-06, 5.2544e-05, 6.5387e-06, 5.6101e-06, 2.7883e-06,
        1.4854e-05, 5.4890e-06, 9.9171e-05, 2.7988e-06, 1.2419e-07, 5.4963e-06,
        4.5182e-05, 7.7128e-06, 1.0577e-05, 6.1909e-06, 1.0043e-05, 1.1650e-04,
        3.8546e-06, 4.0285e-05, 7.0913e-06, 6.0977e-06, 1.0174e-05, 1.4418e-05,
        1.5409e-06, 5.5762e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #350: [tensor([3.5123e-04, 1.2206e-04, 2.2507e-06, 6.4077e-05, 2.6750e-04, 1.0155e-05,
        1.0090e-05, 2.1093e-05, 1.0549e-05, 6.5150e-06, 1.4076e-05, 1.7523e-05,
        6.4820e-05, 1.2463e-05, 5.0848e-06, 9.3368e-06, 9.9518e-06, 1.1519e-05,
        5.5012e-06, 9.1429e-05, 1.9565e-05, 2.9471e-06, 7.1438e-06, 6.2606e-05,
        9.0976e-06, 1.0487e-04, 2.8973e-05, 1.4640e-05, 1.6782e-06, 3.9400e-05,
        5.9260e-06, 7.5567e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8034e-04, 1.2212e-05, 5.2362e-06, 6.9586e-06, 1.7819e-05, 1.5211e-04,
        1.8057e-05, 8.1214e-06, 1.6200e-05, 9.6697e-06, 3.8387e-06, 9.3658e-06,
        2.7610e-05, 8.2513e-06, 4.5546e-06, 9.6696e-06, 7.6916e-06, 1.2033e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8135e-04, 9.2580e-06, 3.5970e-06, 1.2076e-05, 1.2224e-05, 1.6035e-04,
        1.3486e-05, 8.9275e-06, 1.4717e-05, 9.7407e-06, 6.4127e-06, 1.0176e-05,
        1.0247e-05, 2.4411e-05, 8.4365e-06, 6.5913e-06, 6.4820e-05, 8.6741e-06,
        6.2737e-06, 7.1767e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8905e-04, 9.5445e-06, 3.4671e-06, 9.6501e-06, 1.0584e-05, 1.4335e-04,
        1.5901e-05, 8.0471e-06, 1.1743e-05, 9.8985e-06, 1.8416e-06, 7.2869e-06,
        9.3934e-06, 9.6490e-06, 2.1614e-04, 1.2125e-05, 5.5039e-06, 1.0288e-05,
        7.7225e-06, 5.3867e-06, 8.4888e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8175e-04, 2.7858e-04, 8.3525e-06, 3.4551e-06, 8.9045e-06, 2.2745e-05,
        2.6139e-04, 1.6730e-05, 3.7261e-06, 1.0582e-05, 9.1693e-06, 1.3087e-05,
        1.2855e-05, 3.2945e-06, 8.4423e-06, 2.3793e-04, 1.0789e-05, 6.8060e-06,
        8.8447e-06, 1.1711e-05, 7.4171e-06, 3.8013e-05, 1.2797e-05, 1.2193e-05,
        1.8935e-06, 9.2887e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3197e-04,  2.3204e-04,  1.4149e-05,  4.6788e-06,  9.8410e-06,
         4.5066e-05,  1.9534e-04,  1.3554e-05,  4.9237e-06,  9.1511e-06,
         1.1288e-05,  1.4140e-05,  1.2638e-05,  3.1494e-06,  7.4790e-06,
         2.3412e-04,  1.3704e-05,  9.1394e-06,  1.3752e-05,  2.1447e-05,
         1.1695e-05, -1.4476e-06,  9.5925e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.3102e-04, 1.6218e-04, 1.8247e-05, 4.7135e-06, 1.0861e-05, 2.1640e-05,
        2.4784e-04, 1.2267e-05, 4.5178e-06, 9.1544e-06, 7.6557e-06, 1.4071e-05,
        1.3792e-05, 2.1164e-06, 1.0234e-05, 2.0685e-04, 1.4494e-05, 6.1858e-06,
        1.1554e-05, 1.4220e-05, 8.1343e-06, 7.6552e-05, 1.0808e-05, 2.0041e-06,
        8.6603e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7066e-04, 6.5718e-05, 4.0262e-06, 7.0850e-06, 4.1918e-06, 1.8304e-05,
        9.7105e-06, 4.8634e-06, 3.7241e-06, 1.0076e-04, 3.2172e-06, 2.0750e-06,
        1.8861e-06, 6.3554e-06, 1.8182e-06, 1.4635e-05, 4.2416e-06, 9.2727e-06,
        4.0328e-06, 9.5997e-05, 4.8156e-06, 4.8743e-06, 4.3500e-06, 3.6188e-05,
        1.1090e-05, 4.5559e-06, 3.5679e-06, 8.7036e-05, 2.9062e-06, 2.2453e-06,
        1.2100e-06, 7.4542e-06, 3.3422e-06, 1.4183e-05, 2.7621e-06, 1.1741e-05,
        3.4335e-06, 7.0438e-06, 4.3565e-06, 5.4710e-06, 1.2384e-06, 2.8914e-06,
        1.0430e-06, 3.5471e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7930e-04, 1.3628e-04, 2.5701e-05, 1.3549e-05, 1.3803e-05, 5.4813e-05,
        1.6268e-05, 1.4059e-05, 4.4694e-06, 1.4020e-04, 6.4840e-06, 6.4578e-06,
        3.8429e-06, 1.6672e-05, 9.7752e-06, 4.4378e-05, 1.2289e-05, 2.7448e-05,
        8.6016e-06, 2.3156e-04, 4.1343e-06, 6.5949e-05, 1.7760e-05, 1.3000e-05,
        6.4264e-06, 3.0152e-04, 7.3602e-06, 7.1778e-06, 1.0092e-06, 1.0597e-05,
        1.0836e-05, 3.7152e-05, 6.8082e-06, 2.5974e-05, 1.0524e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.3226e-04, 1.1551e-04, 9.4285e-06, 1.0244e-05, 6.2272e-06, 3.3294e-05,
        1.7229e-05, 6.1635e-06, 7.9965e-06, 1.2224e-04, 5.6924e-06, 5.0938e-06,
        2.6919e-06, 1.2183e-05, 7.0201e-06, 2.6778e-05, 7.8667e-06, 1.7388e-05,
        7.1852e-06, 2.3576e-04, 9.3498e-06, 7.4792e-06, 5.9530e-06, 6.8924e-05,
        2.3686e-05, 6.5362e-06, 7.3786e-06, 1.8202e-04, 5.1620e-06, 8.9364e-07,
        1.0563e-05, 1.8444e-05, 4.8151e-06, 3.3346e-05, 7.4492e-06, 2.1313e-05,
        9.1299e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7336e-04, 2.2185e-04, 5.8044e-06, 2.7841e-05, 1.6097e-05, 3.8124e-06,
        8.1355e-06, 3.7398e-06, 1.2286e-05, 1.4938e-05, 2.0740e-04, 8.8350e-06,
        3.8789e-06, 1.4587e-05, 5.8387e-06, 1.2695e-05, 2.0731e-04, 1.2916e-05,
        8.3913e-06, 1.9731e-05, 2.0234e-05, 3.8448e-04, 3.4220e-06, 1.3164e-05,
        7.2170e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4886e-04,  1.7085e-04,  3.8147e-06,  1.2187e-05,  1.5137e-05,
        -3.7893e-07,  6.8319e-06,  8.9951e-07,  9.0671e-06,  9.3101e-06,
         1.2817e-04,  8.4019e-06,  5.6329e-06,  8.9141e-06,  1.6978e-06,
         9.0898e-06,  1.4908e-04,  6.6981e-06,  4.6379e-06,  2.2028e-05,
         1.5839e-05,  2.4591e-04,  1.1478e-05,  1.1891e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([3.7278e-04, 4.0187e-04, 7.3420e-06, 2.4952e-05, 3.5593e-05, 2.1405e-05,
        2.6896e-05, 1.4555e-05, 2.4984e-05, 3.2528e-05, 2.5866e-05, 7.3451e-05,
        4.3823e-04, 2.7770e-05, 2.9430e-05, 3.9889e-04, 2.8085e-05, 3.0047e-05,
        1.0643e-05, 1.9413e-05, 1.1579e-04, 1.7734e-05, 1.3955e-04, 5.4882e-04,
        1.5575e-05, 2.1378e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.4669e-04, 2.1098e-05, 2.6724e-05, 5.9196e-04, 8.4078e-06, 2.1079e-05,
        1.2762e-05, 6.5377e-06, 2.2573e-05, 8.1087e-06, 2.1217e-05, 2.6322e-04,
        4.4512e-06, 1.5301e-05, 1.0495e-05, 1.2283e-05, 1.0388e-04, 3.8185e-05,
        8.0825e-06, 1.4588e-05, 3.0239e-04, 1.4317e-05, 1.2711e-04, 2.0327e-05,
        1.3011e-05, 1.3754e-04, 2.9452e-05, 1.6175e-05, 7.7764e-06, 1.5479e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6564e-04,  1.4381e-05,  1.1636e-05,  3.8762e-04,  5.6609e-06,
         1.3273e-05,  6.5705e-06,  6.2195e-06,  1.5504e-05,  4.3397e-06,
         1.8133e-05,  2.3616e-04,  3.2099e-06,  1.0952e-05,  1.1768e-05,
         1.0942e-05,  7.9053e-05,  2.6198e-05,  1.3205e-06,  1.2394e-05,
         2.1665e-04,  2.0693e-05,  7.9924e-05,  1.3847e-05,  1.2864e-05,
        -1.0377e-06,  1.0251e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4728e-04, 1.6263e-05, 2.0634e-05, 3.5679e-04, 1.9463e-06, 1.2974e-05,
        8.3123e-06, 9.5568e-06, 1.9167e-05, 6.9700e-06, 1.3649e-05, 1.9250e-04,
        2.4288e-06, 5.8192e-06, 1.0475e-05, 6.8832e-06, 7.8764e-05, 2.1948e-05,
        4.4722e-06, 9.6170e-06, 1.6460e-04, 1.4647e-05, 7.7574e-05, 1.2730e-05,
        1.0989e-05, 9.8144e-05, 7.8719e-06, 9.6344e-06, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3545e-04,  1.8996e-04,  2.3761e-05,  2.9859e-05,  1.2473e-05,
         3.0863e-06,  1.8825e-05,  2.7769e-04,  1.2015e-05,  1.0305e-05,
         9.9908e-06,  1.0573e-04,  2.8190e-05,  1.1503e-04,  1.7137e-05,
         1.3693e-05,  1.5048e-05,  1.7265e-05,  1.3754e-05,  1.0452e-05,
         1.5638e-05, -4.8173e-07,  1.3462e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2263e-04, 3.2799e-04, 2.7210e-05, 4.7716e-05, 1.5464e-05, 5.8728e-06,
        4.1128e-05, 4.3498e-04, 3.0528e-05, 1.3183e-05, 2.1588e-05, 3.9840e-05,
        1.6607e-04, 4.7031e-04, 3.6587e-05, 4.1721e-05, 3.1322e-05, 1.5709e-05,
        1.7950e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9496e-04, 4.0581e-04, 3.2609e-05, 6.7925e-05, 2.2869e-05, 6.6596e-06,
        4.2962e-05, 5.6429e-04, 3.0645e-05, 1.8704e-05, 1.5686e-05, 1.7480e-04,
        3.8069e-05, 1.5017e-04, 2.9055e-05, 4.5621e-04, 1.7507e-05, 1.1332e-05,
        2.3392e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.1916e-04, 2.6255e-04, 9.1831e-06, 9.0526e-06, 1.9092e-05, 1.8903e-04,
        1.0674e-05, 2.7895e-05, 4.2920e-06, 7.7355e-06, 1.5107e-05, 1.3259e-05,
        1.2704e-05, 8.2971e-06, 4.7714e-05, 1.0974e-04, 1.8704e-05, 5.9188e-06,
        1.4011e-05, 1.1089e-04, 1.3018e-05, 1.2607e-04, 1.0349e-05, 4.6332e-05,
        1.1930e-04, 1.5269e-05, 5.0765e-06, 7.8791e-06, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.5374e-04, 1.4071e-04, 1.1774e-05, 8.6351e-06, 1.7774e-05, 2.1164e-04,
        1.0381e-05, 2.2317e-05, 5.3097e-06, 7.0429e-06, 1.5187e-05, 1.1240e-05,
        5.4460e-06, 8.4691e-06, 5.0260e-05, 6.3880e-05, 1.9667e-05, 3.5392e-06,
        8.5090e-06, 2.7785e-04, 1.3469e-05, 6.7029e-06, 7.6188e-05, 1.0611e-05,
        2.0144e-06, 1.0603e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.8721e-04, 1.2915e-04, 1.0344e-05, 1.1947e-05, 1.3316e-05, 2.4722e-04,
        8.7716e-06, 3.5815e-05, 9.6790e-06, 9.3023e-06, 1.8467e-05, 1.5013e-05,
        1.4884e-05, 1.0346e-05, 4.0443e-05, 9.1157e-05, 2.2475e-05, 1.0306e-05,
        1.1267e-05, 9.9161e-05, 1.1303e-05, 1.3472e-04, 2.1447e-05, 5.3948e-05,
        4.7194e-04, 1.7045e-05, 3.5763e-06, 9.9349e-06, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4137e-04, 1.9718e-04, 1.8958e-05, 1.3987e-04, 2.1427e-05, 1.2114e-05,
        3.4160e-05, 4.0003e-04, 1.7772e-05, 1.1402e-05, 1.9435e-05, 1.5018e-05,
        8.5100e-06, 1.8994e-05, 1.4239e-04, 2.5123e-05, 9.8122e-05, 2.9376e-04,
        5.6792e-06, 2.2812e-05, 5.9019e-06, 1.5979e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2385e-04, 1.4809e-04, 1.0918e-05, 1.5388e-04, 1.3811e-05, 1.1813e-05,
        3.4528e-05, 3.1579e-04, 1.3294e-05, 1.7512e-05, 2.1958e-05, 1.4922e-05,
        5.9581e-06, 1.2506e-05, 1.0597e-04, 2.2861e-05, 1.0019e-04, 1.7948e-05,
        1.1829e-05, 1.3656e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([8.5734e-04, 5.9874e-04, 4.2918e-05, 9.5904e-05, 1.0974e-05, 1.5273e-04,
        2.5023e-04, 4.7986e-05, 4.6352e-05, 3.3976e-05, 1.8488e-04, 1.3798e-05,
        1.2409e-05, 2.6863e-05, 2.2221e-04, 2.7083e-05, 1.8155e-04, 4.9790e-05,
        5.2844e-05, 4.9680e-05, 2.7399e-05, 5.0567e-05, 1.5518e-05, 6.2440e-05,
        3.0451e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.4907e-04, 4.8913e-04, 3.1546e-05, 6.2385e-05, 2.5314e-05, 9.7622e-05,
        2.3037e-04, 4.8214e-05, 5.9727e-05, 3.7753e-05, 1.7441e-04, 1.7900e-05,
        1.8143e-05, 2.9291e-05, 2.3935e-04, 3.4014e-05, 1.9422e-04, 5.7090e-05,
        2.6181e-05, 3.3873e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.6820e-04, 3.2701e-04, 3.3404e-05, 4.4634e-05, 2.7120e-06, 1.2265e-04,
        2.4839e-04, 5.6796e-05, 6.7809e-05, 3.5979e-05, 2.0346e-04, 2.4821e-05,
        1.9147e-05, 2.7363e-05, 2.6110e-04, 2.5032e-05, 2.0363e-04, 5.2174e-05,
        4.9821e-05, 3.6227e-05, 2.5077e-04, 1.8612e-05, 3.1038e-05, 2.0164e-04,
        4.1559e-05, 2.0148e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7756e-04, 4.8764e-05, 5.3247e-05, 2.5355e-05, 1.6910e-05, 1.1410e-04,
        2.5644e-04, 5.4684e-05, 2.1751e-05, 3.0760e-05, 2.8516e-04, 3.6662e-05,
        1.0356e-04, 7.0660e-05, 8.5858e-05, 1.1921e-03, 5.0506e-05, 3.6448e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.5518e-04, 6.9191e-05, 7.3606e-05, 3.1084e-05, 3.4737e-05, 1.5132e-04,
        3.8552e-04, 7.6152e-05, 4.5580e-05, 3.1396e-05, 4.1721e-04, 4.6990e-05,
        1.3951e-04, 2.3151e-04, 3.7761e-04, 8.4924e-05, 5.8675e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.3079e-04, 7.0834e-05, 6.0893e-05, 3.7624e-05, 1.6944e-05, 1.8944e-04,
        3.6186e-04, 6.2242e-05, 2.0300e-05, 3.6930e-05, 5.0115e-05, 1.0854e-04,
        8.6934e-05, 2.2719e-05, 5.3759e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.1454e-04, 4.0037e-04, 7.9457e-06, 5.3097e-05, 3.4826e-05, 2.0878e-05,
        1.9421e-05, 5.1460e-04, 2.5663e-05, 2.5367e-05, 5.4351e-04, 1.8148e-05,
        4.3183e-05, 2.4947e-05, 4.6517e-05, 2.4244e-05, 4.2684e-05, 2.8843e-05,
        7.9975e-06, 1.5388e-05, 1.7449e-05, 8.5983e-04, 2.1202e-05, 6.4949e-05,
        6.1021e-05, 1.2354e-05, 2.1060e-05, 7.6016e-06, 2.5463e-05, 1.0236e-05,
        1.4598e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2889e-04, 4.5384e-04, 1.9802e-05, 4.6754e-05, 5.6909e-05, 2.2900e-05,
        1.7339e-05, 5.3976e-04, 2.0711e-05, 2.9549e-05, 4.1954e-04, 4.3885e-05,
        3.1170e-05, 2.9813e-05, 3.6346e-05, 3.0138e-05, 4.0653e-05, 2.4456e-05,
        8.4718e-06, 1.5567e-05, 2.2446e-05, 8.4279e-04, 2.2476e-05, 4.5898e-05,
        4.3790e-05, 2.8504e-05, 2.6073e-04, 2.6881e-05, 4.0598e-05, 2.7992e-05,
        3.8087e-05, 1.4347e-04, 1.3257e-05, 2.4779e-05, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.5274e-04, 4.9217e-04, 4.6909e-06, 4.4808e-05, 6.7374e-05, 1.3865e-05,
        1.8267e-05, 4.1900e-04, 1.7916e-05, 2.7607e-05, 5.2354e-04, 1.7045e-05,
        4.9711e-05, 4.1729e-05, 4.2691e-05, 1.8098e-05, 3.3637e-05, 2.5833e-05,
        1.4235e-05, 1.4371e-05, 2.3907e-05, 6.4210e-04, 2.1244e-05, 1.1842e-05,
        2.5918e-05, 6.0299e-06, 2.1055e-04, 2.0351e-05, 4.0644e-05, 4.5552e-05,
        9.2317e-05, 3.6404e-06, 1.6471e-05, 8.3508e-06, 1.6966e-05, 2.1632e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.2501e-04, 3.8381e-06, 2.4117e-05, 1.5774e-05, 5.0934e-06, 1.0839e-05,
        4.6659e-05, 1.3850e-05, 3.3179e-05, 9.7040e-06, 1.1648e-05, 1.8127e-05,
        3.1950e-05, 1.6147e-05, 1.3921e-04, 8.6320e-06, 9.0260e-06, 1.2858e-05,
        1.7309e-05, 9.9563e-06, 1.4191e-05, 1.6029e-05, 3.8179e-04, 1.3946e-05,
        7.2147e-06, 1.3781e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1001e-04,  1.5332e-05,  3.4061e-05,  3.5546e-05,  1.7718e-05,
         2.1632e-05,  4.0225e-05,  2.4443e-05,  3.9972e-05,  1.1762e-05,
         2.7746e-05,  3.7667e-05,  3.4983e-05,  1.4221e-05, -7.7643e-07,
         3.1406e-05,  4.3874e-05,  2.1284e-05,  3.5743e-05,  1.5223e-05,
         2.7993e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.2956e-04, 1.0721e-05, 3.4400e-05, 2.3379e-05, 6.9779e-06, 2.1445e-05,
        4.7050e-05, 2.0749e-05, 3.6989e-05, 1.2850e-05, 2.3438e-05, 3.0597e-05,
        3.0562e-05, 2.3937e-05, 2.1896e-05, 1.7154e-05, 3.2526e-05, 1.1131e-04,
        3.5191e-05, 1.2334e-05, 2.2499e-05, 8.8167e-05, 1.7699e-04, 3.5777e-05,
        4.0091e-05, 3.2461e-05, 5.8601e-06, 2.2148e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([0.0028, 0.0003, 0.0001, 0.0005, 0.0015, 0.0047, 0.0001, 0.0002, 0.0004,
        0.0001, 0.0002, 0.0111, 0.0002, 0.0020, 0.0003, 0.0063, 0.0002, 0.0003,
        0.0003, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.9913e-03, 2.0903e-04, 7.2020e-05, 1.8797e-04, 1.0209e-03, 4.6006e-03,
        7.2317e-05, 1.1962e-04, 2.2028e-04, 8.2994e-05, 1.3238e-04, 1.8520e-04,
        1.5509e-04, 2.3985e-04, 7.0884e-04, 4.6162e-03, 5.4684e-05, 1.2508e-04,
        1.3555e-04, 1.0516e-04, 1.4116e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6562e-04,  2.5989e-03, -5.9957e-06,  7.1624e-05,  2.6355e-05,
         8.3854e-05,  1.3073e-04,  5.4708e-05,  1.5209e-04,  1.6160e-05,
         2.1900e-04,  8.3403e-05,  7.6327e-04,  4.0498e-03,  5.6265e-05,
         1.0692e-04,  6.5629e-05,  8.4683e-05,  7.8953e-04,  1.5251e-04,
         7.3721e-05,  7.3426e-05,  1.6960e-04,  1.4335e-04,  2.2925e-04,
         3.0977e-04,  4.1241e-05,  8.1569e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9868e-03, 2.8035e-03, 6.0133e-05, 1.3764e-04, 1.1392e-05, 9.8754e-05,
        1.7080e-04, 1.1143e-04, 2.1514e-04, 7.7641e-05, 2.6002e-04, 1.4428e-04,
        7.8510e-04, 7.9456e-03, 1.4506e-04, 2.5978e-04, 1.2742e-04, 1.8144e-04,
        1.2744e-03, 3.2043e-04, 1.7189e-04, 1.3077e-04, 7.3280e-04, 8.3233e-03,
        2.3305e-04, 3.2509e-04, 1.0587e-04, 1.8123e-04, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2544e-03, 2.8605e-03, 7.8817e-05, 5.0505e-05, 6.0195e-05, 7.4907e-05,
        8.5033e-05, 9.0093e-05, 1.3195e-04, 4.8640e-05, 2.1735e-04, 7.9480e-05,
        5.1955e-04, 4.3814e-03, 6.8951e-05, 9.3400e-05, 6.1089e-05, 9.1344e-05,
        7.7328e-04, 1.4430e-04, 8.4911e-05, 9.0316e-04, 7.4670e-04, 1.0281e-04,
        1.2030e-04, 8.6593e-05, 1.4527e-04, 1.0309e-04, 2.9777e-05, 8.6981e-05,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8302e-03, 6.7541e-03, 1.7033e-04, 3.4673e-04, 2.7230e-04, 1.1941e-03,
        3.7620e-04, 1.0060e-03, 7.0687e-03, 1.5731e-04, 2.9837e-04, 3.2085e-04,
        2.3682e-04, 1.8545e-04, 3.9566e-04, 9.1688e-05, 2.3307e-04, 7.0096e-03,
        1.8390e-04, 2.1818e-03, 1.2957e-03, 7.0292e-03, 1.9236e-04, 3.1494e-04,
        2.5345e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7280e-03, 4.3404e-03, 1.4657e-04, 2.6988e-04, 2.3768e-04, 7.8716e-04,
        3.3267e-04, 9.1215e-04, 4.1190e-03, 1.4432e-04, 1.9867e-04, 2.0389e-04,
        2.1148e-04, 1.5184e-04, 2.1737e-04, 4.6139e-05, 1.9568e-04, 4.9587e-03,
        1.7367e-04, 2.8284e-04, 2.9055e-03, 7.1177e-05, 1.7475e-04, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0028, 0.0069, 0.0002, 0.0004, 0.0002, 0.0014, 0.0004, 0.0015, 0.0061,
        0.0002, 0.0003, 0.0002, 0.0003, 0.0002, 0.0004, 0.0001, 0.0003, 0.0087,
        0.0003, 0.0018, 0.0001, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.1637e-06, 2.7821e-03, 2.8256e-05, 1.9019e-04, 1.3237e-04, 1.6953e-04,
        5.7630e-04, 2.8157e-04, 1.1504e-04, 1.3669e-04, 1.6463e-04, 2.3450e-04,
        1.5191e-04, 7.4399e-05, 1.0244e-04, 5.5059e-03, 1.6238e-04, 1.0080e-04,
        1.6115e-04, 3.1461e-04, 2.2778e-04, 1.0806e-04, 1.6868e-04, 5.2397e-04,
        3.8285e-04, 1.3453e-04, 1.7495e-04, 6.0874e-05, 1.4724e-04, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2543e-03, 3.2512e-03, 1.2582e-04, 2.5046e-04, 1.7085e-04, 2.6457e-04,
        7.7216e-04, 3.7610e-04, 2.6227e-04, 2.3317e-04, 2.0646e-04, 4.3306e-04,
        2.8717e-04, 1.0347e-04, 1.8435e-04, 5.5430e-03, 2.7251e-04, 1.2057e-03,
        1.6237e-04, 8.0607e-03, 3.3155e-04, 9.5218e-04, 5.3314e-03, 2.2616e-04,
        1.8100e-05, 1.1150e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.2850e-04, 2.0556e-03, 4.6260e-05, 1.2928e-04, 9.4643e-05, 1.0138e-04,
        5.4392e-04, 1.6870e-04, 8.1018e-05, 1.5533e-04, 8.4871e-05, 1.6079e-04,
        1.2321e-04, 5.4293e-05, 9.1751e-05, 2.4606e-03, 1.2343e-04, 8.8404e-05,
        3.2005e-04, 2.6750e-04, 8.3463e-05, 1.5160e-04, 1.1358e-04, 2.6462e-04,
        2.3997e-04, 1.7268e-04, 1.5058e-04, 2.4686e-04, 1.0856e-04, 3.7874e-05,
        1.0054e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.4876e-03, 2.6827e-03, 2.3979e-05, 8.1088e-05, 5.2915e-05, 1.1429e-04,
        1.1891e-04, 4.4091e-04, 3.9117e-03, 1.3327e-04, 1.1567e-04, 1.4363e-04,
        1.9325e-04, 2.0225e-04, 5.8784e-05, 1.2207e-04, 2.7819e-03, 1.5045e-04,
        1.5897e-04, 1.3881e-04, 2.2756e-04, 3.6692e-05, 4.6553e-05, 1.1164e-04,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-0.0362, -0.1655, -0.0004, -0.0005, -0.0017, -0.0763, -0.0019, -0.0002,
        -0.0019, -0.0439, -0.0011, -0.0144, -0.0024, -0.0777, -0.0016, -0.0006,
        -0.0018,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0488, -0.1799, -0.0006, -0.0017, -0.0022, -0.0775, -0.0024, -0.0009,
        -0.0035, -0.1059, -0.0012, -0.0040, -0.0023, -0.1266, -0.0024, -0.0013,
        -0.0022,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0093, -0.0365, -0.0001, -0.0002, -0.0003, -0.0210, -0.0003, -0.0001,
        -0.0004, -0.0166, -0.0003, -0.0026, -0.0007, -0.0006, -0.0212, -0.0009,
        -0.0006, -0.0002, -0.0004,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0422, -0.1344, -0.0020, -0.0213, -0.0008, -0.0023, -0.0134, -0.0138,
        -0.0011, -0.0005, -0.0015, -0.0796, -0.0015, -0.0134, -0.0068, -0.0741,
        -0.0008, -0.0052, -0.0069,  0.0002, -0.0006, -0.0012,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0420, -0.1792, -0.0018, -0.0256, -0.0007, -0.0040, -0.0067, -0.0166,
        -0.0014, -0.0004, -0.0014, -0.1319, -0.0024, -0.0151, -0.0025, -0.0025,
        -0.0112, -0.0036, -0.0011, -0.0021,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7832e-01, -1.6038e+00, -1.3088e-02, -2.4901e-01, -7.4175e-03,
        -2.7687e-02, -6.0900e-02, -1.3037e-01, -1.0211e-02, -1.2079e-03,
        -1.3720e-02, -1.1531e+00, -1.9115e-02, -1.2469e-01, -1.0519e-02,
        -3.2832e-02, -7.0061e-02, -2.2767e-02, -1.1686e-02, -1.1516e-02,
        -4.7515e-03, -1.5594e-02,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2691e-02, -2.3763e-02, -2.5430e-04, -3.3023e-04, -2.3176e-02,
        -2.2663e-04, -4.0166e-04, -4.7811e-04, -5.7126e-04, -8.6198e-04,
        -5.0137e-05, -3.8015e-04, -1.7391e-02, -1.9173e-04, -3.6763e-03,
        -3.6833e-04, -1.6214e-02, -1.2692e-04, -6.9546e-04, -1.8955e-02,
        -5.1907e-04, -4.3527e-04, -2.0047e-05, -4.9151e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([0.1144, 0.4331, 0.0013, 0.0069, 0.4343, 0.0011, 0.0053, 0.0078, 0.0084,
        0.0128, 0.0020, 0.0044, 0.3657, 0.0043, 0.0695, 0.0014, 0.0153, 0.0322,
        0.0085, 0.0062, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0349, -0.2364, -0.0014, -0.0036, -0.1754, -0.0018, -0.0041, -0.0035,
        -0.0039, -0.0043,  0.0006, -0.0021, -0.0040, -0.0209, -0.0034, -0.0021,
        -0.0027, -0.0004, -0.0022,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3101e-03, -1.1142e-02, -3.2622e-05, -2.1543e-04, -8.9477e-03,
        -1.5953e-04, -1.1241e-04, -8.8808e-03, -6.1748e-05, -6.6009e-05,
        -4.3492e-05, -1.8184e-04, -1.2255e-04, -5.6057e-05, -1.2748e-04,
        -6.8948e-03, -1.3328e-04, -1.0002e-03, -1.6914e-04, -7.5260e-03,
        -2.3905e-05, -2.7680e-04, -4.8124e-03, -2.6601e-04, -1.5924e-04,
        -3.9663e-05, -1.7207e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7778e-03, -2.2014e-02, -1.5407e-04, -3.1962e-04, -1.3858e-02,
        -2.3766e-04, -1.8392e-04, -1.4287e-02, -5.0425e-05, -1.3712e-04,
        -2.6344e-04, -3.0831e-04, -1.8089e-04, -1.4116e-04, -2.0022e-04,
        -9.9363e-03, -1.9953e-04, -1.1972e-03, -2.8091e-04, -1.2140e-02,
        -4.2320e-04, -4.8573e-04, -2.6265e-04, -1.0805e-04, -1.7132e-04,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7173e-03, -1.9273e-02, -1.3770e-04, -4.0328e-04, -1.3436e-02,
        -1.3976e-04, -1.8980e-04, -1.4623e-02, -1.4539e-04, -1.2095e-04,
        -8.9475e-05, -2.2551e-04, -1.9253e-04, -9.6073e-05, -1.2804e-04,
        -6.4344e-03, -1.7645e-04, -1.8234e-04, -7.0604e-03,  1.9332e-05,
        -1.1845e-04, -2.4185e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #600: [tensor([-1.7282e-04, -4.5703e-05, -2.6083e-06, -3.4718e-06, -1.7512e-06,
         1.3789e-06, -6.8280e-07,  1.0853e-06,  1.2364e-06,  1.2185e-06,
        -1.4221e-06, -1.1587e-06,  3.0350e-06, -5.8848e-07, -2.2166e-06,
         5.0598e-06, -3.3777e-06, -2.1364e-06, -4.1570e-05, -1.5068e-05,
        -4.3859e-06, -2.9400e-06,  8.3400e-06,  1.4874e-07, -5.8903e-06,
         6.7499e-07, -1.9272e-06, -4.4476e-06, -4.3071e-07, -2.6537e-06,
        -7.4068e-07,  4.1594e-06,  3.1152e-07, -6.9620e-06,  1.2171e-06,
        -4.5354e-07, -4.6364e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6729e-05, -7.9618e-05, -3.6678e-06, -4.8456e-06, -1.7028e-06,
         2.1431e-06, -1.5783e-06,  9.5809e-07, -1.4282e-06, -3.7294e-06,
        -2.6297e-06, -1.9438e-06,  2.7285e-07, -2.7175e-06, -3.2396e-06,
        -1.0664e-05, -3.7685e-06, -1.0552e-06, -8.8753e-05, -1.8191e-05,
        -2.7547e-06, -5.7993e-06, -2.7363e-06,  1.1116e-06, -1.3711e-05,
        -2.2389e-06, -4.6607e-06, -4.6660e-06,  1.9183e-05,  3.2173e-06,
        -1.6457e-05, -7.4966e-07, -4.4595e-07, -1.9462e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3351e-05, -6.0358e-05, -3.6801e-06, -3.2475e-06, -1.2005e-06,
         1.4115e-06, -1.7245e-06,  3.2622e-06,  2.2944e-07, -8.7230e-08,
        -8.4573e-07, -1.3072e-06,  2.1812e-06, -3.4064e-07, -2.7616e-06,
        -4.4084e-06, -2.9031e-06, -4.0784e-07, -4.8959e-05, -1.1270e-05,
        -4.1091e-06, -2.4820e-06,  8.4023e-06, -6.7245e-07, -5.4036e-06,
         3.0634e-08, -4.1265e-06, -4.8067e-06,  1.9550e-05,  3.0622e-06,
         2.0222e-06,  1.2142e-06, -1.4607e-06, -5.1635e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7226e-05, -4.2081e-05, -3.4218e-06, -4.1982e-06, -2.8469e-06,
         1.2429e-06, -1.9878e-06,  2.3092e-06,  7.0725e-07, -2.5386e-07,
        -1.6548e-06, -1.7842e-06,  1.2238e-06, -1.7842e-06, -1.3976e-06,
         5.4652e-06, -3.9495e-06, -1.9231e-06, -4.4586e-05, -1.6409e-05,
        -2.8236e-06, -2.2628e-06,  3.3791e-06, -1.2723e-06, -6.1098e-06,
        -5.1543e-07, -3.1877e-06, -3.1937e-06,  1.8788e-05,  2.4504e-06,
        -4.1585e-06,  2.4726e-06, -4.6042e-07, -2.8050e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3358e-04, -5.3982e-05, -4.6540e-06, -3.8341e-06, -1.7314e-06,
         2.4862e-06, -2.3084e-06,  2.7400e-06,  3.6418e-07, -1.1398e-07,
        -7.8799e-07, -7.5852e-07,  9.4697e-07,  1.3379e-06, -1.5791e-06,
         1.5121e-06, -1.8478e-06, -2.1805e-06, -6.1646e-05, -6.5206e-06,
        -3.8807e-06, -2.2466e-06,  1.3212e-05, -1.2249e-06, -8.6429e-06,
        -7.2507e-07, -5.2190e-06, -1.4699e-06, -3.6677e-05, -1.2716e-06,
        -6.2024e-08,  1.2547e-06,  1.5385e-06, -1.5836e-06,  2.3464e-06,
         3.8805e-07, -3.3205e-05,  2.0897e-06,  1.7349e-06, -3.7086e-07,
        -6.5343e-07,  7.3334e-07, -4.3497e-07, -7.7783e-08, -7.5372e-07,
         8.6680e-06,  1.3046e-06, -1.6743e-06,  4.1371e-08,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4332e-04, -5.2693e-05, -4.6656e-06, -4.8080e-06, -1.1144e-06,
        -8.1961e-07, -3.0209e-06,  3.5199e-06,  9.7919e-07, -3.8134e-08,
        -9.4718e-07, -1.1270e-06,  2.0229e-06, -1.4161e-06, -4.0441e-06,
         7.3295e-06,  6.1909e-08, -2.6907e-06, -3.5458e-05, -8.7298e-06,
        -3.4500e-06, -3.0038e-07,  6.7809e-06, -9.4873e-07, -6.7321e-06,
         2.0207e-07, -7.1335e-06, -3.5898e-06,  9.0089e-07,  2.2206e-07,
         2.8512e-06,  3.4444e-06,  1.3253e-06, -2.2136e-07, -1.9826e-05,
        -7.8150e-07, -5.4909e-07, -6.5777e-07,  2.9548e-07, -3.6322e-07,
         8.2424e-07,  1.5625e-05,  1.7873e-06,  2.4665e-06, -3.9446e-06,
        -2.8712e-07, -9.5109e-07,  5.9939e-07,  9.0884e-07, -4.0884e-07,
         8.0373e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6828e-04, -4.7906e-05, -5.4838e-06, -5.8865e-06, -1.0094e-06,
         3.2867e-06, -1.3165e-06,  4.4247e-06,  1.0489e-06,  1.5524e-06,
        -5.2759e-07,  1.9463e-07,  2.4706e-06,  3.4291e-07, -2.0732e-06,
        -1.4921e-06, -2.4735e-06, -1.9357e-06, -5.3920e-05, -2.4423e-05,
        -3.8240e-06,  2.3345e-06,  7.1827e-06,  2.6442e-07, -4.4816e-06,
         1.0216e-06, -6.6307e-06, -4.0295e-06,  1.5422e-05,  3.9947e-06,
         4.7453e-06, -2.4434e-06,  2.7187e-07, -2.3336e-07,  5.7844e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1702e-04, -8.5129e-05, -4.1512e-06, -4.9931e-06, -9.9631e-07,
         3.3940e-06, -1.2293e-06,  2.4727e-06, -1.7699e-08,  1.9528e-07,
        -1.3564e-06, -1.9335e-06,  1.8342e-06, -3.9697e-07, -4.6883e-06,
        -6.3367e-06, -3.1200e-06, -3.7206e-06, -8.7021e-05, -2.1076e-05,
        -3.6702e-06, -7.1575e-08, -9.7311e-06, -9.9229e-07, -9.7926e-06,
        -4.2191e-07, -5.2367e-06, -4.3852e-06,  2.0184e-05,  3.2373e-06,
        -5.6532e-06, -6.7872e-07,  3.6841e-06,  2.8438e-06, -8.6995e-07,
        -8.8155e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1446e-04, -8.8494e-05, -5.7965e-06, -4.4490e-06, -1.2206e-06,
         2.2842e-06, -2.9565e-06,  2.1254e-06, -3.4112e-07, -8.8128e-07,
         3.1007e-07, -1.2527e-06, -7.9738e-08, -1.9781e-06, -5.2563e-06,
        -7.5576e-06, -1.6456e-06, -1.5708e-06, -6.0992e-05, -1.2285e-05,
        -4.2377e-06, -7.4371e-07,  1.0758e-05,  2.6945e-07, -1.8231e-06,
         1.3330e-06, -6.6692e-06, -5.8576e-06,  5.5755e-07, -9.0975e-08,
         2.9761e-06,  1.3117e-06,  1.9295e-06, -7.4020e-07, -5.5642e-05,
        -4.6858e-07,  1.8993e-06, -3.7965e-06,  2.8179e-07, -5.0671e-07,
        -2.2245e-06, -1.3123e-06, -6.0696e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6553e-05, -6.6096e-05, -1.6518e-06, -3.4916e-06, -8.6097e-07,
        -5.8390e-07, -9.9362e-07, -1.4940e-06,  2.4353e-07, -6.6595e-08,
        -1.0679e-06, -6.6918e-07,  5.9504e-07, -1.0000e-06, -2.6841e-06,
         2.9036e-06, -2.1209e-06, -2.7613e-06, -6.2916e-05, -1.2140e-05,
        -2.6992e-06, -6.0535e-06,  1.9111e-06,  9.8405e-07, -7.1377e-06,
         9.1300e-07, -3.4796e-06, -3.7795e-06, -5.2362e-05,  5.0407e-07,
        -1.2724e-06,  2.3667e-06, -9.2639e-07, -3.1063e-07,  3.9824e-06,
        -1.5658e-06,  1.1420e-05, -1.0901e-06,  2.2229e-07, -7.1724e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3611e-04, -5.3735e-05, -4.5297e-06, -4.9150e-06, -2.3177e-06,
         8.7489e-07, -1.4669e-06,  1.8096e-06,  1.3166e-06,  1.7169e-06,
        -1.2707e-06, -3.8492e-07,  1.6378e-06, -3.8953e-06, -1.8046e-06,
         1.0365e-05, -2.9987e-06, -1.8907e-06, -6.2238e-05, -1.8880e-05,
        -4.2142e-06, -2.2554e-06,  3.6211e-06, -4.3821e-07, -5.8886e-06,
        -8.1630e-07, -7.8433e-06, -4.3942e-06,  8.3376e-07, -1.3032e-06,
        -7.8515e-07,  2.8459e-07, -7.1609e-06,  3.1960e-06,  2.0590e-06,
         1.9258e-06,  4.0599e-06,  9.8214e-08, -1.2311e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0171e-04, -5.7085e-06, -1.4959e-05, -1.5759e-05, -8.6697e-06,
        -2.3062e-04,  4.2292e-06, -5.1078e-06, -1.2715e-05, -7.6576e-06,
         2.8768e-05, -2.3697e-06, -1.1333e-05, -2.7984e-06, -1.5645e-06,
         2.7043e-05, -1.5070e-06, -1.7163e-06, -2.3138e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #650: [tensor([-1.5893e-04,  1.5707e-05,  4.7233e-06,  5.6856e-06, -8.0060e-05,
         6.2056e-06,  1.0067e-05,  9.6283e-06, -6.7193e-05, -2.9621e-06,
         4.9948e-06, -5.0131e-06, -2.5520e-06,  1.1031e-05, -3.4497e-07,
         2.2835e-05,  2.4043e-06,  7.3455e-06,  1.3313e-06, -2.8578e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6341e-04, -1.7589e-06, -4.6231e-06, -1.2559e-05, -7.0640e-06,
        -3.1819e-05, -3.8883e-05, -1.8678e-06, -5.7200e-06, -1.0081e-05,
        -1.8895e-04, -3.7934e-06, -7.8279e-06, -8.5719e-06, -1.0065e-06,
        -1.1187e-06, -1.6453e-08, -8.7346e-06, -2.7666e-06,  5.0917e-07,
        -2.9388e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9275e-04, -7.7270e-06, -1.7778e-05, -1.5308e-05, -8.8767e-06,
        -5.5019e-05, -3.8737e-06, -2.0618e-06, -8.6679e-06, -1.4617e-05,
        -3.1831e-04, -5.0027e-06, -8.1789e-06, -5.2777e-06,  1.5909e-05,
        -8.8600e-07, -1.5368e-05,  2.3183e-06, -3.0712e-06, -1.6251e-06,
        -1.6890e-06,  4.0691e-07, -6.2596e-07,  3.4962e-06, -2.8575e-06,
        -9.8223e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5296e-05,  3.9224e-06, -5.8526e-06, -3.9369e-06, -8.7122e-06,
        -1.2929e-05,  1.9611e-05, -7.7313e-07,  2.2103e-06, -9.9472e-06,
        -1.6911e-04, -3.2284e-06, -5.1483e-06, -5.6512e-06,  7.6360e-07,
         8.5924e-06, -5.1475e-05,  1.3489e-07,  1.6352e-06, -5.9354e-07,
        -2.0541e-07, -2.2871e-06, -2.1013e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1317e-04, -3.5158e-07, -1.2987e-05, -1.4553e-05, -8.2752e-05,
        -5.3816e-06, -3.2940e-04, -5.3511e-06, -8.5652e-06, -1.4137e-05,
         1.2513e-05, -1.3480e-06, -2.0714e-05, -2.5301e-06, -2.1059e-06,
        -3.2019e-06,  2.0116e-05, -3.9068e-06, -2.6267e-07, -1.3487e-06,
        -2.6269e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5527e-04, -1.3913e-06, -1.7451e-05, -1.3291e-05, -3.6411e-05,
        -5.8214e-06, -1.4231e-04,  2.3126e-07, -9.9033e-06, -3.7032e-06,
         2.7089e-06, -3.7054e-06,  1.7266e-06,  1.0713e-06,  1.0642e-05,
         1.6506e-06,  1.2612e-06,  6.1279e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6207e-05,  1.8201e-06, -6.2292e-06, -8.2578e-06, -3.0692e-05,
        -1.6600e-06, -1.8412e-04,  1.3796e-06, -7.3140e-06, -8.0426e-06,
         2.8107e-06,  1.8803e-07, -1.2502e-05, -2.6253e-06,  1.8182e-06,
         5.3671e-06, -1.1053e-07, -2.6629e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3210e-06, -9.5484e-07, -1.2642e-05, -2.4353e-05, -5.5657e-05,
        -8.2284e-06, -2.6980e-04, -7.0710e-06, -1.1971e-05, -1.6841e-05,
        -6.0017e-06, -1.3163e-05,  7.1143e-07,  2.4539e-06, -4.9155e-06,
         2.2013e-07, -5.6025e-07, -2.2726e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9827e-04,  8.7250e-06, -6.7706e-06, -1.2804e-05, -8.2917e-06,
        -5.9128e-06, -1.1842e-04,  1.2758e-05, -7.0270e-06, -8.0163e-06,
         3.2081e-06,  9.3591e-07, -5.2875e-05,  2.4099e-06, -2.2985e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8198e-04,  6.2368e-06, -2.3867e-05, -2.1495e-05, -3.2012e-05,
        -1.2728e-05, -2.1320e-04,  7.3046e-06, -8.9235e-06, -1.8282e-05,
         5.6360e-06, -1.4156e-06,  6.2427e-08, -5.3289e-06, -1.7104e-06,
        -5.9947e-07, -1.0932e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5948e-04,  2.4268e-06, -8.9803e-06, -6.2694e-06, -1.9234e-05,
        -3.9742e-06, -1.5080e-04,  5.4251e-06, -7.5321e-06, -1.1629e-05,
         2.3677e-05, -1.6515e-06, -7.9193e-06, -3.3966e-06, -7.9259e-05,
         1.4394e-06,  4.0494e-08, -1.2376e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2679e-04, -1.8026e-07, -5.0165e-06, -6.1394e-06, -2.3067e-05,
        -3.9986e-06, -1.6004e-04,  5.8197e-06, -6.7357e-06, -5.7273e-06,
         1.8701e-06, -1.9182e-06, -3.5782e-06, -7.0137e-05,  3.9261e-06,
        -3.2730e-06, -1.6416e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #700: [tensor([-1.4425e-04, -6.1656e-05,  3.2196e-06, -5.6897e-08, -6.7888e-05,
         3.6404e-06, -2.3057e-07,  9.1845e-07, -6.6239e-05, -1.4529e-06,
         3.4888e-07, -3.9838e-06, -4.3376e-06, -3.9975e-05,  5.7889e-06,
        -2.5350e-06, -2.9083e-06, -3.2660e-06,  2.1787e-06,  3.1839e-06,
        -1.9583e-06, -2.6536e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1523e-04, -6.5053e-05,  7.4599e-07, -6.0125e-07, -6.4832e-05,
         5.4999e-06, -1.4770e-06,  5.7811e-07, -4.8478e-05, -3.1986e-06,
        -1.5887e-07, -2.4152e-06, -1.7299e-06, -3.5336e-05,  2.7386e-06,
         6.6512e-07, -5.1404e-06,  2.5529e-06, -1.5330e-06, -1.3616e-06,
        -2.7517e-05, -8.4546e-08,  4.6131e-08,  6.6076e-07,  1.6201e-05,
         5.8505e-07, -2.3455e-06, -3.0571e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0376e-04, -6.7872e-05,  3.1851e-06, -1.2846e-05,  1.2768e-05,
        -5.9409e-05,  7.1456e-06, -2.1843e-06,  7.8149e-06, -5.7225e-05,
         1.7616e-06, -5.6076e-06, -2.1431e-06,  5.5072e-05,  8.1835e-06,
         1.6385e-05,  2.4060e-06, -7.9785e-07,  5.4145e-05,  6.4274e-06,
        -2.0744e-06, -1.3488e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7100e-05, -3.4893e-05,  4.7469e-07, -1.3674e-05,  4.1272e-06,
        -2.7299e-05,  3.1812e-06, -1.5583e-06, -4.6169e-06, -2.5462e-05,
         5.7937e-07, -1.7786e-06, -2.5639e-06,  6.8284e-07,  9.0795e-06,
        -2.4107e-05,  1.1663e-06,  3.2963e-06,  1.7019e-06, -1.4814e-05,
         2.2295e-06, -3.3510e-07, -1.2638e-06, -1.2858e-06, -1.4061e-06,
        -1.6423e-06, -2.1176e-05, -1.7279e-06,  5.6144e-07, -1.3884e-08,
        -1.3277e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0691e-04, -4.4824e-05,  2.1583e-06, -6.8046e-06,  9.2287e-06,
        -3.6781e-05,  3.4843e-06,  6.7486e-07, -6.7617e-06, -3.6737e-05,
         1.0729e-06, -1.9118e-06, -1.4885e-06,  8.7837e-07,  1.4809e-05,
        -3.2816e-05,  4.6148e-06,  1.4825e-07, -1.8212e-05,  4.2209e-06,
         6.5862e-07,  3.5036e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5130e-04, -4.2756e-05,  2.9040e-06, -4.9516e-06,  1.0232e-05,
        -3.3321e-05,  7.0148e-06, -1.1064e-06, -2.0818e-06, -3.3322e-05,
         1.7861e-06, -4.8810e-06, -2.5756e-06,  2.1394e-05,  3.3704e-06,
         1.1325e-05,  8.5184e-07,  4.2992e-06,  2.3162e-07,  3.8864e-06,
        -1.0443e-06, -2.2416e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8426e-04, -5.4001e-05,  3.7202e-06, -1.2128e-06,  7.1633e-06,
        -4.2325e-05,  8.2589e-06,  1.0540e-06, -7.1645e-06, -4.7313e-05,
         1.6950e-06, -2.6062e-06, -2.7007e-06, -2.2114e-05,  6.9367e-06,
        -4.0142e-07,  1.2299e-07, -4.1939e-07,  1.7751e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6542e-04, -5.9978e-05,  1.4847e-06, -4.1879e-06,  7.6414e-06,
        -4.3120e-05,  4.6930e-06, -3.7618e-06,  2.1324e-06, -4.7660e-05,
         7.8838e-07, -3.7328e-06, -1.2396e-06,  1.8637e-06, -7.1198e-06,
         5.9914e-06,  1.9120e-06, -3.7119e-06, -1.3121e-05,  3.9970e-06,
        -1.3540e-06,  2.8540e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9483e-04, -8.2249e-05,  8.5093e-06, -1.8323e-05,  1.7397e-05,
        -4.4044e-05,  5.3355e-06,  2.2929e-07,  1.1823e-05, -5.7599e-05,
         2.4552e-06, -4.8246e-06, -3.6262e-06,  5.1744e-05,  1.2767e-05,
         2.7038e-05,  2.2883e-06,  4.5968e-06,  4.6466e-08,  1.3698e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4126e-04, -5.2320e-05,  2.0387e-06, -5.4540e-06,  2.4410e-06,
        -3.7336e-05,  5.1276e-06, -2.0364e-06, -1.4844e-06, -3.1246e-05,
        -7.1982e-07, -3.8369e-06, -1.4577e-06,  7.3718e-07,  3.5490e-06,
         7.0801e-07, -6.5489e-07, -2.7974e-06, -8.2489e-07,  3.0004e-07,
        -9.7687e-07, -7.2750e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0055e-04, -3.0754e-05,  4.3778e-06, -4.0521e-06,  9.5668e-06,
        -2.5070e-05,  2.8720e-06, -3.8493e-06, -6.8718e-06, -3.7532e-05,
        -1.4027e-07, -2.6611e-06, -3.4668e-07,  1.1962e-06, -7.8716e-06,
         4.0332e-06,  2.9817e-06, -5.1828e-06,  2.5469e-05,  2.2381e-06,
        -1.4959e-06, -1.1912e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2951e-05, -3.8505e-05,  1.8901e-06, -6.0853e-06,  9.9798e-06,
        -3.1844e-05,  6.1247e-06, -1.1596e-06,  3.8723e-06, -3.2567e-05,
         2.8998e-07, -2.9609e-06, -4.1870e-07, -6.3474e-06,  5.2499e-06,
        -3.1155e-07,  7.4710e-06, -1.8106e-06,  1.1673e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #750: [tensor([-4.8990e-05, -7.4921e-05, -2.1932e-06, -5.7920e-06, -7.5196e-05,
        -2.8790e-06,  4.3440e-07,  2.4311e-06,  1.0740e-06,  1.8605e-06,
        -3.9265e-06, -3.1269e-06, -3.1307e-06,  2.4381e-06, -3.5040e-07,
        -4.8327e-08,  4.9638e-07,  1.1990e-06, -6.9048e-06,  2.4387e-06,
         3.3532e-07, -3.2949e-05,  1.1655e-06,  7.3302e-07, -1.5082e-06,
         1.1055e-07,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2175e-04, -1.2251e-04, -3.3680e-06, -6.9702e-06, -9.8550e-05,
        -1.4146e-06, -4.6693e-07,  2.4742e-06,  1.8764e-06, -3.6658e-07,
        -4.6818e-06, -3.6133e-06, -9.6337e-05, -3.7974e-06, -1.4723e-06,
        -3.5804e-06,  2.0794e-06, -7.9137e-06, -4.4318e-05,  1.7955e-06,
        -3.9949e-07, -2.4439e-06, -4.1288e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1246e-05, -5.3900e-05, -6.3821e-06, -5.2789e-06, -5.9317e-05,
        -1.0476e-06,  1.2343e-06,  8.0885e-07,  1.8454e-06, -1.0841e-06,
        -4.2335e-06, -4.3185e-06, -2.4255e-05, -2.5080e-06, -3.3670e-06,
        -5.5390e-07, -4.7478e-07, -3.4470e-05, -1.2032e-06, -6.7477e-06,
         5.7009e-07, -1.5837e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4607e-05, -5.4012e-05, -2.9300e-06, -5.5296e-06, -6.6929e-05,
         7.3736e-07,  1.2275e-06,  2.7680e-06,  7.2139e-07,  1.8524e-06,
        -5.4205e-06, -2.4162e-06, -4.1619e-05, -1.1032e-06, -3.3578e-06,
        -4.4308e-05,  6.6363e-07, -8.6364e-07, -4.4826e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5288e-04, -7.6032e-05, -9.2240e-06, -1.6060e-06, -8.1789e-05,
        -1.7674e-07,  5.1750e-07,  3.9346e-07,  1.4722e-06, -1.3081e-06,
        -2.3556e-06, -9.8336e-07, -7.2597e-05, -4.1479e-06, -5.8540e-06,
        -1.9659e-06, -1.4024e-07,  1.5154e-06,  1.2287e-07, -3.0079e-06,
        -5.6698e-05, -1.2630e-07, -7.4092e-07, -1.2496e-06, -2.4814e-06,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7160e-04, -1.2433e-04, -8.9988e-06, -1.1250e-05, -1.3182e-04,
        -3.2955e-06,  1.1401e-06,  2.7492e-06,  1.2538e-06,  8.2350e-07,
        -5.0092e-06, -2.3636e-06, -9.1818e-05, -1.4961e-06,  1.3170e-06,
        -8.1062e-06, -7.2872e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6510e-05, -1.0325e-04, -4.9348e-06, -9.2768e-06, -1.0661e-04,
        -8.0140e-07, -5.9544e-07,  3.0641e-06, -1.6007e-06,  1.1765e-06,
        -5.5927e-06, -3.6662e-06, -9.2783e-05, -3.3591e-06, -2.8821e-06,
        -5.8609e-06, -1.1116e-05, -3.4177e-06, -6.6491e-06, -3.7216e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4907e-05, -4.6606e-05, -2.0269e-06, -2.2377e-06, -7.0918e-05,
         1.3038e-06,  3.4883e-07,  3.2606e-06,  2.9554e-06,  1.9390e-07,
        -1.3850e-06, -2.5565e-06, -6.0954e-05, -3.0400e-06, -9.0763e-06,
        -1.7327e-06, -1.9332e-05, -1.1020e-06, -1.2001e-06, -1.6177e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5792e-04, -1.1976e-04, -7.0064e-06, -7.6014e-06, -9.2980e-05,
        -8.0060e-07,  1.6487e-06,  4.2706e-06,  7.6665e-06,  4.3238e-06,
        -4.4945e-06, -4.0454e-06, -5.9517e-05, -1.8705e-06,  1.0945e-05,
        -3.1807e-06, -1.5269e-05, -6.7317e-07, -5.3827e-07,  6.4100e-06,
        -1.2025e-06, -2.7185e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0297e-04, -7.1649e-05,  1.6201e-06, -3.4600e-07, -3.9916e-06,
        -1.3624e-05,  4.3624e-06, -1.5895e-06, -2.6510e-06, -5.8134e-06,
        -6.0339e-05,  2.0018e-06,  3.3508e-08, -3.1433e-06, -3.3900e-06,
         2.9953e-05,  4.7284e-07, -2.2036e-07,  1.6236e-06,  2.9532e-06,
        -4.2290e-07, -3.0505e-06, -1.0923e-06, -2.6202e-05, -1.3752e-06,
        -3.3276e-07, -4.2012e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0373e-05, -7.2768e-05,  2.7344e-06, -1.8485e-06, -3.5498e-05,
        -9.0832e-06, -3.4521e-07, -2.2242e-08, -6.0460e-06, -6.8354e-06,
        -1.3206e-04, -7.8771e-07, -6.4983e-07, -4.1075e-06, -4.9439e-06,
         2.9449e-05,  3.4125e-06, -7.7706e-08, -5.1765e-06, -2.4635e-06,
        -5.9807e-06, -5.8700e-07,  2.3538e-05, -2.4823e-06, -2.2857e-06,
        -1.8374e-06,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2477e-04, -1.0261e-04, -9.3331e-07, -8.7722e-08, -2.8461e-05,
        -1.6495e-05,  3.3114e-06, -9.1525e-07, -2.1894e-06, -4.2774e-06,
        -1.2777e-04,  2.6322e-06,  1.3164e-06, -1.6790e-06, -5.0656e-06,
         3.0413e-05, -2.5433e-06, -1.8220e-06,  4.0740e-06, -1.8537e-05,
        -2.5217e-06, -8.0305e-05, -2.6268e-06, -2.9625e-06, -2.1414e-06,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #800: [tensor([-2.2916e-04, -1.4757e-04, -5.6403e-06, -3.7553e-06, -9.4869e-07,
        -8.9550e-06, -2.5511e-05, -1.5464e-04,  2.9871e-06, -4.9130e-06,
        -8.2943e-06, -4.1385e-06, -4.0025e-06, -5.3869e-06,  3.2289e-05,
        -2.9752e-06, -1.1479e-05,  7.0656e-07,  2.0004e-06, -3.1717e-06,
        -4.0811e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0839e-05, -9.3849e-06, -1.3538e-06, -4.3196e-06, -2.3393e-06,
        -1.0206e-05, -9.0029e-06, -3.9033e-06, -1.3871e-05, -6.4551e-06,
        -5.8309e-04, -3.7035e-06, -3.0665e-06, -6.0635e-06, -1.2678e-04,
        -2.1071e-05, -1.7416e-06, -7.4388e-06, -4.2418e-04, -8.7995e-07,
        -1.5592e-06, -2.2744e-06,  1.3863e-07, -4.9261e-06, -5.2210e-06,
        -5.5668e-06, -4.4044e-06, -4.3403e-05, -6.8147e-07, -3.6318e-06,
        -3.1575e-06, -4.1599e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6422e-04, -6.6499e-06, -6.1489e-06, -1.1093e-05, -9.8918e-06,
        -7.7731e-06, -4.4496e-06, -4.3817e-06, -1.5890e-05, -1.3110e-05,
        -3.0563e-04, -2.4899e-06, -5.4037e-06, -7.2232e-06, -1.6045e-04,
        -1.7683e-05, -7.2026e-06, -8.8235e-06, -3.6964e-06, -8.6368e-06,
        -4.0161e-06, -3.1569e-05, -5.0137e-04, -2.1559e-06, -6.4109e-06,
         1.8046e-06, -3.6205e-06, -6.9474e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1045e-05, -1.1069e-06, -6.6174e-06, -6.0735e-06, -3.6498e-06,
        -4.4824e-06, -4.1463e-06, -6.5938e-06, -1.1342e-05, -1.4493e-05,
        -2.7320e-04, -2.7394e-06, -3.0411e-06, -1.0550e-05, -6.2614e-05,
        -1.0176e-05, -3.4318e-06, -4.9684e-06, -1.7585e-06, -6.1559e-07,
        -5.1148e-07, -1.0779e-06, -2.9316e-06, -5.2960e-06,  9.0697e-07,
        -3.6439e-04, -3.2215e-06, -1.2909e-06, -3.9769e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3694e-06, -7.8530e-05, -6.3902e-06, -4.9436e-06, -3.0417e-06,
        -1.8148e-06, -5.9069e-05,  1.1397e-07,  2.2227e-06, -6.5162e-06,
        -5.5894e-06, -2.8269e-06, -1.1036e-06,  6.0778e-06, -3.6197e-06,
        -1.6128e-06, -1.2410e-06, -1.2629e-05, -1.3893e-07, -3.4021e-06,
        -4.2427e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5827e-04, -1.2674e-04, -2.8042e-06, -6.3724e-06, -3.4588e-06,
        -1.7209e-07, -9.8490e-05, -6.3364e-07,  2.5396e-06, -6.9748e-06,
        -5.6734e-06, -3.0379e-05, -2.3843e-06,  1.3684e-06, -1.1199e-05,
         3.6687e-05,  2.7728e-06, -3.6585e-06, -4.3765e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1229e-05, -8.4136e-05, -3.2375e-06, -5.2638e-06, -3.3904e-06,
        -2.0586e-06, -8.0971e-05, -1.1865e-06,  3.3294e-06, -3.4519e-06,
        -4.3141e-06, -7.2594e-05, -5.7267e-06, -2.8556e-06, -5.9239e-05,
        -2.2874e-06, -3.1012e-07, -7.4527e-05,  1.4807e-06,  2.6778e-06,
        -5.5360e-07, -3.4248e-06, -2.6624e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6390e-04, -1.1761e-05, -8.4004e-06, -1.3210e-04, -5.0199e-05,
        -1.7102e-05, -3.6105e-05, -8.1358e-06, -9.3031e-06, -7.5652e-06,
        -2.0807e-05, -1.7498e-05, -4.0966e-05, -2.1971e-03, -6.3298e-06,
        -1.6997e-05, -2.4952e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1628e-04, -1.9427e-05,  4.5181e-05, -2.8533e-04, -1.1355e-04,
        -2.1346e-05, -1.1588e-04, -1.9824e-05, -4.4883e-05, -1.0259e-05,
        -4.0982e-05, -8.2228e-05, -8.6072e-05, -7.5679e-05, -1.4099e-04,
        -1.1134e-02, -4.5589e-05, -4.6494e-05, -1.0016e-04, -3.5309e-05,
        -4.5183e-05, -3.9967e-05, -6.4567e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9044e-03, -2.0745e-04,  1.2885e-04, -1.5253e-03, -4.5492e-04,
        -3.5833e-04, -7.3371e-04, -2.4140e-04, -4.8141e-05,  5.4288e-05,
        -2.4851e-04, -3.6660e-02, -2.4270e-04, -2.4799e-04, -4.9548e-04,
        -1.8290e-04, -2.4506e-04, -1.5455e-04, -4.1869e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9676e-04, -2.6604e-04, -5.5023e-06, -3.1015e-06,  2.1689e-06,
        -2.9284e-06, -9.4812e-06,  1.7502e-06, -3.8066e-06,  6.9365e-06,
         1.6343e-06, -8.0243e-06, -1.1254e-05, -1.6661e-04,  1.3539e-07,
         1.8283e-06, -2.1553e-07, -1.5920e-06, -2.1123e-06,  1.4947e-05,
        -3.8974e-06, -2.5343e-06, -1.7946e-06, -5.1349e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4049e-04, -3.1601e-04, -2.3893e-06, -4.2803e-06, -9.5956e-07,
        -4.2112e-06, -8.8681e-06, -1.2787e-05, -2.1571e-04, -9.4024e-06,
        -5.8976e-06, -5.8448e-06, -1.0631e-05, -5.8989e-04, -1.4341e-06,
        -4.1525e-06, -1.0716e-06, -1.5215e-06, -5.4289e-06, -8.9528e-06,
        -4.2477e-06, -4.5504e-06, -2.4146e-06, -2.6874e-06, -3.6116e-06,
        -6.0957e-07, -4.9987e-06, -3.5660e-06, -5.2575e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #850: [tensor([-9.0597e-03, -9.1759e-04, -4.2512e-03, -1.5464e-03, -2.0346e-03,
        -2.9586e-04, -1.0716e-04,  2.9803e-04, -8.6495e-03, -1.7428e-03,
        -1.5249e-03,  6.6071e-05, -3.0978e-04, -4.6431e-03, -6.5787e-04,
        -4.5364e-04, -4.3846e-04, -4.7260e-04, -3.9600e-03, -4.2701e-04,
        -1.2214e-04, -1.9408e-03, -4.8396e-04, -6.2281e-04, -2.0564e-04,
        -9.2402e-03, -1.6114e-04, -3.6462e-04, -6.3221e-03, -6.4853e-04,
        -2.0005e-04, -5.1989e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0185, -0.0036, -0.0025, -0.0009, -0.0049, -0.0005, -0.0002,  0.0004,
        -0.0153, -0.0024, -0.0022,  0.0004, -0.0012, -0.0109, -0.0018, -0.0009,
        -0.0012, -0.0042, -0.0008, -0.0351, -0.0013, -0.0010, -0.0003, -0.0004,
        -0.0012,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0573e-04, -9.5747e-06, -4.3941e-06, -1.3943e-05, -5.3212e-05,
        -2.5445e-04, -6.2096e-07, -5.9299e-06, -6.7606e-06, -3.2892e-05,
        -5.7575e-06,  3.4646e-07, -6.1934e-06, -3.0935e-06, -7.2681e-06,
        -4.7694e-04, -9.3059e-06, -2.0944e-05, -4.7028e-04, -4.6805e-06,
        -6.8399e-06, -9.4963e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5216e-05, -1.4263e-05, -3.2677e-05, -1.4928e-05, -1.1930e-04,
        -5.0070e-04, -2.6072e-06, -1.6141e-05, -7.8043e-06, -4.0235e-05,
        -1.2940e-05, -6.8588e-06, -5.8341e-06, -6.3487e-06, -1.2787e-05,
        -1.5351e-05, -5.6401e-06, -5.8840e-06, -2.3695e-06, -9.6918e-06,
         1.2848e-06, -1.3304e-06, -3.2146e-06, -7.0938e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1631e-04, -1.3882e-05, -1.1401e-05, -1.2373e-05, -2.7834e-05,
        -2.2862e-04, -3.0338e-06, -6.0298e-06, -4.5402e-06, -2.0223e-05,
        -6.0794e-06,  8.9962e-08, -6.9573e-06, -8.4871e-06, -7.3193e-06,
        -3.8527e-04, -3.5286e-06, -4.0631e-06, -1.2409e-05, -2.9625e-04,
        -6.4186e-07, -5.1286e-06, -6.0981e-06, -9.7857e-06, -6.4383e-06,
         4.3365e-07, -4.5558e-06, -2.5882e-06, -4.3122e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0917e-05, -7.6191e-05, -3.3468e-06, -6.6060e-05,  2.1536e-07,
        -3.5683e-07, -1.7470e-06, -8.9258e-05, -6.9975e-07, -3.9576e-06,
        -4.7305e-05, -1.1437e-04, -1.8751e-07, -5.4478e-06, -3.5236e-06,
         1.7370e-05,  1.4970e-06, -2.5617e-06, -8.1425e-06, -2.5139e-06,
         1.9253e-06, -1.6866e-07, -2.1751e-06, -4.4449e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3672e-05, -4.1352e-05, -3.3604e-07, -4.0800e-05, -4.4865e-07,
        -1.3044e-06,  8.4578e-07, -7.5135e-05,  2.7510e-07, -3.4611e-06,
        -1.2673e-05, -6.1711e-05,  2.4070e-07, -3.2768e-06, -2.9028e-06,
         1.8657e-05,  4.1878e-08, -1.3515e-05,  7.1967e-07, -7.0761e-05,
         8.9163e-07, -9.7731e-07, -7.5836e-09, -2.3773e-06, -5.8405e-05,
        -2.2556e-07, -2.5061e-06, -1.8378e-06, -2.9863e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3292e-04, -1.2404e-04, -5.4655e-06, -6.2577e-05, -5.8648e-07,
        -2.5530e-06, -4.8813e-06, -2.0039e-04,  1.0768e-06, -8.2746e-06,
        -4.5501e-05, -1.9537e-04, -2.4214e-06, -5.8233e-06, -4.7546e-06,
         2.8546e-05,  7.6022e-07, -1.3946e-05, -3.7203e-06, -7.3056e-06,
        -4.6301e-06,  3.9575e-06, -3.6957e-06, -1.1735e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8846e-05, -5.2914e-05, -2.4805e-06, -7.9345e-05, -1.3474e-06,
        -1.4940e-04,  2.2440e-06, -1.9023e-06, -9.5371e-06, -1.4162e-04,
         1.6808e-06, -2.7154e-06, -4.4702e-06, -3.6423e-06,  3.4858e-05,
        -3.4885e-06, -2.2859e-05,  4.6544e-06, -3.1568e-06, -2.3108e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2245e-05, -7.4239e-05, -1.3823e-06, -1.4089e-04,  9.9422e-07,
        -1.2326e-04,  2.7100e-06, -1.8621e-06, -4.1599e-06, -1.5056e-04,
        -3.8537e-06, -3.8614e-06, -5.8660e-06, -1.0477e-05,  2.1322e-05,
        -4.4860e-06, -3.2807e-05,  4.7822e-07, -4.7303e-06, -8.5157e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3889e-05, -1.0613e-04, -2.2097e-06, -1.1746e-04, -6.3187e-06,
        -1.2265e-04,  8.1290e-07, -3.5980e-06, -2.7628e-06, -1.6827e-04,
        -1.9531e-06, -4.2523e-06, -4.2848e-06, -3.7262e-06, -3.3677e-05,
        -1.6751e-06, -1.7041e-05,  3.4914e-07, -3.4619e-06, -1.5532e-06,
        -2.5098e-06, -4.0143e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4882e-05, -1.8137e-05, -1.0839e-04, -1.2246e-05, -8.6877e-06,
        -3.8222e-04, -5.2387e-06, -8.3831e-06, -4.4198e-06, -7.8551e-06,
        -8.4693e-06, -6.8121e-06, -2.5560e-05, -5.8216e-04, -3.8626e-06,
        -5.3755e-06, -6.7506e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #900: [tensor([-7.5204e-06, -3.0764e-04, -3.8102e-05, -1.4666e-05, -1.6569e-06,
        -2.2756e-06,  1.8268e-06,  1.0149e-06,  1.9175e-06, -3.8629e-04,
        -5.6450e-06, -1.2703e-05, -1.7224e-05,  1.2308e-05, -5.8298e-06,
        -8.6741e-06, -2.0770e-06, -4.7104e-06, -7.8385e-06, -5.8317e-06,
        -1.1126e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1739e-04, -4.5539e-04, -3.0177e-05, -1.3696e-05, -2.5713e-05,
         4.5997e-07,  1.5095e-06, -4.9176e-06, -1.0225e-07, -5.1980e-04,
        -9.8952e-06, -2.7723e-06, -8.2977e-06, -1.8727e-05, -1.0971e-05,
        -1.4432e-05, -2.0363e-06,  2.1926e-06, -3.6202e-05,  5.2198e-06,
        -2.3784e-06, -6.7204e-06, -1.3607e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3143e-04, -2.1626e-05, -2.9644e-04, -3.3256e-04, -3.6565e-05,
        -2.1653e-03, -1.2713e-04, -1.5571e-05, -1.3478e-05, -5.4286e-04,
        -1.6920e-05, -7.4494e-06, -2.4559e-05, -1.3241e-05, -5.2604e-05,
        -7.1001e-05, -2.6130e-05, -1.5944e-05, -1.8227e-05, -2.5996e-04,
        -1.5502e-05, -6.3921e-06, -2.2587e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9356e-05, -9.9956e-06, -2.1071e-04, -9.8594e-05, -8.2120e-06,
        -8.7718e-04, -1.1558e-04, -1.7496e-05,  1.0300e-06, -5.2587e-04,
        -6.1807e-06, -2.8044e-06, -1.5424e-05, -4.0912e-05, -6.0591e-05,
        -2.7610e-05, -3.3240e-05, -3.8358e-05, -1.1464e-03, -1.0336e-05,
        -5.2846e-06, -2.2567e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1179e-04, -1.3108e-05, -1.0884e-04, -5.8717e-05, -1.4549e-05,
        -5.9349e-04, -2.5928e-05, -1.3600e-05, -1.1866e-05, -2.4955e-04,
        -1.0963e-06, -5.9739e-06, -1.3072e-05, -1.5257e-05, -2.9675e-05,
        -9.9646e-06, -4.9215e-06, -1.1846e-03, -1.8282e-05, -3.1595e-06,
        -1.4681e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8408e-04, -2.7226e-04, -8.5465e-06,  4.4571e-07, -7.0354e-06,
        -3.5114e-05, -5.6962e-06, -1.4527e-05, -2.4141e-06, -3.7416e-04,
        -5.3320e-06, -4.1355e-06, -1.5619e-05, -7.4375e-04, -8.9653e-06,
        -5.0757e-05, -9.9344e-06, -2.5919e-06, -7.8730e-06, -1.1129e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9143e-04, -2.7838e-04, -1.9289e-07, -1.9745e-06, -6.5677e-06,
        -2.1379e-05, -1.1875e-05, -1.6004e-05,  3.7396e-07, -2.7628e-04,
        -2.5849e-06, -8.6186e-06, -1.6276e-05, -7.3298e-04, -2.2349e-06,
        -5.1898e-05, -6.2788e-06, -1.0110e-05, -9.4390e-06, -1.1722e-05,
        -6.7460e-06, -9.7227e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2904e-05, -2.4975e-04, -9.4694e-06, -2.8789e-06, -5.9081e-06,
        -9.9177e-06, -5.9933e-06, -1.3673e-05, -4.6665e-06, -2.6285e-04,
        -1.4760e-06, -3.6467e-06, -1.1171e-05, -7.2261e-04, -4.2196e-06,
        -7.4806e-05, -8.0688e-06, -7.4678e-06, -9.2956e-06, -3.4359e-06,
        -9.6437e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1354e-04, -8.4058e-04, -6.8960e-05, -6.0768e-06, -1.0596e-04,
        -1.6796e-04, -6.1298e-05, -1.0599e-06, -1.6327e-03, -3.6151e-05,
        -1.9656e-05, -7.6812e-06, -2.8141e-05, -3.9113e-05, -8.8100e-05,
        -4.8197e-05, -9.0672e-06, -5.9216e-05, -9.6567e-05, -6.3177e-05,
        -7.7438e-06, -2.3943e-03, -2.8826e-05, -3.8940e-05, -1.1313e-05,
        -4.5950e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1299e-04, -4.9007e-04, -5.9360e-05, -1.8315e-06, -3.4425e-04,
        -2.2895e-04, -5.3078e-05, -6.0951e-06, -2.6299e-03, -4.5211e-05,
        -2.8772e-05, -5.0973e-06, -3.4819e-05, -8.5394e-05, -9.1085e-05,
        -2.9972e-05, -2.1353e-05, -7.9512e-05, -7.4002e-05, -7.7695e-06,
        -1.1889e-05, -3.0807e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8924e-04, -3.2251e-04, -6.4507e-05, -6.4789e-06, -5.2731e-05,
        -2.6669e-04, -5.7924e-05,  3.6077e-06, -1.0508e-03, -1.3464e-05,
        -5.2173e-06, -1.8340e-05, -2.3012e-05, -3.9302e-05, -7.0363e-05,
        -2.1118e-05, -4.7185e-06, -1.8100e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9623e-04, -3.5924e-05, -4.4496e-04, -1.6591e-05, -7.4384e-06,
        -1.7639e-03, -4.2159e-06, -1.5845e-05, -3.2193e-05, -1.3419e-05,
        -5.5814e-05, -6.6742e-05, -4.9549e-05, -1.6632e-05, -2.4562e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #950: [tensor([ 1.2190e-02, -3.4435e-02, -5.3919e-04, -3.1431e-04,  2.2964e-07,
        -7.1870e-02, -3.4692e-04, -7.7528e-04, -8.6549e-04, -3.7907e-04,
        -7.9436e-04, -3.7471e-04, -5.1080e-04, -7.6365e-04, -9.2303e-04,
        -1.2409e-03, -9.9360e-05, -6.1129e-04, -6.6948e-04, -2.4278e-03,
        -5.4686e-04, -4.1552e-04, -4.4299e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0294e-02, -1.9622e-02, -2.8537e-04, -3.2210e-04, -6.9205e-05,
        -3.1105e-02, -1.3242e-04, -4.0541e-04, -4.8592e-04, -3.7945e-04,
        -7.1113e-04, -3.8135e-04, -2.3990e-04, -2.9588e-04, -4.6173e-04,
        -5.7788e-04,  3.2291e-05, -3.0355e-04, -1.1051e-03, -4.6007e-04,
        -2.0901e-04, -2.3309e-02, -3.7325e-04, -3.8021e-04, -7.9140e-05,
        -4.1878e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0475,  0.1294,  0.0069,  0.0002,  0.0003,  0.0013,  0.0029,  0.0009,
         0.0018,  0.0149,  0.0010,  0.0009,  0.0007,  0.0076,  0.0007,  0.0009,
         0.0020,  0.0014,  0.0024,  0.0008,  0.0014,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0114,  0.1992,  0.0038, -0.0004,  0.0011,  0.0007,  0.0030,  0.0015,
         0.0019,  0.0102,  0.0013,  0.0015,  0.0016,  0.0062,  0.0010,  0.0006,
         0.1111,  0.0014,  0.0028,  0.0017,  0.0028,  0.0004,  0.0018,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1201e-01, -7.6734e-01, -1.5623e-02, -2.3118e-04, -3.2425e-03,
        -5.9595e-03, -1.7479e-02, -4.9758e-03, -1.0444e-02, -5.1663e-02,
        -6.5628e-03, -6.1015e-03, -2.3165e-03, -2.5554e-02, -2.2316e-03,
        -3.8897e-03, -9.6611e-03, -7.7242e-03, -1.4681e-02, -4.2440e-03,
        -6.7035e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.2435e-04, 5.0304e-05, 1.0063e-04, 9.2529e-05, 3.0038e-05, 9.1845e-05,
        6.5034e-06, 1.4504e-05, 1.8987e-05, 1.5594e-05, 4.5781e-05, 6.5039e-05,
        3.2883e-05, 9.7878e-06, 1.6778e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.1522e-04, 4.1178e-05, 2.3572e-04, 8.0830e-05, 3.2828e-05, 1.2247e-04,
        4.6094e-06, 1.1826e-05, 2.0844e-05, 4.9471e-05, 5.1944e-05, 3.6600e-05,
        3.7981e-05, 1.0100e-05, 2.6490e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0207e-04, 3.5529e-05, 1.1021e-04, 7.5847e-05, 2.8659e-05, 6.7191e-05,
        3.8912e-06, 1.8268e-05, 1.2659e-04, 3.2484e-05, 8.4736e-05, 3.2700e-05,
        2.2642e-05, 9.7994e-05, 3.0392e-05, 1.0018e-05, 1.5939e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1030e-02, -3.6483e-01, -4.0532e-04, -9.2279e-03, -4.2186e-03,
        -2.7806e-02, -2.8725e-03, -3.9234e-03, -1.1081e-03,  5.4465e-05,
        -2.7624e-03, -6.3065e-02, -3.4000e-03, -7.3095e-03, -2.1194e-03,
        -5.5457e-03, -2.0675e-03, -3.9436e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2245e-03,  4.0157e-02,  1.8384e-04,  8.3540e-04,  2.6268e-04,
         1.9958e-03,  5.2734e-04,  4.4480e-04,  8.8032e-05,  2.7684e-06,
         3.2135e-04,  1.3093e-02,  5.7875e-04,  6.9101e-04,  4.3490e-04,
         5.3274e-04,  4.6363e-04,  6.6866e-04,  5.0752e-04,  4.1440e-04,
         3.7759e-04,  8.5536e-04,  2.2779e-02,  4.6153e-04,  1.5338e-04,
         3.9923e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8430e-02,  1.9958e-01,  8.2837e-04,  4.5925e-03,  2.1394e-03,
         5.4408e-03,  2.2404e-03,  2.6901e-03,  9.4702e-04, -5.5961e-07,
         1.1961e-03,  1.0861e-01,  3.6720e-03,  3.4600e-03,  2.0239e-03,
         2.6073e-03,  6.5493e-03,  2.3247e-03,  1.2274e-03,  2.5293e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-0.0072, -0.0028, -0.0014, -0.0022, -0.0016, -0.0011, -0.2114, -0.0008,
        -0.0015, -0.0227, -0.0009,  0.0003, -0.0015, -0.0993, -0.0013, -0.0014,
        -0.0041, -0.0085, -0.0018, -0.0032, -0.0022, -0.0004, -0.0018,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1000: [tensor([2.8623e-05, 4.3542e-06, 4.5969e-07, 1.2482e-06, 4.8598e-07, 1.5670e-06,
        4.8665e-07, 4.7455e-07, 6.2676e-07, 3.5831e-07, 1.6890e-06, 3.5586e-06,
        2.6103e-07, 1.9305e-07, 4.2796e-07, 1.2907e-05, 8.9772e-07, 1.3251e-06,
        1.0241e-06, 3.3940e-06, 1.0921e-05, 3.6265e-07, 8.1413e-07, 3.3135e-07,
        6.3221e-07, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.9786e-05, 7.5963e-06, 6.7538e-07, 1.3379e-06, 7.4782e-07, 1.8835e-06,
        5.5593e-07, 6.1428e-07, 1.1846e-06, 5.2153e-07, 2.0993e-06, 6.1736e-06,
        2.8949e-07, 1.1017e-07, 5.3448e-07, 9.9433e-06, 2.3716e-06, 1.0792e-06,
        6.7421e-07, 3.1683e-07, 3.5357e-06, 1.2826e-06, 8.2289e-07, 5.4703e-07,
        2.3188e-07, 2.4642e-07, 6.9729e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5034e-05, 7.1958e-06, 6.1497e-07, 1.6093e-06, 9.9318e-07, 1.8224e-06,
        9.9545e-07, 1.0551e-06, 1.1660e-06, 8.8996e-07, 2.3945e-06, 5.3662e-06,
        3.4474e-07, 3.2225e-07, 5.2515e-07, 2.6726e-05, 2.5699e-06, 1.3073e-06,
        7.7568e-07, 1.2890e-06, 1.6256e-06, 1.8906e-05, 2.3552e-06, 7.9152e-07,
        1.2003e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.8390e-05, 8.3979e-06, 1.0928e-06, 2.0874e-06, 1.2877e-06, 1.1036e-06,
        4.8377e-06, 5.6445e-06, 4.9354e-07, 1.2948e-06, 6.3474e-06, 8.3621e-07,
        9.6384e-08, 8.3000e-07, 3.5937e-06, 6.2132e-06, 2.6208e-05, 2.4319e-06,
        1.3392e-06, 1.1303e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.3375e-05, 9.2132e-06, 5.9357e-07, 1.1054e-06, 1.0612e-06, 9.2692e-07,
        3.0551e-06, 4.4575e-06, 2.7859e-07, 1.0175e-06, 6.0440e-06, 1.2102e-06,
        4.7270e-07, 5.5752e-07, 2.1417e-06, 5.3442e-06, 5.9515e-07, 1.2829e-06,
        6.0005e-07, 2.4515e-06, 3.5915e-07, 7.4161e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5619e-05, 5.9533e-06, 5.0734e-07, 1.5248e-06, 7.4689e-07, 6.1568e-07,
        2.4602e-06, 3.2834e-06, 3.6957e-07, 8.1080e-07, 4.2434e-06, 7.4625e-07,
        2.4869e-07, 4.2735e-07, 1.1526e-06, 4.3935e-06, 9.6338e-07, 4.8312e-06,
        9.9451e-07, 1.3153e-06, 3.7434e-07, 6.3925e-07, 6.3699e-07, 1.1121e-06,
        4.7858e-07, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5410e-05, 4.5196e-06, 8.1924e-07, 5.7549e-07, 1.0320e-06, 5.0442e-07,
        2.4619e-07, 5.7736e-07, 4.9152e-07, 4.4717e-07, 4.5223e-06, 5.0408e-07,
        4.8690e-07, 4.5815e-07, 2.4330e-05, 1.7143e-06, 1.1111e-06, 1.2386e-06,
        1.6602e-06, 1.0404e-06, 9.6133e-07, 3.4599e-07, 9.9056e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.0142e-05, 7.2765e-06, 5.0848e-07, 1.0769e-06, 1.2493e-06, 5.6401e-07,
        3.8429e-07, 6.6735e-07, 6.6280e-07, 4.8359e-07, 4.9105e-06, 3.5422e-07,
        8.9017e-07, 5.2090e-07, 1.3298e-05, 1.0797e-06, 1.5597e-06, 1.4642e-06,
        9.4505e-07, 5.4945e-07, 1.1494e-05, 3.7777e-07, 1.1102e-06, 8.7825e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.9548e-05, 5.2100e-06, 9.3703e-07, 6.6600e-07, 1.2208e-06, 4.7516e-07,
        3.4860e-07, 4.9003e-07, 6.5038e-07, 2.9185e-07, 4.6571e-06, 3.1767e-07,
        2.9272e-07, 4.7146e-07, 1.1298e-05, 1.0208e-06, 1.3321e-06, 1.2863e-06,
        2.9422e-06, 6.2832e-07, 2.7858e-06, 1.4187e-05, 9.6781e-07, 7.9349e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5337e-05, 5.1401e-06, 4.1794e-07, 1.1665e-07, 1.0756e-06, 5.5690e-07,
        1.2938e-06, 1.8414e-06, 8.5089e-07, 6.5021e-07, 7.2352e-07, 4.8980e-07,
        7.8854e-06, 3.9493e-07, 1.0615e-07, 6.1198e-07, 2.1703e-05, 5.9444e-07,
        3.7651e-06, 1.0725e-06, 1.0977e-05, 1.2450e-06, 4.2515e-07, 7.4849e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5657e-05, 7.3903e-06, 5.2656e-07, 2.0453e-07, 9.7161e-07, 5.7725e-07,
        8.4491e-07, 2.1033e-06, 9.0912e-07, 7.0977e-07, 4.6589e-07, 6.8113e-07,
        5.9656e-06, 2.9729e-07, 1.1208e-07, 8.1658e-07, 2.2755e-05, 1.1173e-06,
        1.5298e-06, 5.3013e-06, 4.0351e-07, 1.0992e-05, 7.2719e-07, 5.7539e-07,
        1.1311e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6363e-05, 5.1175e-06, 3.8237e-07, 4.5208e-07, 8.4262e-07, 5.0106e-07,
        7.5323e-07, 2.3110e-06, 5.6552e-07, 4.4556e-07, 5.4941e-07, 7.0785e-07,
        6.0064e-06, 3.6644e-07, 1.4151e-07, 5.2039e-07, 2.0045e-05, 9.3555e-07,
        5.4961e-06, 1.3396e-06, 3.3169e-06, 6.2439e-07, 3.5587e-07, 7.0183e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1050: [tensor([1.6562e-05, 4.3720e-06, 5.0874e-07, 6.5237e-07, 1.5096e-07, 3.6793e-07,
        1.0004e-06, 2.4321e-07, 1.7389e-06, 1.5591e-06, 3.0150e-07, 2.8200e-07,
        5.1166e-06, 1.1194e-06, 9.0326e-07, 2.5935e-07, 7.7469e-07, 7.2016e-07,
        7.0587e-07, 6.1282e-07, 5.6158e-07, 1.3994e-07, 3.9362e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4811e-05, 3.4787e-06, 5.8299e-07, 4.2973e-07, 7.1054e-07, 1.9068e-06,
        2.7254e-07, 8.0825e-07, 5.7621e-06, 4.7751e-07, 1.0168e-07, 9.4663e-07,
        2.1778e-06, 3.1438e-07, 1.1342e-07, 2.8361e-07, 5.6045e-06, 3.7007e-07,
        3.6366e-06, 3.0340e-07, 1.1035e-06, 3.5063e-06, 4.2737e-06, 7.1780e-07,
        2.4049e-07, 5.9257e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7674e-05, 6.5451e-06, 1.0230e-06, 6.6142e-07, 1.1716e-06, 2.9368e-06,
        5.4632e-07, 1.0539e-06, 9.1359e-06, 5.1806e-07, 4.7934e-07, 9.3921e-07,
        3.0717e-06, 4.2538e-07, 1.1154e-07, 6.5825e-07, 5.3474e-06, 1.4072e-06,
        5.7561e-06, 8.2666e-07, 2.7588e-06, 1.0014e-06, 6.0511e-06, 6.2164e-07,
        7.0428e-07, 1.0004e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.4605e-05, 4.6731e-06, 3.9734e-07, 5.4327e-07, 4.5609e-07, 1.4050e-06,
        2.4635e-07, 7.7204e-07, 4.1008e-06, 1.2627e-07, 2.3856e-07, 7.8506e-07,
        1.4939e-06, 6.6768e-08, 6.0697e-08, 3.6453e-07, 1.3927e-05, 7.7997e-07,
        3.8286e-06, 9.4203e-07, 2.8144e-06, 6.9344e-07, 2.8017e-07, 4.7956e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7133e-05, 2.6828e-06, 5.1700e-07, 3.7885e-07, 3.7238e-06, 1.6636e-07,
        5.1053e-07, 2.8047e-07, 2.9565e-07, 5.1525e-07, 1.8906e-06, 3.3695e-07,
        5.6203e-07, 2.7698e-06, 2.3566e-07, 6.6252e-07, 3.2219e-06, 2.7577e-06,
        4.5521e-07, 5.7861e-07, 7.1092e-08, 3.1095e-07, 1.5116e-05, 4.9652e-07,
        1.2426e-06, 6.1468e-07, 9.2413e-07, 8.9234e-06, 5.8336e-07, 2.7592e-07,
        4.5570e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7790e-05, 2.6463e-06, 4.5398e-07, 4.5563e-07, 5.1066e-06, 1.1160e-07,
        6.1592e-07, 3.8484e-07, 3.2446e-07, 7.3713e-07, 1.9145e-06, 4.3851e-07,
        5.4064e-07, 3.2383e-06, 2.9380e-07, 9.3495e-07, 3.0039e-06, 5.5473e-06,
        3.5700e-07, 6.2120e-07, 1.9600e-08, 4.7229e-07, 1.3733e-05, 8.4650e-07,
        5.9112e-07, 5.4016e-06, 5.8413e-07, 5.8417e-07, 9.1059e-07, 5.6916e-06,
        5.4158e-07, 2.7162e-07, 6.6796e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0587e-05, 5.4874e-06, 7.0962e-07, 1.0580e-06, 6.1619e-06, 4.2036e-07,
        8.0208e-07, 5.6990e-07, 4.3699e-07, 1.3563e-06, 2.7458e-06, 6.4333e-07,
        8.6313e-07, 6.1480e-06, 4.4338e-07, 1.0064e-06, 4.5211e-06, 5.8604e-06,
        5.6719e-07, 1.0717e-06, 1.5914e-07, 5.8004e-07, 1.8875e-05, 1.4391e-06,
        1.1017e-06, 1.1255e-05, 9.4772e-07, 6.1819e-07, 9.1221e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6215e-05, 3.6786e-06, 9.1460e-07, 1.7104e-07, 2.7413e-07, 1.7169e-07,
        3.7153e-07, 2.8141e-07, 7.0450e-07, 1.6262e-07, 1.8708e-06, 7.4226e-07,
        3.7034e-07, 2.2667e-06, 4.3287e-07, 1.0411e-06, 8.3362e-07, 3.4858e-07,
        4.9728e-07, 4.0988e-07, 1.4313e-07, 2.8919e-07, 3.2066e-06, 8.1795e-07,
        3.7144e-07, 5.1519e-07, 7.1883e-06, 7.1926e-07, 9.5167e-07, 2.5559e-07,
        5.8215e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6669e-05, 3.6185e-06, 7.8690e-07, 1.5007e-07, 4.7996e-07, 1.7576e-07,
        2.9679e-07, 3.4519e-07, 7.8652e-07, 1.1606e-07, 1.4986e-06, 7.5464e-07,
        3.5709e-07, 2.3747e-06, 4.1142e-07, 1.0229e-06, 9.9449e-07, 3.8105e-07,
        5.4705e-07, 5.5667e-07, 2.3898e-07, 3.1977e-07, 8.2889e-06, 8.3322e-07,
        1.2942e-06, 7.7968e-06, 8.3650e-07, 9.2721e-07, 9.6827e-07, 2.7601e-07,
        7.8486e-06, 7.3916e-07, 6.0324e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2486e-05, 5.4709e-06, 1.1526e-06, 1.5087e-07, 6.1166e-07, 2.7385e-07,
        3.4181e-07, 3.7228e-07, 8.4949e-07, 1.7884e-07, 2.3157e-06, 8.0938e-07,
        7.2689e-07, 2.5967e-06, 6.3585e-07, 1.3175e-06, 1.0438e-06, 4.6337e-07,
        7.9721e-07, 6.6068e-07, 2.8074e-07, 3.9339e-07, 4.0220e-06, 1.2866e-06,
        2.3394e-07, 1.0597e-06, 6.9815e-06, 7.3173e-07, 3.9943e-07, 4.6490e-07,
        5.8999e-06, 7.3133e-07, 1.6495e-06, 6.4781e-07, 6.2243e-07, 1.1294e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2627e-05, 3.0328e-06, 9.8576e-07, 1.4146e-07, 3.4829e-07, 1.9138e-07,
        3.3152e-07, 4.0036e-07, 7.6462e-07, 3.2578e-07, 1.8066e-06, 9.1907e-07,
        3.5037e-07, 2.8330e-06, 3.8245e-07, 8.6615e-07, 1.2719e-06, 3.4352e-07,
        6.7912e-07, 4.6179e-07, 3.8121e-08, 3.9650e-07, 8.6480e-06, 5.4808e-07,
        2.3398e-06, 1.2623e-05, 1.0422e-06, 1.4714e-06, 9.5298e-07, 6.2351e-06,
        5.9406e-07, 5.8475e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8028e-05, 4.0413e-06, 1.1925e-06, 1.8440e-07, 4.8216e-07, 1.1515e-07,
        3.1199e-07, 4.5537e-07, 8.5461e-07, 1.9274e-07, 2.0439e-06, 1.0906e-06,
        5.4132e-07, 2.2026e-06, 4.1302e-07, 1.2679e-06, 9.8815e-07, 5.6286e-07,
        8.2015e-07, 6.7926e-07, 5.7547e-08, 4.6752e-07, 4.3055e-06, 1.1039e-06,
        3.0897e-07, 7.8057e-07, 7.1935e-06, 4.7619e-07, 9.4336e-06, 5.9998e-07,
        3.0020e-07, 7.1140e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1100: [tensor([2.6006e-05, 4.4837e-06, 7.0217e-07, 8.4373e-07, 6.7249e-07, 2.7914e-07,
        1.1338e-06, 1.6909e-06, 5.9860e-07, 6.4489e-07, 5.3502e-07, 1.6319e-07,
        3.8414e-07, 8.9936e-06, 9.9052e-07, 8.8653e-07, 1.2501e-05, 1.1958e-06,
        1.1219e-06, 3.5576e-06, 3.3874e-07, 7.8185e-07, 8.5273e-07, 3.6411e-07,
        9.9392e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.3690e-05, 1.0225e-06, 1.5781e-06, 4.1945e-07, 6.0910e-06, 8.9408e-07,
        3.3626e-06, 7.4877e-07, 1.4616e-06, 6.9880e-06, 7.1270e-07, 1.7954e-07,
        6.8337e-07, 4.7050e-07, 1.8983e-06, 2.2130e-06, 1.1799e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1891e-05, 7.0399e-07, 2.0701e-06, 3.1967e-07, 5.1252e-06, 8.0601e-07,
        2.7690e-06, 8.1788e-07, 1.8373e-06, 9.9682e-06, 1.3970e-06, 2.1888e-07,
        7.6759e-07, 7.2071e-07, 2.4063e-06, 1.9852e-06, 1.0406e-06, 8.4027e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7128e-05, 8.0158e-07, 1.4412e-06, 1.0443e-07, 5.7822e-06, 5.1658e-07,
        2.4488e-06, 7.6897e-07, 1.3294e-06, 5.2986e-06, 6.9016e-07, 1.5448e-07,
        5.8280e-07, 1.3538e-06, 6.2220e-06, 1.4154e-06, 4.7947e-07, 9.2832e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2986e-05, 1.0450e-06, 1.5100e-06, 5.7127e-07, 7.3468e-06, 7.7990e-07,
        1.3594e-06, 7.8997e-07, 4.1670e-06, 1.3795e-06, 6.3775e-08, 9.1554e-07,
        1.7271e-06, 1.1169e-05, 1.0847e-06, 1.3968e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4668e-05, 1.2427e-06, 2.4721e-06, 7.1142e-07, 9.3064e-06, 8.4613e-07,
        1.4303e-06, 1.2224e-06, 3.8798e-06, 2.0210e-06, 8.1489e-08, 1.0890e-06,
        3.0029e-06, 1.2311e-06, 1.6642e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.3517e-05, 9.6286e-07, 1.4636e-06, 6.5152e-07, 7.0319e-06, 7.4008e-07,
        1.0753e-06, 6.8801e-07, 3.9770e-06, 1.2924e-06, 3.4497e-07, 7.0847e-07,
        5.5637e-07, 2.3816e-06, 5.9351e-07, 7.6961e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8405e-05, 5.3366e-07, 3.4986e-07, 3.4100e-07, 2.0142e-07, 2.1164e-06,
        6.1505e-07, 2.6302e-06, 3.0370e-07, 3.4865e-07, 4.5452e-07, 3.2056e-06,
        9.9994e-07, 7.3997e-07, 3.7750e-07, 5.6940e-07, 3.7525e-07, 2.3307e-07,
        2.9590e-07, 1.0231e-05, 7.4024e-07, 5.3336e-07, 5.9251e-06, 9.4332e-07,
        3.1791e-07, 5.2495e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2185e-05, 5.9622e-07, 3.8640e-07, 7.4301e-07, 3.3357e-07, 1.6034e-06,
        7.9710e-07, 3.7701e-06, 4.8766e-08, 3.3076e-07, 6.7329e-07, 3.6774e-06,
        1.1161e-06, 1.1021e-06, 1.3841e-07, 8.7059e-07, 2.6986e-07, 3.7028e-07,
        3.9773e-07, 3.3293e-06, 4.8369e-07, 6.4231e-06, 1.0117e-06, 5.6300e-07,
        7.3263e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8294e-05, 5.8405e-07, 2.4760e-07, 5.0240e-07, 4.2339e-07, 2.1198e-06,
        6.7388e-07, 3.2100e-06, 1.5885e-07, 4.9822e-07, 7.3536e-07, 3.0887e-06,
        1.0458e-06, 1.1106e-06, 2.3779e-07, 8.8546e-07, 3.4500e-07, 3.1217e-07,
        5.7142e-07, 3.9315e-06, 6.7347e-07, 6.0140e-06, 8.4200e-07, 6.3365e-07,
        9.0054e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.6290e-05, 4.9609e-07, 5.8301e-07, 1.3436e-06, 2.7585e-07, 4.3627e-07,
        9.3808e-07, 3.1636e-07, 1.5060e-06, 1.1894e-06, 4.2141e-07, 6.8432e-07,
        2.2064e-06, 4.2445e-07, 4.9183e-07, 3.3589e-08, 4.3427e-07, 3.4660e-07,
        6.1450e-07, 3.6518e-06, 3.4909e-07, 4.8926e-07, 2.1983e-06, 5.8393e-07,
        2.2000e-06, 2.4140e-06, 6.7385e-07, 8.6613e-07, 8.8971e-06, 7.1578e-07,
        7.5685e-07, 2.2604e-07, 4.8447e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2521e-05, 3.4912e-07, 2.9813e-07, 1.3748e-06, 3.9322e-07, 2.9348e-07,
        7.0616e-07, 4.4490e-07, 9.8529e-07, 1.1643e-06, 4.2988e-07, 6.5081e-07,
        2.1188e-06, 2.8818e-07, 3.1763e-07, 7.3693e-08, 3.5434e-07, 3.9097e-07,
        3.8749e-07, 2.3499e-06, 4.1414e-07, 4.2026e-07, 1.7657e-06, 5.8300e-07,
        3.4066e-07, 3.4815e-07, 6.9547e-07, 8.3365e-07, 4.4640e-07, 1.8307e-06,
        1.9109e-06, 6.6386e-07, 8.3479e-07, 6.8552e-06, 6.9326e-07, 6.9734e-07,
        2.0013e-07, 4.3642e-07], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1150: [tensor([ 2.6595e-05,  4.6768e-06,  8.2076e-07,  3.3058e-06,  4.4637e-06,
         3.8800e-07,  5.8092e-07,  5.1705e-07,  2.7025e-06,  3.3859e-07,
         1.9132e-06,  4.6902e-07,  1.8689e-07,  1.1928e-06,  4.2883e-06,
         7.6793e-07,  5.2939e-06,  6.9565e-06,  3.6487e-07,  7.9610e-07,
        -4.0936e-09,  3.4865e-07,  6.4824e-07,  1.0949e-06,  2.2738e-06,
         7.9550e-07,  6.2845e-07,  5.6696e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5120e-05, 3.5374e-06, 5.0624e-07, 4.6705e-07, 5.1782e-06, 3.9247e-07,
        7.2856e-07, 2.9848e-07, 4.9213e-06, 6.2467e-07, 3.0257e-07, 2.9952e-07,
        1.0073e-06, 6.7795e-07, 1.2325e-06, 5.2915e-07, 7.5479e-07, 2.2760e-07,
        5.4369e-07, 6.7260e-07, 1.0103e-06, 4.6974e-06, 4.2694e-07, 7.6456e-07,
        8.8762e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9938e-05, 6.2290e-06, 8.9551e-07, 6.5436e-07, 5.4900e-06, 6.1828e-07,
        9.6256e-07, 4.9787e-07, 6.5938e-06, 5.9544e-07, 4.2103e-07, 6.5370e-07,
        1.5255e-06, 8.0043e-07, 1.6496e-06, 8.0172e-07, 1.2681e-06, 2.4767e-07,
        5.8748e-07, 6.7671e-07, 1.0754e-06, 1.3701e-06, 1.1096e-06, 1.6710e-06,
        1.1372e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6740e-05, 5.9274e-06, 8.3256e-07, 6.9344e-07, 4.4929e-06, 4.7450e-07,
        8.9128e-07, 5.0012e-07, 4.4272e-06, 8.7787e-07, 4.6859e-07, 6.5995e-07,
        1.1923e-06, 5.9217e-07, 1.0754e-06, 6.8962e-07, 9.2514e-07, 2.7965e-07,
        5.9720e-07, 8.6368e-07, 8.3307e-07, 2.3772e-06, 1.1665e-06, 9.2610e-07,
        4.0843e-07, 7.9956e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0384e-05, 4.8889e-07, 1.9070e-06, 3.6220e-07, 2.3393e-07, 4.5629e-07,
        5.7391e-07, 4.2705e-07, 9.8071e-07, 6.4108e-06, 5.5103e-07, 2.6277e-07,
        3.7952e-07, 5.2116e-07, 3.5747e-06, 6.0823e-07, 9.2412e-07, 2.4152e-06,
        3.4127e-07, 9.5300e-08, 4.4011e-07, 4.4812e-06, 3.1843e-07, 1.4914e-06,
        6.3821e-07, 3.3189e-06, 1.4045e-06, 4.4108e-07, 6.4404e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9196e-05, 7.3305e-07, 2.1360e-06, 3.5426e-07, 2.9195e-07, 5.0910e-07,
        6.8816e-07, 5.8140e-07, 9.4494e-07, 7.2458e-06, 5.8579e-07, 2.0209e-07,
        4.7402e-07, 6.2003e-07, 5.5895e-06, 9.3770e-07, 1.0680e-06, 3.5741e-06,
        6.1793e-07, 1.3131e-07, 4.0889e-07, 4.5253e-06, 1.2824e-06, 4.8183e-06,
        1.3422e-06, 2.8256e-06, 2.9985e-07, 6.0578e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.2483e-05, 7.7824e-07, 2.8120e-06, 3.3724e-07, 2.6670e-07, 6.1553e-07,
        7.2526e-07, 8.0760e-07, 1.1467e-06, 7.8950e-06, 6.2514e-07, 3.4731e-07,
        5.6217e-07, 6.5888e-07, 4.3582e-06, 8.0622e-07, 1.5758e-06, 2.9580e-06,
        3.7025e-07, 1.9015e-07, 2.8466e-07, 2.1334e-06, 4.7280e-06, 1.0461e-06,
        4.3467e-07, 1.2803e-05, 8.6205e-07, 5.7135e-07, 4.6560e-07, 6.4103e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7623e-05, 8.8771e-07, 1.4055e-06, 4.2058e-07, 2.1026e-07, 4.3629e-07,
        5.7760e-07, 2.0558e-06, 5.4604e-07, 5.2132e-07, 3.4534e-07, 4.9398e-07,
        2.1028e-06, 1.7456e-06, 8.3069e-07, 4.6213e-07, 6.7095e-07, 1.8180e-06,
        1.6440e-06, 6.8468e-07, 2.9728e-07, 1.6632e-07, 6.4074e-08, 2.9790e-07,
        2.7790e-06, 8.2372e-07, 2.3150e-06, 6.4823e-07, 8.2200e-06, 4.2257e-07,
        4.5150e-07, 2.0075e-06, 2.6018e-06, 9.5718e-07, 2.7510e-07, 3.5209e-07,
        2.8040e-06, 9.1370e-07, 7.9604e-07, 5.4187e-07, 8.0481e-07, 2.8100e-07,
        9.0447e-07, 2.3289e-06, 9.0716e-07, 3.3009e-07, 5.4878e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7088e-05, 8.0223e-07, 2.2047e-06, 4.4598e-07, 2.3058e-07, 3.5582e-07,
        5.2529e-07, 2.3766e-06, 3.8451e-07, 5.0595e-07, 2.8128e-07, 3.8582e-07,
        1.7888e-06, 2.0492e-06, 6.3880e-07, 4.2541e-07, 7.0428e-07, 2.1253e-06,
        1.5173e-06, 5.6968e-07, 2.6324e-07, 1.7835e-07, 2.3860e-08, 2.8026e-07,
        2.7557e-06, 1.1387e-06, 1.9663e-06, 5.0369e-07, 4.6700e-07, 2.4224e-07,
        5.8766e-06, 5.9790e-07, 4.2990e-07, 7.8689e-07, 5.7030e-07, 3.7378e-07,
        4.5226e-07, 6.0989e-07, 2.8588e-06, 6.8018e-07, 1.4783e-07, 4.0720e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7489e-05, 4.9224e-07, 1.3582e-06, 2.5533e-07, 1.5665e-07, 2.4127e-07,
        4.5379e-07, 1.5419e-06, 3.0419e-07, 3.2227e-07, 1.9507e-07, 3.2376e-07,
        1.0604e-06, 1.3027e-06, 4.8952e-07, 2.3283e-07, 3.9952e-07, 1.7886e-06,
        9.0751e-07, 5.0503e-07, 1.4825e-07, 9.2468e-08, 9.7386e-08, 1.5435e-07,
        7.9367e-07, 9.3705e-07, 1.6970e-07, 3.0673e-07, 4.1242e-07, 4.5415e-07,
        3.6264e-06, 3.3622e-07, 3.4148e-07, 2.2078e-07, 3.1531e-07, 1.9371e-07,
        1.0971e-06, 3.0842e-07, 3.2901e-07, 1.4036e-07, 3.1855e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1651e-05, 3.7909e-07, 5.3748e-07, 1.4392e-06, 3.4577e-07, 1.0146e-06,
        3.2722e-07, 2.3994e-07, 5.8622e-07, 6.2646e-06, 3.6593e-07, 5.7769e-07,
        1.1802e-07, 3.8749e-07, 3.2962e-07, 1.0307e-06, 4.4476e-06, 2.7870e-06,
        9.9814e-06, 9.0880e-07, 1.2131e-06, 1.2608e-06, 9.1097e-08, 5.5135e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8353e-05, 5.4631e-07, 1.3400e-06, 2.1588e-06, 5.7413e-07, 2.0114e-06,
        8.3130e-07, 2.9828e-07, 1.0317e-06, 6.3918e-06, 6.3953e-07, 9.2487e-07,
        3.4311e-07, 6.0864e-07, 1.8545e-06, 3.9168e-06, 7.1930e-07, 5.1935e-06,
        9.0301e-06, 8.5551e-07, 4.7372e-07, 9.1706e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1200: [tensor([2.1511e-05, 7.8135e-07, 5.8625e-06, 9.2974e-07, 4.9133e-07, 8.6121e-07,
        3.1350e-07, 4.2637e-07, 1.7134e-07, 4.4717e-07, 5.1707e-06, 7.1141e-07,
        5.0412e-06, 7.9980e-06, 1.2708e-06, 1.0178e-06, 1.4539e-06, 8.8071e-07,
        6.4237e-07, 6.1172e-07, 9.3326e-07, 1.9543e-07, 4.2558e-07, 9.2559e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6029e-05, 7.4526e-07, 6.0814e-07, 6.2252e-07, 1.2740e-06, 8.5378e-07,
        3.6953e-06, 6.9091e-07, 1.0243e-06, 4.6451e-06, 9.0446e-07, 4.8830e-07,
        7.5849e-07, 9.3008e-07, 4.1341e-07, 1.8828e-06, 7.3260e-07, 7.9695e-07,
        4.9990e-07, 1.1518e-07, 4.0305e-07, 7.0040e-07, 1.1225e-06, 2.6156e-06,
        2.0429e-06, 1.5466e-06, 8.9731e-07, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6339e-05, 5.0722e-07, 9.4518e-07, 6.4180e-07, 1.2591e-06, 5.3280e-07,
        3.7422e-06, 4.6968e-07, 8.9412e-07, 7.7057e-06, 7.2826e-07, 4.9733e-07,
        7.3904e-07, 6.2411e-07, 3.3953e-07, 1.0387e-06, 1.0195e-06, 5.5688e-07,
        4.1337e-07, 6.2537e-08, 4.9166e-07, 7.5025e-07, 3.1648e-06, 4.7869e-07,
        7.0096e-07, 8.1453e-06, 8.0404e-07, 9.0714e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5508e-05, 6.9640e-07, 5.9784e-07, 6.3427e-07, 1.5691e-06, 7.1920e-07,
        6.8545e-06, 5.7762e-07, 9.7600e-07, 6.0007e-06, 8.8113e-07, 5.5759e-07,
        1.1303e-06, 8.5784e-07, 6.3770e-07, 1.1563e-06, 8.4318e-07, 7.9549e-07,
        5.3133e-07, 1.7895e-07, 3.1098e-07, 1.6913e-06, 4.3678e-06, 1.2739e-06,
        1.5839e-06, 1.5685e-06, 7.1694e-07, 7.4913e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.4961e-05, 9.3802e-07, 8.5664e-07, 1.2081e-06, 5.9623e-07, 7.4498e-07,
        4.1730e-06, 4.6915e-07, 1.5889e-06, 8.7307e-06, 6.6452e-07, 9.4524e-07,
        3.0252e-07, 7.7441e-07, 8.3163e-06, 1.2888e-06, 1.1802e-05, 9.4332e-07,
        4.9643e-07, 9.2933e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.7033e-05, 5.6025e-07, 2.0603e-07, 6.7283e-07, 3.6423e-07, 4.7993e-07,
        3.0527e-06, 3.3199e-07, 5.1610e-07, 5.0580e-06, 2.2224e-07, 7.1112e-07,
        1.5284e-07, 3.8951e-07, 7.8474e-07, 5.1919e-07, 1.1522e-06, 5.7712e-06,
        1.0966e-06, 8.8544e-07, 8.2657e-07, 4.0886e-07, 8.3668e-07, 3.0658e-06,
        9.5857e-07, 2.9738e-07, 5.2515e-07, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.2435e-05, 9.3815e-07, 2.9744e-07, 1.2869e-06, 6.0802e-07, 8.7695e-07,
        7.5273e-06, 4.8146e-07, 1.3453e-06, 5.3729e-06, 7.3025e-07, 1.0345e-06,
        9.7323e-08, 7.3030e-07, 1.4882e-05, 9.0644e-07, 1.8100e-05, 1.1648e-06,
        8.2319e-06, 6.6079e-07, 4.7146e-07, 9.4456e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.9387e-05, 6.4467e-06, 1.0808e-06, 1.5361e-06, 9.5234e-07, 6.7526e-06,
        5.3836e-07, 5.2142e-07, 2.5679e-07, 7.0703e-07, 1.3045e-05, 1.3395e-06,
        1.2061e-06, 1.6672e-06, 9.1935e-07, 1.6818e-06, 1.7127e-06, 6.6511e-07,
        1.1886e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.0990e-05, 5.9933e-06, 1.3511e-06, 1.0795e-06, 4.3047e-07, 4.7021e-06,
        3.4379e-07, 3.9617e-07, 3.8159e-07, 6.1875e-07, 8.6331e-07, 1.4791e-06,
        8.4018e-07, 7.8286e-07, 4.1433e-06, 1.3220e-06, 5.0686e-07, 8.3030e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.7142e-05, 6.1883e-06, 1.1911e-06, 8.2723e-07, 6.0823e-07, 5.1068e-06,
        1.2417e-07, 3.3443e-07, 2.9335e-07, 6.6015e-07, 8.2397e-07, 1.4727e-06,
        4.7937e-06, 1.6289e-06, 9.1373e-07, 4.7417e-07, 1.2712e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5074e-05, 4.4682e-06, 5.0647e-07, 6.6305e-07, 3.0160e-06, 3.9066e-07,
        3.1338e-07, 3.2055e-07, 5.0577e-07, 1.1477e-05, 5.7789e-07, 3.5668e-06,
        7.1078e-07, 7.7209e-07, 1.1069e-06, 5.0407e-07, 1.8429e-06, 5.7047e-07,
        6.6229e-07, 7.5123e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5011e-05, 4.3166e-06, 2.7168e-07, 5.6146e-07, 2.5328e-06, 3.4774e-07,
        3.9752e-07, 2.0341e-07, 6.6126e-07, 1.3905e-05, 5.2552e-07, 6.1464e-06,
        1.4793e-06, 1.0431e-05, 7.1625e-07, 1.1798e-06, 1.3513e-06, 6.0035e-07,
        8.7972e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1250: [tensor([2.4615e-05, 6.0351e-06, 7.7272e-07, 1.2876e-06, 6.6530e-07, 1.7822e-06,
        3.0058e-07, 7.3159e-07, 2.6939e-06, 7.5263e-07, 6.6140e-07, 4.1385e-07,
        3.5756e-06, 5.5163e-07, 1.7621e-06, 7.7398e-06, 7.8256e-07, 7.6681e-07,
        1.5928e-07, 4.1084e-07, 3.0211e-06, 7.4467e-07, 5.9491e-06, 3.9465e-06,
        7.6228e-07, 3.0558e-07, 5.8953e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7632e-05, 9.1142e-07, 2.8859e-07, 2.6406e-07, 8.3109e-07, 1.5509e-06,
        5.5474e-07, 3.6094e-07, 2.5205e-06, 5.6143e-07, 4.0993e-07, 2.3789e-07,
        1.5268e-07, 6.6819e-07, 5.0178e-06, 1.0900e-06, 5.6898e-07, 4.5058e-06,
        5.9648e-07, 2.9701e-07, 5.4512e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7512e-05, 8.8252e-07, 6.9082e-07, 2.7167e-07, 7.4907e-07, 1.1379e-06,
        7.7401e-07, 5.6917e-07, 2.4470e-06, 4.6040e-07, 4.0355e-07, 8.1283e-08,
        1.5235e-07, 4.1880e-07, 1.2207e-06, 1.6758e-06, 1.0965e-06, 4.1112e-06,
        8.2937e-07, 2.2498e-07, 5.9021e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9098e-05, 1.1013e-06, 4.5113e-07, 3.8735e-07, 1.3681e-06, 1.6820e-06,
        1.0696e-06, 7.1928e-07, 3.5141e-06, 8.3201e-07, 5.5417e-07, 2.8399e-07,
        1.7885e-07, 5.5379e-07, 1.5905e-06, 2.0486e-06, 1.4590e-06, 1.5779e-06,
        4.7841e-07, 9.2599e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.3453e-05, 3.4103e-07, 5.0644e-07, 1.2003e-06, 3.3061e-07, 8.7163e-07,
        1.5089e-07, 4.6084e-07, 3.1875e-07, 3.8633e-07, 1.6336e-07, 1.0784e-06,
        2.8435e-06, 3.8869e-07, 3.7760e-07, 2.6573e-07, 4.7943e-07, 5.3466e-07,
        2.0998e-07, 4.5540e-07, 2.3191e-06, 2.3572e-07, 1.4508e-07, 2.7590e-07,
        5.8948e-07, 4.8471e-07, 3.1647e-07, 8.6188e-07, 5.0567e-06, 2.8138e-07,
        1.6709e-07, 4.1306e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8785e-05, 8.0669e-07, 1.2873e-06, 1.7287e-06, 7.9934e-07, 2.1339e-06,
        3.0950e-07, 9.0828e-07, 5.3245e-07, 5.8771e-07, 3.1282e-07, 2.4736e-06,
        6.4588e-06, 1.0857e-06, 8.1259e-07, 6.2046e-07, 9.3295e-07, 1.5021e-06,
        2.8362e-07, 1.0324e-06, 5.7215e-06, 3.9558e-07, 2.7383e-07, 4.9359e-07,
        8.0298e-07, 1.2940e-06, 3.4671e-06, 8.1513e-07, 5.7388e-07, 3.1709e-07,
        6.3857e-07, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1898e-05, 3.8160e-07, 6.2509e-07, 1.2900e-06, 5.5432e-07, 1.0907e-06,
        2.3749e-07, 6.2455e-07, 4.6421e-07, 5.2830e-07, 3.6231e-07, 1.7624e-06,
        4.6713e-06, 5.4219e-07, 3.5703e-07, 5.0532e-07, 6.2325e-07, 9.9663e-07,
        3.8344e-07, 4.4690e-07, 3.1785e-06, 3.3622e-07, 1.9103e-07, 4.8303e-07,
        7.9549e-07, 7.2388e-07, 2.8357e-06, 6.6789e-07, 2.6319e-07, 5.3677e-07,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0336e-05, 7.7206e-06, 5.8356e-07, 1.8989e-06, 1.0629e-06, 1.2038e-06,
        3.5071e-06, 1.0664e-07, 9.1004e-07, 3.1199e-07, 6.8986e-07, 7.8918e-07,
        1.0092e-06, 5.1680e-07, 8.4303e-08, 7.2529e-07, 7.4305e-06, 9.7876e-07,
        7.1087e-06, 1.1422e-06, 4.9806e-07, 1.1268e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5236e-05, 5.9516e-06, 8.5146e-07, 1.4801e-06, 9.4649e-07, 5.3309e-07,
        4.6505e-06, 1.7153e-07, 7.5222e-07, 1.6098e-07, 6.4088e-07, 5.0367e-07,
        1.2343e-06, 4.2951e-07, 1.5384e-07, 5.2145e-07, 4.6280e-06, 7.8555e-07,
        5.7522e-07, 1.2182e-06, 2.8191e-07, 4.7810e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6991e-05, 5.6284e-06, 7.4241e-07, 1.2081e-06, 6.9882e-07, 5.1542e-07,
        4.5765e-06, 1.7880e-07, 9.5970e-07, 2.8067e-07, 8.0819e-07, 7.3863e-07,
        1.1744e-06, 8.6354e-07, 1.1702e-07, 6.1475e-07, 5.3493e-06, 1.0501e-06,
        4.8181e-06, 1.1241e-06, 7.8641e-06, 6.0680e-07, 7.3348e-07, 1.2708e-06,
        1.9822e-06, 4.4096e-07, 9.9751e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4811e-05, 7.6573e-07, 1.9480e-07, 9.2378e-07, 3.4524e-06, 2.8864e-07,
        2.0285e-06, 5.2205e-07, 4.5370e-07, 1.0815e-07, 3.3423e-07, 8.5054e-07,
        5.5587e-07, 1.9980e-07, 6.4140e-07, 7.3100e-06, 2.8224e-06, 1.4310e-06,
        6.0966e-07, 9.0240e-07, 4.1744e-07, 6.3122e-07, 1.7664e-07, 7.7844e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7144e-05, 4.5855e-07, 1.3826e-07, 5.1167e-07, 2.1472e-06, 2.5230e-07,
        1.5246e-06, 3.4662e-07, 3.4451e-07, 1.3704e-07, 2.6524e-07, 6.6891e-07,
        2.2675e-07, 6.5244e-07, 6.1525e-07, 6.5185e-07, 7.5907e-07, 5.3402e-07,
        4.9869e-07, 3.0147e-06, 1.9834e-06, 7.2161e-07, 3.2649e-07, 6.8593e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1300: [tensor([3.2760e-05, 6.6029e-06, 3.8727e-07, 9.2204e-07, 3.1232e-07, 9.7735e-07,
        7.9977e-07, 9.1896e-07, 1.2395e-06, 1.9605e-07, 5.3739e-07, 1.1683e-05,
        8.5174e-07, 1.5281e-06, 3.6340e-06, 1.6520e-06, 1.8139e-06, 8.7631e-07,
        1.1305e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.5750e-05, 4.4655e-06, 1.8300e-07, 6.8354e-07, 1.5258e-07, 6.6024e-07,
        6.3282e-07, 4.2783e-07, 6.4841e-07, 1.8229e-07, 3.4329e-07, 3.9219e-06,
        8.1534e-07, 9.7183e-07, 4.0130e-07, 9.0586e-07, 8.5731e-07, 8.2748e-07,
        7.2472e-07, 1.5915e-06, 3.7117e-07, 3.6940e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1161e-05, 1.2549e-05, 5.3620e-07, 9.0371e-07, 4.9897e-07, 6.5230e-07,
        1.0347e-06, 7.9412e-07, 1.0523e-06, 4.3718e-07, 5.0183e-07, 5.2246e-06,
        1.8907e-06, 6.0393e-06, 1.7426e-06, 2.3320e-06, 7.9021e-07, 1.1865e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1318e-05, 6.1617e-06, 5.1070e-07, 6.7044e-07, 6.7062e-07, 6.2917e-07,
        9.3804e-07, 1.0357e-06, 7.7621e-07, 3.4132e-07, 4.6282e-07, 1.4213e-06,
        5.1301e-06, 8.5562e-07, 1.2047e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0845e-05, 7.4301e-06, 5.8282e-07, 7.7723e-07, 4.5976e-07, 8.0382e-07,
        9.9434e-07, 7.6667e-07, 1.3050e-06, 4.9384e-07, 6.8616e-07, 4.4267e-06,
        1.1485e-06, 1.0484e-06, 1.2326e-06, 1.5136e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9428e-05, 9.6903e-06, 6.4348e-07, 6.9097e-07, 4.9934e-07, 7.7870e-07,
        7.6180e-07, 7.4150e-07, 7.7844e-07, 1.7016e-07, 6.2462e-07, 8.8177e-06,
        6.9804e-07, 9.9304e-07, 6.5910e-07, 1.4478e-06, 1.1621e-06, 8.0314e-07,
        1.5092e-06, 4.1054e-07, 9.3790e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0604e-05, 3.9233e-06, 4.2842e-07, 5.1796e-07, 3.0352e-07, 7.0444e-07,
        6.1557e-07, 5.6231e-07, 7.6428e-07, 1.0711e-07, 4.5720e-07, 6.4141e-06,
        5.3518e-07, 7.2815e-07, 5.8844e-07, 3.5440e-06, 1.2004e-06, 1.0998e-06,
        6.2828e-07, 7.6403e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.3128e-05, 9.3659e-06, 9.3393e-07, 1.3743e-06, 5.9956e-07, 1.1016e-06,
        1.3062e-06, 1.0177e-06, 1.0969e-06, 3.8628e-07, 5.0138e-07, 5.6647e-06,
        1.3863e-06, 1.1324e-05, 7.5014e-07, 6.3830e-07, 1.4787e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0742e-05, 4.1514e-06, 2.7792e-07, 4.9965e-07, 2.4629e-07, 7.3872e-07,
        6.2014e-07, 6.1088e-07, 6.5674e-07, 1.4243e-07, 4.4211e-07, 6.2147e-06,
        5.6033e-07, 7.0714e-07, 4.7583e-07, 1.3494e-06, 1.1945e-06, 7.5582e-07,
        1.1215e-06, 3.2306e-07, 6.8686e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.1035e-05, 3.0387e-07, 2.0268e-07, 3.8904e-07, 2.7207e-07, 3.6048e-07,
        2.5789e-07, 3.7938e-07, 6.3685e-07, 1.4108e-06, 3.3137e-07, 1.1369e-07,
        3.4532e-07, 1.5561e-07, 5.1025e-07, 3.5157e-06, 3.8329e-07, 3.0509e-07,
        8.5986e-08, 1.2356e-07, 7.8277e-08, 2.3238e-07, 1.4724e-07, 3.6414e-07,
        3.6640e-06, 3.1550e-07, 3.6544e-07, 3.1549e-07, 1.7679e-06, 3.1892e-08,
        1.8430e-07, 6.5486e-07, 4.5816e-07, 5.7399e-08, 2.5371e-07, 1.3140e-05,
        5.4542e-07, 5.4361e-07, 4.1680e-07, 4.2884e-07, 7.8876e-06, 7.0426e-07,
        7.0974e-07, 3.4483e-07, 6.3123e-07, 5.0597e-07, 3.5919e-07, 7.9095e-07,
        2.2787e-06, 6.5046e-07, 2.5106e-07, 9.2200e-07, 2.2208e-06, 7.9107e-07,
        1.8920e-07, 3.7159e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5120e-05, 4.5407e-07, 4.3238e-07, 5.0904e-07, 4.4318e-07, 4.5997e-07,
        3.5889e-07, 3.6226e-07, 4.9442e-07, 1.3404e-06, 3.3527e-07, 1.8007e-07,
        4.1482e-07, 2.8555e-07, 6.4340e-07, 3.8375e-06, 4.7953e-07, 3.6815e-07,
        1.0787e-07, 1.0471e-07, 5.3298e-08, 3.6020e-07, 2.4948e-07, 4.4547e-07,
        5.9996e-06, 3.0665e-07, 5.7119e-07, 3.9122e-07, 1.6669e-06, 2.2344e-07,
        1.8313e-07, 6.2356e-07, 5.1661e-07, 5.0886e-08, 2.8416e-07, 4.9571e-06,
        4.7738e-07, 1.1425e-06, 2.5498e-06, 5.4739e-07, 1.7180e-06, 1.1471e-06,
        1.0475e-06, 4.4662e-07, 7.3066e-06, 1.7676e-07, 1.2135e-06, 2.3543e-07,
        5.0142e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.6713e-05, 1.9455e-07, 4.2105e-07, 3.2450e-07, 2.6656e-07, 2.4549e-07,
        1.8454e-07, 2.5511e-07, 4.2467e-07, 1.0187e-06, 2.6265e-07, 1.8472e-07,
        2.6760e-07, 3.1744e-07, 4.1976e-07, 4.3713e-06, 4.3633e-07, 2.1870e-07,
        1.6446e-07, 1.3833e-07, 8.8799e-08, 2.5267e-07, 1.3128e-07, 2.7052e-07,
        5.2852e-06, 4.5500e-07, 2.4773e-07, 2.7465e-07, 1.4035e-06, 7.9263e-08,
        1.4660e-07, 4.3928e-07, 3.3716e-07, 1.4847e-08, 1.8690e-07, 9.2822e-08,
        3.4537e-06, 5.3672e-07, 6.3957e-07, 1.5847e-06, 2.1570e-07, 3.9711e-07,
        2.9176e-07, 3.2577e-07, 8.4502e-08, 2.6555e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1350: [tensor([2.5511e-05, 1.9222e-06, 9.8345e-07, 9.7120e-07, 7.6102e-07, 5.5963e-07,
        9.1153e-07, 6.6887e-07, 8.2544e-07, 8.0446e-07, 2.7585e-07, 7.0785e-07,
        2.6104e-06, 1.5031e-06, 2.7791e-06, 1.0564e-06, 3.0684e-05, 2.4158e-06,
        2.7123e-06, 5.8132e-06, 9.2285e-07, 5.4583e-07, 1.3888e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9868e-05, 2.7138e-06, 1.2912e-06, 9.7781e-07, 1.0621e-06, 7.0111e-07,
        8.4334e-07, 8.2824e-07, 8.3139e-07, 8.1168e-07, 2.4373e-07, 6.8627e-07,
        3.8610e-06, 1.6765e-06, 3.4452e-06, 1.9342e-06, 2.3563e-06, 1.5109e-06,
        9.3094e-07, 1.5295e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8886e-05, 1.7640e-06, 1.2371e-06, 8.3848e-07, 6.9764e-07, 6.6987e-07,
        9.1826e-07, 8.3294e-07, 7.1716e-07, 5.4035e-07, 9.2668e-08, 7.3321e-07,
        4.0032e-06, 1.3748e-06, 2.7256e-06, 1.0798e-06, 3.5573e-05, 1.0396e-06,
        7.1257e-07, 8.9136e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0554e-05, 1.7901e-06, 1.1330e-06, 6.1275e-07, 6.8420e-07, 4.5447e-07,
        6.8759e-07, 5.4926e-07, 7.2530e-07, 2.8670e-07, 1.8547e-07, 5.0417e-07,
        3.3443e-06, 1.0119e-06, 1.6188e-06, 4.9643e-07, 3.4818e-07, 8.1455e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2235e-05, 1.3544e-06, 1.0507e-06, 7.1608e-07, 6.9934e-07, 4.7879e-07,
        8.7334e-07, 5.7530e-07, 8.4434e-07, 3.7795e-07, 1.6186e-07, 5.3675e-07,
        4.4489e-06, 8.6132e-07, 2.9684e-06, 1.1187e-06, 9.0959e-07, 2.8384e-05,
        7.1624e-07, 4.9611e-07, 8.3022e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8223e-05, 1.5573e-06, 7.2173e-07, 8.2144e-07, 6.1853e-07, 5.2947e-07,
        5.4508e-07, 5.2227e-07, 7.2605e-07, 3.1316e-07, 1.3505e-07, 4.2992e-07,
        1.8752e-06, 7.2927e-07, 1.7306e-06, 1.8376e-06, 1.0359e-06, 4.7045e-07,
        5.6914e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4425e-05, 1.9132e-06, 1.3445e-06, 6.0021e-07, 7.4430e-07, 7.3923e-07,
        6.0558e-07, 9.4369e-07, 8.5410e-07, 3.4326e-07, 7.8052e-08, 6.3043e-07,
        2.0062e-06, 1.5094e-06, 1.9387e-06, 1.3146e-06, 6.5488e-07, 3.9376e-07,
        8.4260e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8768e-05, 5.5602e-06, 1.0046e-06, 3.6767e-06, 1.3278e-06, 1.1424e-06,
        8.7852e-07, 6.9784e-07, 4.9770e-07, 8.2685e-07, 1.1314e-06, 1.9666e-06,
        9.0898e-07, 1.9778e-07, 1.2843e-07, 5.4502e-07, 7.7829e-06, 1.4821e-06,
        5.9392e-06, 2.1809e-06, 5.5884e-07, 2.3231e-07, 7.0991e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8504e-05, 4.3481e-06, 6.4926e-07, 4.1799e-06, 1.1906e-06, 7.6773e-07,
        6.0825e-07, 4.1596e-07, 2.5062e-07, 6.0333e-07, 5.8366e-07, 1.8181e-06,
        7.3204e-07, 1.8030e-07, 7.4625e-08, 5.5066e-07, 1.3013e-05, 1.1274e-06,
        3.6355e-06, 1.4453e-06, 1.2300e-06, 1.6379e-06, 6.8026e-07, 1.0483e-06,
        1.6062e-06, 4.5559e-06, 1.1326e-06, 3.3110e-07, 9.3618e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0098e-05, 4.1343e-06, 5.8148e-07, 3.5595e-06, 8.9047e-07, 5.7683e-07,
        6.1269e-07, 3.3562e-07, 1.9638e-07, 5.1798e-07, 7.2493e-07, 1.9217e-06,
        7.1149e-07, 2.1485e-07, 6.7037e-08, 5.0424e-07, 3.5614e-06, 9.9084e-07,
        3.1326e-06, 7.7185e-07, 6.4215e-07, 4.5486e-06, 1.1105e-06, 2.7503e-07,
        6.4262e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7330e-05, 5.0692e-06, 7.4970e-07, 6.6187e-07, 1.8964e-06, 5.5227e-06,
        6.0303e-07, 7.0310e-07, 6.9483e-07, 3.2401e-07, 4.7302e-07, 9.2279e-07,
        1.6561e-07, 5.4589e-07, 3.6517e-06, 1.0423e-06, 1.1515e-05, 1.6902e-06,
        4.2312e-06, 7.9699e-07, 4.3986e-07, 6.8049e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4601e-05, 6.4762e-06, 9.8734e-07, 5.0839e-07, 1.6625e-06, 3.4101e-06,
        6.4712e-07, 4.6369e-07, 6.9345e-07, 3.3430e-07, 2.6582e-07, 6.6987e-07,
        2.9625e-07, 6.1441e-07, 8.1802e-06, 1.6636e-06, 1.0099e-06, 6.5535e-06,
        1.9494e-06, 6.5669e-06, 2.9137e-06, 1.6698e-06, 6.5137e-07, 8.5679e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1400: [tensor([1.9582e-05, 3.7389e-06, 6.5682e-07, 8.0881e-07, 4.2080e-07, 2.6140e-07,
        1.0591e-06, 3.3674e-07, 5.3080e-07, 3.1065e-07, 1.8962e-08, 3.6730e-07,
        6.3266e-07, 3.0747e-06, 1.0866e-06, 4.6508e-06, 5.7126e-07, 1.0414e-06,
        1.3392e-06, 1.0472e-05, 6.8059e-07, 2.4364e-07, 5.9285e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2522e-05,  5.4732e-06,  7.2866e-07,  8.3460e-07,  4.5341e-07,
         3.2513e-07,  1.2066e-06,  5.5753e-07,  5.3887e-07,  2.9601e-07,
        -2.6354e-08,  3.4619e-07,  9.3371e-06,  1.4323e-06,  1.4028e-06,
         8.8275e-07,  8.9137e-07,  8.6849e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.3616e-05, 4.3394e-06, 1.0346e-06, 7.1386e-07, 3.0204e-07, 6.6216e-07,
        1.8606e-06, 3.6168e-06, 3.0978e-07, 3.1285e-07, 2.3616e-07, 4.4886e-07,
        5.1345e-07, 4.8819e-07, 1.2712e-07, 4.5102e-07, 2.4757e-05, 7.9894e-07,
        8.0555e-07, 3.5915e-07, 5.6139e-07, 1.0315e-06, 6.0055e-07, 6.9682e-07,
        4.5868e-07, 3.8751e-06, 1.5168e-06, 1.1656e-06, 8.0738e-07, 1.1643e-06,
        3.0730e-07, 5.4018e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4989e-05, 3.7549e-06, 7.5792e-07, 9.3235e-07, 4.4557e-07, 7.5853e-07,
        1.9194e-06, 2.0829e-06, 3.6328e-07, 2.5232e-07, 9.3941e-08, 3.7710e-07,
        5.2497e-07, 4.0222e-07, 5.4219e-08, 4.1139e-07, 2.4522e-05, 9.6395e-07,
        4.3356e-06, 6.2090e-07, 8.4583e-07, 4.6881e-07, 1.7434e-06, 3.9076e-07,
        6.1889e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.6556e-05, 4.1389e-06, 6.6997e-07, 7.0473e-07, 4.6694e-07, 6.9078e-07,
        1.9684e-06, 2.3562e-06, 4.6115e-07, 4.0580e-07, 2.3665e-07, 4.9033e-07,
        7.2834e-07, 4.1737e-07, 1.4011e-07, 5.8122e-07, 1.8862e-05, 9.9956e-07,
        5.9072e-06, 1.1222e-06, 4.6829e-07, 4.5713e-07, 3.6613e-07, 8.5680e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7788e-05, 7.6191e-06, 9.8519e-07, 4.7931e-06, 2.6338e-07, 3.7771e-07,
        1.0312e-06, 7.8532e-07, 1.1017e-06, 4.5497e-07, 6.2816e-07, 4.5538e-06,
        1.2117e-06, 1.0598e-06, 1.5808e-05, 6.4702e-07, 8.4525e-07, 1.2391e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.1924e-05, 8.4687e-06, 9.2238e-07, 4.5463e-06, 6.1945e-07, 2.2822e-07,
        1.1033e-06, 7.2757e-07, 1.0983e-06, 3.6296e-07, 6.6544e-07, 5.5316e-06,
        1.3104e-06, 1.2181e-06, 1.4482e-06, 6.7291e-07, 1.4553e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9192e-05, 3.7285e-06, 6.7590e-07, 2.7551e-06, 3.4490e-07, 1.4157e-07,
        8.2780e-07, 6.4171e-07, 7.8820e-07, 2.4943e-07, 3.9194e-07, 3.1140e-06,
        8.7177e-07, 6.2883e-07, 1.1292e-06, 1.4621e-06, 5.8179e-06, 8.8434e-07,
        9.9548e-07, 3.8617e-07, 5.8592e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.9578e-05, 6.4839e-07, 1.3389e-06, 1.0489e-07, 1.2313e-06, 5.4730e-07,
        3.2180e-06, 9.9772e-07, 3.5039e-07, 6.4744e-07, 1.0038e-05, 5.8690e-07,
        6.1711e-06, 2.3466e-06, 1.5678e-06, 6.3384e-06, 2.6057e-06, 6.0363e-06,
        2.0921e-06, 4.0145e-07, 8.9665e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6906e-05, 5.3093e-07, 1.0553e-06, 3.4577e-07, 1.3093e-06, 5.4128e-07,
        3.1990e-06, 9.5097e-07, 2.7973e-07, 6.1438e-07, 1.1924e-06, 8.8180e-07,
        1.5283e-06, 4.8630e-07, 1.8032e-06, 1.5663e-06, 1.0194e-06, 6.5220e-07,
        7.2725e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2798e-05, 2.7862e-07, 1.2740e-06, 1.3264e-07, 1.0705e-06, 4.5096e-07,
        3.8198e-06, 7.0951e-07, 2.8559e-07, 6.5683e-07, 8.4071e-06, 7.1824e-07,
        8.0285e-07, 5.3742e-06, 8.6272e-07, 1.0154e-06, 1.9101e-06, 3.3823e-07,
        8.8456e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9665e-05, 3.0157e-07, 8.5269e-07, 2.4352e-07, 7.3115e-07, 3.4651e-07,
        1.8561e-06, 7.5411e-07, 2.7072e-07, 6.7422e-07, 9.6328e-07, 4.6724e-07,
        9.0372e-07, 6.0020e-06, 1.8239e-06, 4.7858e-07, 8.0176e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1450: [tensor([3.1562e-05, 4.2576e-07, 3.1340e-07, 3.5714e-07, 7.9403e-07, 7.5285e-07,
        3.8739e-06, 2.3465e-07, 6.7549e-07, 8.0344e-07, 7.2407e-07, 8.3094e-07,
        6.9715e-07, 8.1315e-07, 7.0355e-07, 1.4590e-06, 1.4272e-05, 8.0881e-07,
        2.4629e-06, 8.0235e-07, 8.3998e-07, 4.5556e-07, 8.0732e-07, 5.6521e-07,
        1.3975e-06, 8.6624e-07, 7.9616e-07, 6.2961e-06, 1.4639e-05, 5.6842e-07,
        6.5112e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9745e-05,  9.2507e-07,  4.7340e-07,  4.6839e-07,  7.8721e-07,
         5.8609e-07,  3.4098e-06,  2.0871e-07,  6.8232e-07,  8.1019e-07,
         5.3543e-07,  5.2811e-07,  6.1331e-07,  6.2345e-07,  6.6610e-07,
         1.0353e-06,  8.1685e-06,  7.7211e-07,  3.3576e-06,  4.2347e-07,
         9.2202e-07,  4.4375e-07,  9.2444e-07,  4.4069e-07,  1.1222e-05,
         6.8822e-07,  5.0745e-06,  2.1906e-06,  9.4742e-07, -2.7874e-09,
         6.2289e-07,  3.2299e-07,  6.3244e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5017e-05, 4.5197e-07, 4.3552e-07, 3.6775e-07, 7.3158e-07, 6.9647e-07,
        2.9005e-06, 1.2394e-07, 7.8090e-07, 6.2470e-07, 7.2183e-07, 5.8708e-07,
        5.1562e-07, 6.9026e-07, 9.0456e-07, 1.1173e-06, 1.4399e-05, 7.2924e-07,
        2.4697e-06, 5.0777e-07, 8.3199e-07, 5.4634e-07, 8.2375e-07, 4.1425e-07,
        2.9637e-07, 2.3230e-06, 7.6108e-07, 2.5600e-07, 6.2596e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.2658e-05, 7.0952e-07, 5.8890e-07, 3.5035e-07, 8.6637e-07, 4.2628e-07,
        2.9138e-06, 1.5463e-07, 5.9479e-07, 9.6818e-07, 8.1499e-07, 6.5539e-07,
        4.7098e-07, 6.2972e-07, 8.6811e-07, 1.3656e-06, 9.3028e-06, 4.9945e-07,
        1.8339e-06, 4.2913e-07, 9.1494e-07, 4.7604e-07, 8.0026e-07, 6.6988e-07,
        1.6074e-05, 1.1504e-06, 7.2418e-07, 5.0299e-06, 6.5033e-07, 4.0820e-06,
        7.0815e-06, 5.8265e-07, 3.1839e-07, 6.0599e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0628e-05, 7.7582e-07, 4.0914e-07, 5.5206e-07, 1.2268e-06, 6.2841e-07,
        3.2403e-06, 3.1415e-07, 9.3975e-07, 7.8374e-07, 9.1864e-07, 7.5319e-07,
        6.0347e-07, 7.0506e-07, 1.0564e-06, 1.2739e-06, 1.0398e-05, 6.7989e-07,
        3.1438e-06, 6.3081e-07, 1.1616e-06, 5.6719e-07, 1.0956e-06, 5.1161e-07,
        1.7683e-05, 1.9612e-06, 8.0419e-07, 7.6760e-06, 8.1851e-07, 1.7429e-07,
        7.5979e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7293e-05, 7.2228e-07, 6.5089e-07, 4.1742e-07, 7.4012e-07, 4.2616e-07,
        3.1932e-06, 1.9116e-07, 7.2571e-07, 5.3930e-07, 1.0314e-06, 5.0066e-07,
        3.8078e-07, 5.9736e-07, 7.8871e-07, 9.2027e-07, 1.8981e-05, 6.2672e-07,
        2.1221e-06, 5.8266e-07, 1.0272e-06, 6.1515e-07, 9.8272e-07, 6.9325e-07,
        2.9949e-05, 6.8497e-07, 6.6932e-06, 7.5821e-07, 8.3096e-06, 7.3557e-07,
        3.4910e-07, 4.5886e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8068e-05, 3.5434e-07, 1.8906e-07, 2.3551e-07, 7.0236e-07, 4.5800e-07,
        1.7037e-06, 1.5334e-07, 5.8295e-07, 3.9172e-07, 6.3219e-07, 2.7619e-07,
        3.1808e-07, 5.4371e-07, 6.3679e-07, 6.5707e-07, 5.9322e-06, 4.9987e-07,
        1.0809e-06, 3.7237e-07, 5.8622e-07, 3.7187e-07, 4.9574e-07, 2.4791e-07,
        2.2238e-06, 5.8969e-07, 5.2385e-06, 4.5894e-07, 9.8019e-07, 3.2575e-07,
        7.2356e-07, 2.2516e-06, 5.7047e-07, 1.2988e-07, 5.1533e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.9167e-05, 7.9040e-07, 4.9638e-07, 4.7976e-07, 9.3551e-07, 7.7507e-07,
        9.1576e-06, 1.9960e-07, 1.1025e-06, 8.2854e-07, 9.8714e-07, 6.3163e-07,
        5.8838e-07, 1.1686e-06, 1.0605e-06, 1.5760e-06, 1.4027e-05, 9.1538e-07,
        2.5947e-06, 6.7756e-07, 1.1034e-06, 7.0971e-07, 1.3332e-06, 4.4990e-07,
        1.9541e-05, 2.3485e-06, 8.9042e-07, 9.8253e-06, 5.4153e-07, 4.8651e-07,
        9.4086e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0944e-05, 5.1121e-07, 3.4361e-07, 2.9525e-07, 6.4287e-07, 3.0717e-07,
        3.4842e-06, 1.8268e-07, 6.2402e-07, 6.2123e-07, 4.8807e-07, 4.6093e-07,
        4.9341e-07, 6.8856e-07, 8.1563e-07, 6.9723e-07, 9.0425e-06, 6.4139e-07,
        1.5572e-06, 2.2895e-07, 8.3104e-07, 6.0596e-07, 7.0196e-07, 4.1231e-07,
        1.3642e-05, 6.6643e-07, 4.5011e-07, 1.0125e-06, 8.5091e-07, 7.7635e-07,
        4.1150e-07, 1.0184e-06, 6.9860e-07, 4.8886e-07, 1.0464e-06, 1.5220e-06,
        9.5571e-07, 8.4110e-07, 5.3369e-07, 4.4564e-07, 1.7546e-07, 4.6508e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0755e-05, 4.7790e-07, 2.6791e-07, 2.4254e-07, 5.8217e-07, 5.8937e-07,
        3.3261e-06, 1.7076e-07, 5.6444e-07, 7.2396e-07, 8.9042e-07, 5.2446e-07,
        5.7035e-07, 6.5309e-07, 8.3521e-07, 9.5218e-07, 1.0501e-05, 5.3258e-07,
        2.6696e-06, 3.7002e-07, 6.3984e-07, 4.9100e-07, 9.4226e-07, 4.2741e-07,
        8.7268e-07, 6.9844e-07, 9.5845e-07, 4.6628e-07, 3.2996e-07, 1.7053e-07,
        6.6203e-07, 6.1367e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9561e-05, 5.2531e-07, 4.7477e-07, 4.7653e-07, 5.2696e-07, 5.3909e-07,
        2.2384e-06, 2.4161e-07, 5.5026e-07, 6.1567e-07, 7.8718e-07, 4.1011e-07,
        3.1335e-07, 4.6610e-07, 9.1792e-07, 1.0723e-06, 9.2765e-06, 4.9639e-07,
        2.1239e-06, 4.4983e-07, 7.0438e-07, 4.2439e-07, 1.0673e-06, 3.7793e-07,
        5.1641e-07, 5.5364e-07, 7.2422e-07, 3.1915e-06, 8.1702e-07, 8.7321e-07,
        7.8350e-07, 5.2488e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0069e-05, 6.0109e-07, 3.3324e-07, 2.7426e-07, 7.0160e-07, 5.0113e-07,
        2.5948e-06, 6.6477e-08, 5.0925e-07, 5.6138e-07, 5.8635e-07, 4.3444e-07,
        5.1094e-07, 4.4832e-07, 7.0659e-07, 1.2189e-06, 6.1568e-06, 5.7678e-07,
        1.6410e-06, 4.5570e-07, 7.4922e-07, 6.7828e-07, 6.4714e-07, 3.4609e-07,
        9.6997e-06, 9.5926e-07, 5.9575e-07, 5.1007e-07, 7.4768e-06, 6.3012e-07,
        1.1573e-06, 3.2737e-06, 7.5799e-07, 1.2457e-06, 1.2519e-05, 7.4102e-07,
        1.3925e-06, 1.1769e-06, 4.1115e-07, 6.2787e-07, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1500: [tensor([3.2071e-05, 5.3343e-06, 1.7542e-06, 8.0157e-07, 1.2169e-06, 7.9389e-07,
        1.1693e-06, 9.0068e-07, 4.2601e-06, 1.2957e-06, 2.7011e-07, 8.7540e-07,
        4.3452e-06, 7.2279e-07, 2.3299e-06, 1.0548e-05, 4.0002e-06, 1.2070e-06,
        6.3765e-07, 8.5028e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.2494e-05, 3.8371e-06, 1.4430e-06, 6.1152e-07, 9.5515e-07, 5.4179e-07,
        7.9005e-07, 6.3206e-07, 3.1358e-06, 1.6042e-06, 7.9192e-08, 5.9943e-07,
        3.9219e-06, 1.1231e-06, 1.3520e-06, 3.4015e-06, 1.4055e-06, 1.5910e-06,
        1.1116e-06, 4.3099e-07, 8.4004e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.0647e-05, 6.4630e-06, 1.2447e-06, 8.7922e-07, 4.1385e-06, 5.5660e-07,
        4.1721e-07, 7.7616e-07, 5.1752e-07, 3.2645e-07, 5.8793e-07, 1.9998e-06,
        9.8093e-06, 1.2982e-05, 1.9501e-06, 4.9606e-07, 6.9401e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.8281e-05, 8.9284e-06, 1.3479e-06, 8.3293e-07, 3.6242e-06, 1.0297e-06,
        1.6738e-07, 8.1279e-07, 7.6111e-07, 1.7289e-07, 5.1145e-07, 4.3541e-06,
        1.6918e-06, 1.7007e-06, 1.6352e-06, 5.2564e-06, 1.2282e-06, 4.8182e-07,
        1.3087e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.9112e-05, 5.5284e-06, 1.0422e-06, 5.8460e-07, 3.3802e-06, 5.2445e-07,
        1.3232e-07, 5.6656e-07, 6.3556e-07, 2.4618e-07, 4.2740e-07, 3.4639e-06,
        1.4963e-06, 1.4005e-06, 1.2980e-06, 9.2320e-07, 6.4631e-07, 1.0212e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.9205e-05, 3.3916e-06, 7.8520e-07, 7.4304e-07, 8.5463e-07, 2.6617e-07,
        1.6969e-06, 3.0007e-06, 4.7226e-07, 6.8456e-07, 5.0108e-08, 3.0882e-07,
        8.2494e-07, 1.4362e-06, 5.2840e-07, 8.9636e-07, 1.0333e-05, 4.7930e-07,
        7.3022e-07, 7.2559e-07, 4.7346e-07, 3.3594e-07, 1.4356e-05, 6.5767e-07,
        1.4174e-06, 1.4374e-07, 7.3839e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5894e-05, 3.9695e-06, 1.7780e-06, 1.1928e-06, 2.1312e-06, 3.4108e-07,
        2.6868e-06, 5.6828e-06, 9.7374e-07, 1.4478e-06, 2.7818e-07, 7.2958e-07,
        2.1182e-06, 3.1192e-06, 2.2991e-06, 1.9241e-06, 8.4196e-06, 7.8719e-06,
        6.3507e-07, 5.9933e-07, 8.5132e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0380e-05,  6.2387e-06,  1.2142e-06,  1.1242e-06,  1.7657e-06,
         4.0370e-07,  2.5605e-06,  4.1487e-06,  6.0367e-07,  1.1740e-06,
        -4.0533e-09,  4.1562e-07,  1.1535e-06,  1.8588e-06,  8.2157e-07,
         2.0505e-06,  2.0960e-05,  8.4772e-07,  1.3634e-06,  4.8078e-07,
         7.7281e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0635e-05, 8.7176e-06, 1.6790e-06, 1.0888e-06, 3.6343e-06, 6.3454e-06,
        9.7816e-07, 8.3839e-07, 1.5465e-06, 2.0846e-06, 8.8582e-07, 2.1053e-07,
        8.4776e-07, 2.3306e-06, 4.6829e-06, 2.4387e-06, 1.3345e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.8996e-05, 7.4926e-06, 1.3512e-06, 8.8488e-07, 2.6771e-06, 5.0372e-06,
        8.8895e-07, 8.5841e-07, 2.3056e-06, 1.9194e-06, 1.2297e-06, 1.5723e-07,
        9.4449e-07, 1.2474e-05, 1.0830e-06, 1.7673e-06, 1.3895e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.3238e-05, 9.9814e-06, 1.7767e-06, 1.1746e-06, 3.1796e-06, 5.6330e-06,
        7.1277e-07, 8.8584e-07, 1.3565e-06, 2.4100e-06, 9.1342e-07, 2.6330e-07,
        1.0766e-06, 1.2014e-05, 4.3416e-07, 1.3453e-06, 3.2957e-06, 1.8146e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.3208e-05, 3.9185e-06, 6.3696e-07, 4.9226e-07, 4.7886e-07, 3.3010e-07,
        7.0984e-07, 4.7290e-07, 7.6330e-07, 2.7667e-07, 3.3634e-07, 2.6675e-07,
        4.1089e-07, 6.2405e-07, 8.6651e-08, 3.7583e-07, 9.4604e-07, 1.0978e-06,
        1.1790e-06, 1.0484e-06, 1.8081e-06, 3.3993e-07, 2.8012e-06, 3.2232e-06,
        7.2578e-07, 3.9421e-07, 4.8243e-07], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1550: [tensor([3.3043e-05, 9.8682e-06, 1.7204e-06, 1.2233e-06, 1.8793e-06, 1.3502e-06,
        1.0795e-06, 6.8062e-07, 1.1925e-06, 2.5761e-06, 4.9755e-08, 8.4037e-07,
        1.8858e-06, 8.9583e-06, 1.6735e-06, 1.3947e-06, 4.2960e-05, 2.3225e-06,
        1.1945e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.5568e-05, 4.9614e-06, 8.1991e-07, 1.0441e-06, 1.3536e-06, 8.5339e-07,
        9.0694e-07, 3.6598e-07, 8.8249e-07, 1.5123e-06, 1.3247e-07, 4.8203e-07,
        4.1160e-06, 2.1794e-06, 5.9720e-06, 2.9639e-06, 7.6863e-07, 1.8596e-06,
        1.1738e-06, 7.3170e-07, 1.1661e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6431e-05, 4.0227e-06, 1.4430e-06, 1.1092e-06, 1.2150e-06, 7.7682e-07,
        8.4019e-07, 4.9745e-07, 1.0173e-06, 1.4741e-06, 1.0287e-07, 7.4068e-07,
        1.9377e-05, 7.6287e-07, 6.4955e-06, 1.9929e-06, 6.5326e-07, 1.7003e-06,
        2.3724e-06, 1.5479e-06, 8.8622e-07, 2.8152e-07, 1.0546e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2177e-05, 7.3619e-06, 1.2264e-06, 1.1879e-06, 9.5222e-07, 5.8331e-07,
        8.2264e-07, 4.8673e-07, 9.1942e-07, 1.1777e-06, 8.4237e-08, 7.0471e-07,
        9.7268e-06, 8.2193e-07, 5.4096e-06, 1.4312e-06, 1.1820e-06, 1.9902e-06,
        1.1134e-06, 5.9289e-07, 1.1998e-06, 4.5964e-07, 1.2336e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7697e-05,  7.1358e-06,  9.9609e-07,  9.2927e-07,  1.2430e-06,
         7.2952e-07,  8.6636e-07,  3.4884e-07,  1.1014e-06,  1.6470e-06,
        -2.3842e-08,  8.5162e-07,  1.3489e-05,  1.5443e-06,  2.2625e-06,
         5.0370e-07,  1.5897e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0918e-05, 6.0730e-06, 1.1662e-06, 1.2448e-06, 1.3369e-06, 8.7821e-07,
        1.0432e-06, 4.9022e-07, 1.0090e-06, 1.7944e-06, 7.9880e-08, 5.8379e-07,
        4.7490e-06, 1.5720e-06, 7.8098e-06, 1.8068e-06, 1.2258e-06, 7.1583e-07,
        5.7136e-07, 1.2704e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5903e-05, 3.5111e-06, 3.5945e-07, 3.7873e-07, 8.0245e-07, 2.7224e-07,
        4.1033e-07, 3.0036e-07, 5.5676e-06, 3.7147e-07, 6.1054e-07, 5.1206e-07,
        5.3080e-07, 1.8052e-06, 4.3540e-06, 3.2848e-07, 6.1758e-07, 6.8156e-07,
        7.1071e-08, 4.0600e-07, 3.4870e-06, 5.3142e-07, 4.8269e-06, 3.0585e-07,
        5.2984e-06, 2.0335e-05, 6.5130e-07, 4.6924e-07, 7.8816e-07, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9317e-05, 3.2765e-06, 4.4573e-07, 3.0682e-07, 8.1147e-07, 2.4286e-07,
        3.6175e-07, 4.3738e-07, 5.2011e-06, 5.3509e-07, 5.7211e-07, 3.9508e-07,
        5.6183e-07, 1.4715e-06, 4.8604e-06, 1.8924e-07, 9.0312e-07, 3.7625e-07,
        2.0403e-07, 3.0986e-07, 2.9077e-06, 4.0605e-07, 6.8951e-07, 1.2602e-06,
        1.6263e-06, 5.7037e-07, 4.9037e-07, 1.1544e-06, 2.7559e-07, 5.2829e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7822e-05, 2.9040e-06, 2.5414e-07, 7.2856e-08, 1.2375e-06, 3.5305e-07,
        4.8621e-07, 4.4591e-07, 5.5554e-06, 7.7520e-07, 7.3763e-07, 5.6440e-07,
        7.0739e-07, 1.7032e-06, 4.5091e-06, 3.0234e-07, 1.1281e-06, 7.2322e-07,
        1.4490e-07, 4.0110e-07, 3.5383e-06, 8.1856e-07, 7.7597e-06, 4.4204e-06,
        4.0541e-07, 3.2633e-07, 6.9082e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1177e-05, 5.2495e-06, 3.5756e-07, 9.7598e-07, 4.6820e-07, 5.8516e-07,
        5.2655e-07, 4.6081e-07, 1.1920e-06, 3.8391e-07, 3.2605e-06, 7.0559e-07,
        3.8942e-07, 6.3455e-07, 7.3987e-07, 8.3746e-07, 1.9389e-06, 1.2565e-06,
        2.2637e-06, 5.3787e-06, 1.3356e-06, 4.0213e-07, 9.3674e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.4066e-05, 6.1976e-06, 7.7521e-07, 2.2004e-06, 8.5000e-07, 7.0226e-07,
        7.0550e-07, 9.4414e-07, 1.6990e-06, 6.5362e-07, 5.6971e-06, 1.5901e-06,
        3.9217e-07, 6.0205e-07, 6.9567e-07, 1.4388e-06, 5.6751e-06, 7.6770e-06,
        1.5101e-06, 6.9451e-07, 9.2662e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5078e-05, 6.6676e-06, 6.7467e-07, 1.7581e-06, 5.4741e-07, 7.1794e-07,
        9.5364e-07, 7.6117e-07, 2.1331e-06, 3.9460e-07, 4.1881e-06, 1.4102e-06,
        3.1906e-07, 5.4861e-07, 5.0967e-06, 1.1856e-06, 2.1324e-06, 1.5021e-06,
        2.9371e-06, 2.7176e-06, 9.2098e-06, 1.1361e-06, 1.2421e-06, 3.4849e-07,
        1.1393e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1600: [tensor([1.9759e-05, 6.4346e-06, 1.2678e-06, 2.1690e-06, 2.0796e-06, 3.2409e-06,
        5.2606e-06, 1.3698e-06, 3.0755e-07, 3.6355e-07, 6.5574e-07, 1.0685e-05,
        1.0342e-06, 2.5698e-06, 2.9690e-06, 1.0388e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9761e-05, 1.0088e-06, 5.5893e-07, 4.3315e-06, 8.4952e-07, 1.8287e-06,
        2.7277e-06, 6.9036e-07, 2.5096e-07, 9.0804e-07, 8.1933e-06, 1.5345e-06,
        1.0511e-06, 8.7089e-07, 1.0299e-05, 9.3583e-07, 6.9897e-07, 9.7357e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0203e-05, 7.1443e-07, 1.1257e-06, 5.2057e-06, 1.3564e-06, 3.4797e-06,
        2.7785e-06, 5.9137e-07, 5.2337e-07, 7.8436e-07, 9.6999e-07, 1.7139e-06,
        1.1981e-06, 1.9158e-06, 2.1617e-06, 7.1510e-06, 1.6890e-06, 8.3965e-07,
        7.7479e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.6829e-05, 6.3636e-07, 8.2022e-07, 5.1017e-06, 7.6873e-07, 1.5728e-06,
        3.0190e-06, 6.6337e-07, 4.0155e-07, 8.9471e-07, 9.9567e-06, 9.2935e-07,
        1.7258e-06, 4.9369e-06, 4.4002e-06, 1.1554e-06, 7.0916e-07, 1.1313e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4631e-05, 1.1530e-06, 1.1941e-06, 8.3964e-07, 1.7075e-06, 1.7788e-06,
        1.2286e-06, 4.7095e-06, 9.9875e-07, 3.3973e-07, 6.0891e-07, 1.1904e-05,
        2.0857e-06, 1.4108e-06, 6.9307e-06, 2.4401e-06, 3.0640e-06, 1.1420e-05,
        1.0018e-06, 1.4447e-06, 3.6743e-06, 1.1927e-06, 1.4377e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5742e-05, 9.4691e-07, 9.4992e-07, 8.0421e-07, 1.1214e-06, 8.2588e-07,
        6.4853e-07, 2.3013e-06, 6.6909e-07, 1.7274e-07, 6.2639e-07, 1.6402e-06,
        1.3632e-06, 1.0625e-06, 1.0853e-06, 6.4965e-07, 1.1746e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8229e-05, 9.8653e-07, 1.3441e-06, 8.4901e-07, 1.4533e-06, 1.5453e-06,
        1.0262e-06, 3.0242e-06, 5.8143e-07, 1.9489e-07, 5.0067e-07, 1.2399e-05,
        1.8758e-06, 1.5260e-06, 6.3300e-06, 1.4246e-06, 2.9954e-06, 1.5652e-06,
        1.2993e-06, 1.5363e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.5847e-05, 6.1368e-06, 4.9435e-07, 7.6342e-07, 5.7333e-07, 1.2507e-06,
        4.6857e-07, 4.7331e-07, 5.0108e-07, 6.6341e-07, 4.0731e-07, 4.5413e-07,
        6.7687e-07, 5.1578e-07, 1.0001e-06, 2.6291e-06, 6.2937e-07, 4.0289e-07,
        5.5495e-07, 4.6569e-06, 6.2795e-07, 3.3016e-06, 6.7632e-07, 7.0691e-07,
        4.4115e-07, 9.2157e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.3797e-05, 5.1013e-06, 9.1657e-07, 5.6651e-07, 8.7347e-07, 1.6236e-06,
        7.9023e-07, 8.4446e-07, 6.0311e-07, 7.1873e-07, 8.1550e-07, 6.1447e-07,
        8.1025e-07, 2.4718e-07, 1.4376e-06, 3.0956e-06, 1.2352e-06, 3.9311e-07,
        8.5823e-07, 5.7552e-06, 1.1418e-06, 9.5341e-06, 1.3233e-06, 5.0688e-07,
        1.3228e-06, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5979e-05, 4.0960e-06, 3.7503e-07, 6.0242e-07, 5.8142e-07, 1.1549e-06,
        4.5099e-07, 6.4777e-07, 2.4590e-07, 7.2788e-07, 3.9736e-07, 5.8587e-07,
        9.1629e-07, 5.3308e-07, 8.0563e-07, 2.3666e-06, 1.0002e-06, 2.3151e-07,
        5.4266e-07, 3.7590e-06, 1.0742e-06, 7.1739e-06, 6.3351e-07, 3.6265e-07,
        8.1941e-07, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6813e-05, 4.3415e-06, 1.0092e-06, 4.8459e-07, 3.0437e-06, 1.0225e-06,
        9.3346e-07, 2.0937e-06, 2.6133e-06, 7.5583e-07, 5.6639e-07, 4.5770e-07,
        8.6461e-06, 8.8755e-07, 7.9641e-07, 3.5558e-07, 7.8834e-07, 5.1457e-06,
        8.6139e-07, 5.6677e-06, 9.5554e-07, 1.6778e-06, 2.1286e-05, 1.3358e-06,
        4.9708e-07, 9.6989e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0617e-05, 8.4003e-06, 6.7651e-07, 6.6297e-07, 3.0873e-06, 8.5280e-07,
        8.6466e-07, 2.6826e-06, 3.3259e-06, 8.4703e-07, 6.7948e-07, 5.9951e-07,
        5.2607e-06, 8.2024e-07, 5.4864e-07, 3.1844e-07, 9.8247e-07, 5.6872e-06,
        8.8844e-07, 9.3666e-07, 8.6502e-06, 9.7557e-07, 1.1072e-06, 6.0779e-07,
        9.0513e-07, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1650: [tensor([2.7631e-05, 6.4325e-06, 9.4718e-07, 5.1317e-07, 6.3540e-07, 1.0979e-06,
        1.0127e-06, 3.4864e-06, 6.4063e-07, 3.3886e-06, 6.7176e-06, 1.3221e-06,
        8.2042e-07, 3.9137e-06, 4.9177e-06, 6.2137e-07, 5.3307e-07, 4.7270e-07,
        5.6988e-07, 2.8669e-07, 5.7293e-07, 8.3397e-07, 1.6970e-06, 3.2668e-06,
        1.3955e-06, 6.1617e-06, 1.3129e-05, 1.2226e-06, 4.3790e-07, 6.1082e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1460e-05, 6.7172e-07, 5.2212e-07, 1.9278e-06, 3.8160e-07, 1.0046e-07,
        4.0890e-06, 2.7897e-07, 6.2868e-07, 2.8429e-06, 1.7912e-06, 4.0169e-07,
        6.4309e-07, 4.7076e-06, 8.3901e-07, 3.8743e-07, 1.3020e-06, 7.8856e-07,
        5.9345e-07, 2.6156e-07, 2.0790e-06, 3.6744e-07, 1.1402e-06, 2.5411e-06,
        1.0371e-06, 2.9295e-07, 3.6265e-07, 7.7066e-07, 7.7728e-07, 1.5259e-06,
        7.0840e-07, 8.3766e-07, 9.7607e-07, 2.9484e-07, 1.1687e-05, 7.8276e-07,
        8.0426e-07, 6.1098e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2077e-05, 6.4806e-07, 7.0620e-07, 1.9829e-06, 6.4947e-07, 8.5461e-08,
        3.3040e-06, 4.5503e-07, 9.2653e-07, 2.9945e-06, 3.0980e-06, 3.3368e-07,
        9.6941e-07, 9.0282e-06, 1.1112e-06, 7.5773e-07, 1.1432e-06, 8.2032e-07,
        8.7008e-07, 2.8068e-07, 3.2328e-06, 5.1337e-07, 9.8370e-07, 2.1162e-06,
        1.3712e-06, 2.6609e-07, 5.9692e-07, 1.2662e-06, 1.0174e-06, 2.3471e-06,
        6.1114e-07, 1.2602e-06, 6.2660e-07, 1.3402e-05, 8.7742e-07, 9.3277e-07,
        7.2552e-07, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1941e-05, 4.9378e-07, 6.2826e-07, 1.0654e-06, 3.4972e-07, 3.7547e-08,
        2.9720e-06, 2.7788e-07, 6.3276e-07, 2.0507e-06, 1.3778e-06, 3.0393e-07,
        9.2299e-07, 6.5580e-06, 6.4022e-07, 4.3394e-07, 1.1025e-06, 6.6457e-07,
        6.4573e-07, 1.3899e-07, 1.3511e-06, 4.4856e-07, 8.9019e-07, 1.7943e-06,
        1.4395e-06, 2.0702e-07, 4.0789e-07, 5.9402e-07, 6.5646e-07, 1.6716e-06,
        6.2692e-07, 3.9635e-07, 8.0853e-06, 4.5912e-07, 7.6703e-07, 6.4206e-07,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2283e-05, 4.1129e-06, 6.9365e-07, 1.3359e-06, 6.0445e-06, 4.9106e-07,
        5.1613e-07, 3.5563e-07, 6.1688e-07, 3.0428e-06, 5.8040e-07, 4.6270e-07,
        7.8034e-07, 5.2265e-07, 6.6752e-07, 5.7832e-07, 4.7374e-06, 4.3725e-07,
        1.8309e-07, 3.5157e-07, 3.2679e-06, 8.5002e-07, 6.9801e-06, 1.1299e-06,
        5.3504e-07, 7.4006e-07, 1.2808e-06, 1.0702e-06, 7.7129e-07, 3.3208e-07,
        6.6963e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6692e-05, 1.1321e-05, 1.6102e-06, 2.9342e-06, 1.1235e-05, 8.7270e-07,
        9.9563e-07, 8.3736e-07, 1.3248e-06, 5.7334e-06, 1.1925e-06, 9.6166e-07,
        1.7459e-06, 8.1269e-07, 7.8404e-07, 7.4732e-07, 1.1572e-05, 9.4028e-07,
        4.8255e-07, 9.7671e-07, 6.8194e-06, 1.9144e-06, 2.3274e-06, 1.3595e-06,
        2.8590e-06, 9.9530e-07, 1.0051e-06, 1.0266e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.5753e-05, 3.8040e-06, 7.7973e-07, 1.6921e-06, 3.4162e-06, 5.0914e-07,
        6.8792e-07, 3.2243e-07, 7.0406e-07, 3.4200e-06, 7.1942e-07, 3.8430e-07,
        9.0410e-07, 5.9883e-07, 5.0075e-07, 4.7901e-07, 5.2295e-06, 6.4861e-07,
        9.9122e-08, 5.1846e-07, 1.4249e-06, 8.1405e-06, 1.8436e-05, 9.1856e-07,
        5.7812e-07, 1.1725e-06, 7.8977e-07, 1.6197e-06, 3.7848e-07, 7.6463e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8212e-05, 1.2616e-05, 2.8965e-06, 4.7490e-07, 9.5815e-06, 1.4206e-06,
        1.8852e-06, 7.4285e-07, 1.1662e-06, 4.7206e-07, 1.0097e-06, 7.9934e-06,
        2.8048e-06, 5.5401e-06, 1.7485e-06, 2.3600e-06, 1.6461e-05, 1.9407e-06,
        1.4728e-06, 2.0777e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7686e-05, 8.4848e-06, 1.5418e-06, 7.3936e-07, 7.2856e-06, 1.2426e-06,
        1.3490e-06, 3.9441e-07, 1.1142e-06, 3.5167e-07, 8.3505e-07, 1.1622e-05,
        1.3499e-06, 9.7570e-06, 2.1216e-06, 2.4376e-06, 3.3793e-06, 1.9686e-06,
        1.9830e-06, 1.1732e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7308e-05, 7.3121e-06, 1.5434e-06, 1.3279e-06, 5.0253e-06, 1.3759e-06,
        1.6147e-06, 1.8619e-07, 1.0761e-06, 4.5484e-07, 1.0006e-06, 3.1455e-06,
        1.4692e-05, 2.2915e-05, 1.5102e-06, 1.3220e-06, 1.2146e-05, 3.2916e-06,
        1.2370e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2591e-05, 4.6261e-07, 3.9764e-07, 5.3737e-07, 5.3828e-07, 2.4937e-07,
        4.7380e-07, 2.4268e-07, 6.4536e-07, 1.6673e-07, 4.1489e-07, 2.7508e-07,
        6.5771e-07, 3.5992e-06, 1.1464e-07, 6.6352e-07, 1.4962e-07, 2.5642e-07,
        6.3475e-07, 6.4762e-07, 5.2513e-07, 6.5753e-07, 3.8616e-07, 7.9163e-07,
        6.6055e-07, 8.3481e-07, 6.8258e-07, 3.8842e-07, 1.1875e-06, 4.0603e-07,
        1.1353e-06, 6.3129e-06, 2.1763e-07, 1.0308e-06, 3.3636e-07, 3.7243e-07,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9770e-05, 8.9462e-07, 6.1153e-07, 9.9341e-07, 1.5597e-06, 4.1498e-07,
        6.2033e-07, 6.4590e-07, 1.1685e-06, 3.7363e-07, 9.1354e-07, 4.1663e-07,
        1.2340e-06, 7.8416e-06, 4.8136e-07, 2.2374e-06, 4.1177e-07, 9.1265e-07,
        1.9818e-06, 2.6699e-06, 1.3831e-06, 1.0368e-06, 7.8840e-07, 1.8151e-06,
        1.8855e-05, 4.7773e-07, 1.3428e-06, 4.7964e-07, 7.8819e-07, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1700: [tensor([2.0000e-05, 5.7157e-06, 1.5241e-06, 1.2190e-07, 6.0238e-07, 1.1529e-06,
        1.0407e-06, 5.0357e-07, 2.6351e-07, 7.3849e-07, 3.9158e-06, 1.3870e-06,
        2.1502e-06, 1.2950e-06, 1.6476e-06, 3.8869e-06, 9.6401e-07, 5.8225e-07,
        9.7024e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.0894e-05, 1.8934e-06, 9.1769e-07, 2.1494e-06, 1.6427e-06, 2.9897e-06,
        1.0993e-06, 1.3084e-06, 4.4834e-07, 1.0841e-06, 2.0592e-06, 6.4376e-07,
        1.6100e-06, 3.6198e-06, 9.9326e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.4740e-05, 1.5771e-06, 7.4370e-07, 1.9788e-06, 1.5586e-06, 1.4592e-06,
        7.5234e-07, 1.6197e-06, 4.2300e-07, 8.6910e-07, 1.9360e-06, 1.1301e-06,
        2.4758e-06, 1.2656e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.0462e-05, 3.0570e-06, 1.1393e-06, 4.0297e-06, 2.1882e-06, 3.2394e-06,
        9.5325e-07, 1.9001e-06, 8.6716e-07, 1.3219e-06, 2.1908e-06, 1.3508e-06,
        1.5318e-06, 3.8086e-06, 1.7769e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([6.3273e-05, 1.0081e-06, 1.1441e-06, 4.7857e-07, 7.0521e-07, 7.3627e-07,
        7.5141e-07, 4.2375e-06, 1.7147e-06, 6.4483e-07, 6.6180e-07, 4.8981e-07,
        2.6667e-07, 5.3182e-06, 2.0898e-07, 9.4271e-07, 1.2240e-06, 6.5689e-07,
        7.2732e-07, 9.4521e-06, 4.7664e-07, 3.0842e-07, 2.5349e-07, 6.4958e-07,
        5.4502e-07, 8.0788e-07, 4.2384e-06, 1.0335e-06, 6.6542e-06, 3.3072e-06,
        1.2993e-07, 9.9436e-07, 3.1127e-08, 6.4021e-07, 1.2430e-06, 1.4478e-06,
        1.4495e-06, 8.7138e-07, 5.0928e-07, 7.6440e-07, 8.6131e-07, 4.8467e-06,
        1.8754e-06, 3.7313e-07, 9.5893e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.7270e-05, 4.4602e-07, 4.8167e-07, 3.4147e-07, 3.7504e-07, 2.8260e-07,
        4.9253e-07, 1.9882e-06, 8.2738e-07, 3.8342e-07, 1.5107e-07, 2.9467e-07,
        2.2226e-07, 3.4704e-06, 1.6181e-07, 4.0863e-07, 8.7125e-07, 3.0602e-07,
        4.0196e-07, 4.9825e-06, 1.1124e-07, 1.5725e-07, 3.0432e-07, 4.5412e-07,
        2.6275e-07, 3.1549e-07, 2.0541e-06, 6.6602e-07, 1.9459e-06, 1.8013e-06,
        8.1730e-08, 4.4858e-07, 7.2362e-08, 2.5879e-07, 5.8693e-07, 7.4930e-07,
        5.4267e-07, 6.3978e-07, 3.1259e-07, 7.4356e-07, 4.2904e-07, 5.6721e-07,
        1.0845e-06, 5.1125e-07, 1.1980e-05, 3.0391e-07, 5.0107e-07, 3.2723e-07,
        5.3476e-07, 4.6324e-07, 4.6470e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.9637e-05, 4.8817e-07, 7.0637e-07, 3.1564e-07, 7.0447e-07, 5.9596e-07,
        6.1907e-07, 2.9897e-06, 9.3159e-07, 5.0306e-07, 3.9319e-07, 4.0001e-07,
        3.7611e-07, 4.6482e-06, 1.7732e-07, 5.2315e-07, 1.1415e-06, 3.3291e-07,
        6.5266e-07, 5.2820e-06, 3.9014e-07, 8.8030e-08, 2.8747e-07, 5.6241e-07,
        6.7146e-07, 3.9646e-07, 2.5971e-06, 1.0332e-06, 3.6486e-06, 1.8608e-06,
        3.6911e-08, 9.3287e-07, 1.9890e-07, 3.7023e-07, 3.4319e-06, 1.0048e-06,
        3.2682e-06, 1.1686e-06, 1.4902e-06, 2.8766e-06, 1.8512e-07, 1.3278e-06,
        5.2197e-07, 5.8733e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.9355e-05, 8.7557e-06, 1.2721e-06, 7.2436e-06, 1.3488e-06, 9.7210e-07,
        2.7033e-06, 4.8743e-06, 6.7854e-07, 1.8886e-06, 5.4716e-07, 4.8242e-07,
        1.4936e-06, 1.5904e-05, 2.1275e-06, 1.5019e-05, 3.1341e-06, 1.8107e-05,
        1.4500e-06, 8.6264e-07, 1.6865e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.3746e-05, 7.0611e-06, 1.3717e-06, 4.8183e-06, 1.3344e-06, 8.3463e-07,
        2.2626e-06, 6.4871e-06, 6.4375e-07, 1.6520e-06, 7.2103e-07, 3.4721e-07,
        1.0481e-06, 9.8152e-06, 1.4534e-06, 1.3740e-06, 2.7555e-05, 1.1078e-06,
        6.2524e-07, 1.3823e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.1205e-05, 4.0806e-06, 6.4998e-07, 3.3689e-06, 6.0108e-07, 4.9298e-07,
        1.3977e-06, 3.6571e-06, 2.5623e-07, 9.3379e-07, 2.3941e-07, 2.3544e-07,
        5.0357e-07, 1.1342e-05, 7.7755e-07, 1.1740e-05, 1.6804e-06, 4.1007e-06,
        9.4187e-07, 1.6760e-06, 3.2636e-06, 2.6887e-07, 1.1904e-06, 5.0230e-07,
        2.1652e-07, 5.1944e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.3159e-05, 8.0939e-06, 1.7154e-06, 1.3730e-06, 5.5016e-06, 4.9609e-06,
        1.8600e-06, 1.3407e-06, 1.2642e-05, 9.3701e-07, 1.3900e-07, 1.4206e-06,
        2.9995e-05, 3.2470e-06, 1.9297e-06, 1.8717e-05, 2.0802e-06, 4.7721e-07,
        2.1363e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.7236e-05, 1.0789e-05, 1.8644e-06, 1.8464e-06, 5.1768e-06, 4.0805e-06,
        1.5527e-06, 2.2885e-06, 1.3088e-05, 1.1494e-06, 2.0853e-07, 1.2360e-06,
        3.0762e-05, 2.4715e-06, 1.7699e-05, 2.3184e-06, 1.1231e-06, 2.4509e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1750: [tensor([4.4603e-05, 2.8128e-06, 4.0274e-07, 3.3272e-06, 6.1709e-06, 2.4679e-07,
        8.1539e-07, 7.6418e-07, 1.4533e-06, 1.6135e-06, 4.2650e-07, 5.7903e-06,
        3.4348e-07, 1.0565e-06, 4.3628e-07, 3.1080e-06, 6.5056e-07, 1.7418e-07,
        5.5206e-07, 4.1301e-06, 6.2541e-07, 5.7440e-06, 1.3164e-06, 1.3780e-06,
        2.8966e-05, 7.0045e-07, 1.2042e-06, 6.3072e-07, 4.6922e-06, 1.6029e-06,
        1.7087e-06, 9.0711e-07, 9.5470e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.8384e-05, 6.8820e-06, 5.9530e-07, 4.5461e-06, 6.6781e-06, 2.6989e-07,
        1.1754e-06, 7.7736e-07, 1.7044e-06, 1.1338e-06, 8.1953e-07, 6.6211e-06,
        4.0454e-07, 1.8944e-06, 1.0368e-06, 4.3627e-06, 8.4102e-07, 3.7237e-07,
        7.1354e-07, 5.4390e-06, 6.3008e-07, 1.8896e-06, 8.7959e-06, 1.2734e-06,
        1.0182e-06, 8.0328e-07, 1.2325e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.0327e-05, 4.1364e-06, 5.1186e-07, 4.2342e-06, 5.2625e-06, 3.3249e-07,
        9.1962e-07, 5.9094e-07, 1.3864e-06, 1.1579e-06, 5.5424e-07, 3.8498e-06,
        2.5169e-07, 7.4286e-07, 5.0082e-07, 2.5961e-06, 7.6451e-07, 2.9157e-07,
        7.4452e-07, 1.6059e-05, 8.1839e-07, 5.5026e-06, 2.0288e-05, 1.3219e-06,
        1.3820e-06, 1.1225e-05, 1.8056e-06, 1.8001e-06, 1.0170e-06, 8.2771e-06,
        4.8077e-07, 1.8314e-06, 1.0069e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6569e-05, 3.1521e-06, 7.5034e-07, 6.2006e-06, 2.4063e-07, 4.6099e-07,
        1.7394e-06, 5.1030e-06, 4.4819e-07, 1.6079e-06, 5.2375e-07, 5.4536e-06,
        8.4357e-07, 3.2041e-07, 6.7253e-07, 8.5970e-06, 1.2644e-06, 1.1897e-05,
        2.9278e-06, 1.1755e-06, 6.9876e-07, 1.0010e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.6853e-05, 3.2446e-06, 6.4397e-07, 6.1013e-06, 3.3080e-07, 6.5983e-07,
        1.9624e-06, 4.5163e-06, 5.7873e-07, 1.7931e-06, 5.6552e-07, 7.3979e-06,
        7.9195e-07, 3.0392e-07, 7.0133e-07, 5.6221e-06, 1.8994e-06, 7.7078e-06,
        1.5809e-06, 3.8977e-05, 1.5276e-06, 5.6948e-07, 1.2500e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.2718e-05, 4.7074e-06, 1.0173e-06, 5.5167e-06, 5.1199e-07, 4.0319e-07,
        2.2688e-06, 7.0771e-06, 2.9217e-07, 2.4895e-06, 8.4833e-07, 1.0654e-05,
        7.5034e-07, 4.9848e-07, 7.8331e-07, 6.2695e-06, 1.7651e-06, 8.2722e-06,
        1.7514e-06, 5.5384e-07, 1.3830e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.5698e-05, 4.7763e-06, 4.9659e-07, 3.1103e-06, 4.3514e-07, 5.9334e-07,
        8.2249e-07, 5.7163e-07, 4.9668e-07, 2.7001e-06, 2.8662e-07, 1.2706e-06,
        1.2006e-07, 5.9244e-07, 3.5049e-06, 8.6871e-07, 7.6618e-07, 1.1114e-06,
        1.0293e-06, 1.2061e-06, 1.0492e-06, 9.3478e-07, 6.3984e-06, 1.7881e-06,
        3.5305e-07, 8.4218e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.6347e-05, 7.7352e-06, 1.1592e-06, 4.8052e-06, 6.2441e-07, 8.8183e-07,
        5.7972e-07, 6.7975e-07, 6.3432e-07, 3.8975e-06, 3.5819e-07, 9.0327e-07,
        4.6220e-07, 1.0401e-06, 1.3662e-05, 1.1126e-06, 1.2099e-06, 9.4861e-07,
        5.6802e-06, 9.3194e-07, 1.9040e-06, 3.1260e-07, 1.2906e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.8756e-05, 5.0816e-06, 4.0388e-07, 4.4249e-06, 7.3988e-07, 5.3514e-07,
        6.1016e-07, 7.0167e-07, 5.7391e-07, 3.2184e-06, 2.3149e-07, 1.3276e-06,
        1.9705e-07, 6.9106e-07, 3.9861e-06, 6.9493e-07, 2.8226e-06, 9.8272e-07,
        9.7738e-07, 1.0079e-06, 9.8450e-07, 1.2926e-06, 6.7477e-07, 8.9907e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.1685e-05, 6.5605e-06, 1.5882e-06, 8.8090e-07, 2.8656e-07, 3.0870e-07,
        3.5362e-06, 7.3971e-07, 1.7619e-06, 2.0802e-06, 4.2261e-06, 9.0888e-07,
        2.4828e-07, 9.5405e-07, 2.2831e-05, 1.1231e-06, 1.7877e-05, 8.1536e-07,
        1.6407e-06, 6.8479e-06, 1.0236e-06, 5.9415e-07, 1.3585e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.6812e-05, 8.1971e-06, 1.4004e-06, 7.2833e-07, 3.2948e-07, 1.5876e-07,
        3.0702e-06, 7.9267e-07, 1.1138e-06, 1.4620e-06, 3.3612e-06, 1.4132e-06,
        3.5561e-07, 8.1347e-07, 1.2040e-05, 9.7794e-07, 1.0194e-05, 8.4401e-06,
        6.4890e-06, 5.9973e-07, 8.4428e-07, 1.0678e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.0807e-05, 1.0681e-05, 1.4825e-06, 1.1560e-06, 1.6092e-07, 4.9850e-07,
        3.7578e-06, 8.7626e-07, 1.8457e-06, 2.4156e-06, 4.7139e-06, 1.1843e-06,
        4.8219e-07, 9.8775e-07, 2.3882e-05, 9.0916e-07, 1.3160e-05, 1.4797e-06,
        1.4231e-06, 8.0157e-07, 1.2530e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1800: [tensor([3.4744e-05, 1.1489e-05, 8.0314e-07, 9.5894e-07, 9.3628e-07, 6.5773e-07,
        9.8185e-07, 9.5458e-07, 1.7521e-06, 3.9107e-06, 4.0819e-07, 4.1491e-07,
        9.7012e-07, 1.7032e-06, 8.8044e-07, 2.9415e-06, 2.4880e-05, 1.2183e-06,
        5.2088e-07, 1.3592e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6726e-05, 6.6582e-06, 8.7434e-07, 6.5991e-07, 7.6922e-07, 3.2495e-07,
        7.5549e-07, 7.2179e-07, 1.4474e-06, 4.1754e-06, 7.8828e-07, 2.3993e-07,
        9.3283e-07, 1.1855e-06, 8.0409e-07, 1.6953e-06, 2.9197e-06, 1.8912e-05,
        7.8899e-07, 6.0626e-07, 1.2822e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0383e-05, 7.4768e-06, 1.0672e-06, 1.0126e-06, 1.1754e-06, 4.9412e-07,
        8.4801e-07, 1.1888e-06, 1.9084e-06, 4.6111e-06, 5.7186e-07, 1.1701e-07,
        7.7534e-07, 1.5742e-06, 9.0950e-07, 1.6092e-06, 1.9618e-06, 6.7675e-07,
        8.8354e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6372e-05, 1.0036e-06, 1.9689e-06, 2.3245e-06, 4.9538e-06, 7.3335e-06,
        1.0844e-06, 9.5116e-07, 1.4064e-05, 9.9599e-07, 4.6285e-06, 8.4574e-07,
        6.1093e-07, 3.0261e-07, 9.4369e-07, 7.9203e-06, 1.6051e-06, 1.0277e-05,
        1.7064e-06, 1.7248e-06, 1.6418e-05, 2.7795e-06, 7.7419e-07, 1.2037e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7380e-05, 6.9538e-07, 1.0701e-06, 2.7277e-06, 2.2710e-06, 4.3250e-06,
        6.0629e-07, 7.8778e-07, 2.9354e-06, 3.8331e-07, 2.3341e-06, 7.4887e-07,
        3.1462e-07, 9.5311e-08, 4.9153e-07, 4.7668e-06, 2.0920e-06, 4.0021e-06,
        8.1897e-07, 9.6208e-07, 9.4595e-06, 1.5199e-06, 1.1099e-06, 6.6333e-07,
        5.5642e-07, 3.7714e-07, 4.5813e-07, 6.8902e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.0510e-05, 1.3058e-06, 2.5852e-06, 3.8625e-06, 5.1480e-06, 7.2277e-06,
        1.1658e-06, 1.2030e-06, 1.2575e-05, 1.0640e-06, 3.5787e-06, 8.7928e-07,
        5.4252e-07, 3.1792e-08, 9.7406e-07, 8.1962e-06, 3.3738e-06, 2.7250e-06,
        2.1942e-06, 2.2784e-06, 2.8553e-06, 1.4484e-05, 1.0710e-06, 2.0625e-06,
        7.6534e-06, 1.5998e-06, 5.3681e-07, 8.0126e-07, 1.4613e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2735e-05,  1.1328e-06,  1.3703e-06,  5.5711e-06,  7.9706e-06,
         3.2898e-07,  6.5507e-07,  1.1985e-06,  1.0095e-05,  8.1667e-07,
         6.7539e-07,  6.7509e-06,  6.3364e-07,  1.0519e-06, -3.2863e-08,
         5.9636e-07,  9.7471e-06,  2.0462e-06,  6.0604e-06,  2.9305e-05,
         1.1753e-06,  1.1369e-06,  4.9687e-06,  2.4657e-06,  1.9476e-06,
         8.0326e-07,  1.4327e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.5978e-05, 9.4621e-07, 8.6091e-07, 4.5516e-06, 6.1542e-06, 3.4464e-07,
        5.9435e-07, 8.6202e-07, 5.6989e-06, 5.2151e-07, 4.4667e-07, 5.8656e-06,
        3.2867e-07, 1.0081e-06, 1.4871e-07, 6.0546e-07, 3.3238e-06, 3.0702e-06,
        1.6143e-06, 1.8941e-06, 3.4755e-06, 7.0519e-07, 5.4399e-07, 9.4774e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6322e-05, 1.2174e-06, 1.2410e-06, 5.1038e-06, 1.0713e-05, 3.2359e-07,
        7.5840e-07, 1.1710e-06, 6.3218e-06, 8.4130e-07, 6.4335e-07, 6.8839e-06,
        5.4146e-07, 1.2436e-06, 1.7905e-07, 9.1919e-07, 8.5076e-06, 2.0701e-06,
        4.6280e-06, 2.4207e-06, 7.3992e-06, 1.0583e-05, 2.0950e-06, 6.2726e-07,
        1.5779e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.0155e-05, 5.0692e-06, 7.3753e-07, 6.6255e-07, 5.1599e-06, 6.6291e-07,
        9.4985e-07, 6.0775e-07, 7.0585e-06, 4.5356e-07, 6.3280e-07, 9.6549e-07,
        3.1315e-06, 5.4447e-06, 5.3189e-07, 1.5295e-06, 2.7202e-07, 7.8235e-07,
        7.6433e-06, 7.7028e-07, 1.2551e-06, 7.1980e-06, 1.6337e-05, 1.9143e-06,
        4.9291e-07, 9.3309e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0352e-05, 9.4374e-06, 8.3848e-07, 9.9932e-07, 8.2863e-06, 6.5436e-07,
        6.0222e-07, 5.3882e-07, 7.7275e-06, 4.9904e-07, 1.0779e-06, 7.9261e-07,
        3.9354e-06, 1.7774e-05, 6.6562e-07, 1.6474e-06, 5.1936e-07, 9.6674e-07,
        1.1581e-05, 1.2338e-06, 1.2974e-06, 6.5038e-07, 6.9230e-07, 1.3684e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.3798e-05, 5.7350e-06, 4.6652e-07, 8.6227e-07, 5.3095e-06, 7.6409e-07,
        6.3853e-07, 5.5003e-07, 5.6350e-06, 4.4327e-07, 5.7160e-07, 1.0712e-06,
        3.0315e-06, 5.5270e-06, 3.6114e-07, 1.2894e-06, 3.6454e-07, 6.8927e-07,
        4.5356e-06, 1.0253e-06, 1.0995e-06, 2.6915e-06, 1.0026e-06, 1.4625e-06,
        9.0567e-06, 4.8154e-06, 1.2319e-06, 6.4777e-07, 1.6556e-06, 3.9411e-06,
        7.7256e-07, 7.4450e-07, 1.6531e-06, 1.0027e-06, 7.7674e-07, 1.0304e-06,
        6.2510e-07, 9.2551e-07], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1850: [tensor([2.7121e-05, 3.3361e-06, 3.6952e-07, 6.2598e-07, 4.0160e-07, 6.1108e-07,
        1.6627e-06, 5.9258e-07, 4.5687e-07, 9.2196e-07, 2.7318e-07, 2.0066e-07,
        5.6995e-07, 3.2824e-07, 1.7135e-06, 4.7715e-06, 3.2854e-07, 3.9220e-07,
        3.7342e-07, 4.1454e-07, 3.0753e-07, 2.0962e-06, 1.6371e-07, 1.0572e-07,
        3.3610e-07, 7.6011e-06, 3.9494e-07, 9.2446e-07, 7.0838e-07, 5.6465e-07,
        7.9906e-07, 6.8424e-07, 7.8692e-07, 6.5258e-07, 1.1386e-05, 3.9302e-07,
        5.8992e-07, 8.3940e-07, 5.8322e-07, 2.9471e-06, 8.2468e-07, 1.1903e-06,
        8.5043e-07, 5.0928e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7760e-05, 4.2585e-06, 7.2589e-07, 6.6759e-07, 5.6540e-07, 8.1021e-07,
        3.2378e-06, 8.5668e-07, 9.2163e-07, 9.8803e-07, 3.1008e-07, 2.8181e-07,
        1.0451e-06, 6.6119e-07, 1.7334e-06, 6.9619e-06, 4.1876e-07, 7.3847e-07,
        4.7594e-07, 4.5180e-07, 4.6639e-07, 3.4368e-06, 2.5324e-07, 1.2490e-07,
        6.0753e-07, 4.4458e-06, 9.1520e-07, 7.9859e-07, 1.1220e-06, 9.6792e-07,
        1.7610e-06, 7.8705e-07, 1.3178e-06, 4.4863e-06, 1.0511e-06, 9.2725e-07,
        8.1521e-07, 9.9190e-07, 4.4082e-07, 7.7087e-07, 3.7937e-07, 1.0771e-06,
        7.9483e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.4374e-05, 7.9213e-06, 5.8818e-07, 7.3869e-07, 5.4405e-07, 9.2378e-07,
        3.2662e-06, 1.2175e-06, 7.8408e-07, 1.3002e-06, 5.9053e-07, 5.9842e-07,
        1.1887e-06, 9.8028e-07, 3.8964e-06, 8.8348e-06, 6.0413e-07, 5.5488e-07,
        9.0787e-07, 1.0958e-06, 5.2623e-07, 3.6081e-06, 3.8497e-07, 2.9844e-07,
        7.6778e-07, 1.0285e-05, 2.0288e-06, 3.2144e-06, 1.4508e-06, 5.3486e-07,
        6.7689e-07, 1.3016e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2157e-05, 6.8815e-06, 1.2628e-06, 9.5872e-07, 7.5610e-07, 1.2774e-06,
        7.0165e-06, 9.3966e-07, 1.3226e-06, 1.4677e-06, 6.9427e-07, 5.4599e-07,
        1.8656e-06, 1.4523e-06, 4.5202e-06, 1.4435e-05, 9.2343e-07, 7.0724e-07,
        1.1814e-06, 9.9746e-07, 1.1572e-06, 5.8730e-06, 4.5225e-07, 3.0280e-07,
        6.5208e-07, 1.7963e-05, 2.3679e-06, 1.1697e-05, 8.3823e-07, 2.7966e-07,
        1.6542e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6210e-05, 4.3334e-06, 5.5435e-07, 6.2121e-07, 5.1807e-07, 7.3674e-07,
        2.9682e-06, 8.8569e-07, 8.7837e-07, 1.0213e-06, 1.8409e-07, 4.3910e-07,
        8.0113e-07, 4.8691e-07, 2.1120e-06, 7.4167e-06, 5.9368e-07, 3.4981e-07,
        6.7511e-07, 7.3067e-07, 7.2557e-07, 2.5651e-06, 2.7128e-07, 1.2268e-07,
        4.2194e-07, 4.4802e-06, 7.6919e-07, 1.0473e-06, 8.6505e-07, 7.1826e-07,
        6.0170e-07, 5.2483e-07, 9.8581e-07, 6.3321e-07, 1.3995e-06, 5.3781e-06,
        6.7148e-07, 5.0358e-07, 9.4205e-07, 7.0767e-07, 4.8234e-06, 8.0758e-07,
        2.5270e-06, 8.9997e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.8514e-05, 4.5758e-06, 9.1959e-07, 7.7734e-07, 4.3952e-07, 9.0687e-07,
        3.5774e-06, 7.9181e-07, 1.0372e-06, 1.0084e-06, 5.6155e-07, 3.9379e-07,
        1.1206e-06, 8.5690e-07, 4.1628e-06, 1.1978e-05, 7.6829e-07, 7.1930e-07,
        8.3370e-07, 8.6972e-07, 8.5361e-07, 3.5080e-06, 4.9373e-07, 2.2714e-07,
        5.4512e-07, 1.3523e-05, 2.1809e-06, 2.1543e-06, 1.3192e-06, 1.9872e-06,
        9.6302e-07, 9.6195e-07, 7.0851e-07, 9.3135e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.1438e-05, 4.6464e-06, 5.4621e-07, 6.0474e-07, 5.7007e-07, 6.5224e-07,
        2.7540e-06, 4.5883e-07, 5.3124e-07, 8.0023e-07, 2.3046e-07, 3.7058e-07,
        8.1869e-07, 6.6061e-07, 1.9054e-06, 8.2025e-06, 3.3190e-07, 3.7089e-07,
        4.8352e-07, 7.5755e-07, 4.9127e-07, 2.6599e-06, 2.5812e-07, 1.4655e-07,
        4.5152e-07, 1.3499e-05, 1.0360e-06, 5.3177e-06, 1.2638e-06, 1.1496e-05,
        1.4766e-06, 4.3167e-07, 6.5126e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.9726e-05, 7.0465e-06, 6.7603e-07, 7.3208e-07, 7.0883e-07, 9.4450e-07,
        2.7379e-06, 6.2939e-07, 9.3187e-07, 1.1335e-06, 3.7493e-07, 5.2061e-07,
        9.0880e-07, 9.6020e-07, 3.5134e-06, 1.3229e-05, 6.7205e-07, 5.5826e-07,
        8.7771e-07, 5.8597e-07, 6.3324e-07, 3.7765e-06, 4.2075e-07, 1.9495e-07,
        4.8961e-07, 1.3212e-05, 1.6099e-06, 8.4257e-06, 2.0630e-05, 1.4758e-06,
        1.3724e-06, 1.6270e-06, 1.3485e-06, 1.7119e-06, 1.1951e-06, 1.3101e-06,
        8.4008e-07, 8.7813e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.2558e-05, 5.4789e-06, 6.2778e-07, 4.2147e-07, 3.9002e-07, 6.6112e-07,
        2.2346e-06, 6.9142e-07, 6.8139e-07, 1.0042e-06, 4.6511e-07, 3.1169e-07,
        7.8360e-07, 6.5753e-07, 2.7472e-06, 1.0276e-05, 4.6243e-07, 4.4920e-07,
        5.3925e-07, 5.6331e-07, 5.4776e-07, 3.4586e-06, 2.2301e-07, 5.1793e-08,
        4.6556e-07, 1.3310e-05, 9.9009e-07, 5.9940e-07, 1.2090e-06, 4.5255e-06,
        4.6819e-07, 9.6979e-07, 8.1508e-07, 7.6599e-07, 4.8403e-07, 1.0469e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.3428e-05, 5.8985e-06, 7.0336e-07, 4.2225e-07, 4.4992e-07, 8.6923e-07,
        3.7037e-06, 1.1180e-06, 9.4934e-07, 1.3701e-06, 3.8039e-07, 6.0451e-07,
        1.1019e-06, 8.2898e-07, 2.9030e-06, 9.7594e-06, 8.7883e-07, 3.5269e-07,
        6.6538e-07, 8.5360e-07, 7.6585e-07, 4.1754e-06, 2.7293e-07, 3.8206e-07,
        7.2173e-07, 1.1045e-05, 1.4679e-06, 8.5854e-06, 1.6943e-06, 1.6383e-06,
        6.4665e-07, 8.6457e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2853e-05, 2.1247e-06, 2.3231e-07, 3.1635e-07, 1.8073e-07, 3.7993e-07,
        1.3562e-06, 3.4910e-07, 3.8850e-07, 5.4414e-07, 1.8224e-07, 1.7032e-07,
        4.0543e-07, 2.3660e-07, 1.2639e-06, 3.4732e-06, 2.4197e-07, 3.2660e-07,
        3.7766e-07, 4.0652e-07, 3.5582e-07, 1.7049e-06, 1.9570e-07, 8.6737e-08,
        2.4594e-07, 8.5930e-06, 3.6571e-07, 4.7788e-07, 5.5893e-07, 6.7678e-07,
        8.1855e-07, 4.8419e-07, 5.0508e-07, 5.1635e-07, 1.1654e-05, 3.4702e-07,
        7.8451e-07, 7.0342e-07, 4.0280e-07, 2.0571e-06, 4.9675e-07, 1.0363e-06,
        5.0898e-07, 5.3392e-07, 6.5200e-06, 3.5513e-07, 5.7743e-07, 7.2383e-07,
        2.6109e-06, 2.5886e-06, 2.6804e-07, 1.9742e-07, 5.3013e-07, 2.9122e-07,
        4.9509e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.9851e-05, 3.8264e-06, 6.2840e-07, 5.8679e-07, 5.2135e-07, 6.6613e-07,
        3.9243e-06, 5.9999e-07, 9.2960e-07, 7.8773e-07, 4.9929e-07, 3.6802e-07,
        1.1223e-06, 8.1413e-07, 2.5755e-06, 8.8218e-06, 4.6161e-07, 4.5010e-07,
        8.8567e-07, 9.4859e-07, 4.8828e-07, 3.3085e-06, 2.4008e-07, 2.7782e-07,
        6.0901e-07, 1.0691e-05, 7.2114e-07, 1.0673e-06, 8.6106e-07, 9.0418e-07,
        1.5876e-06, 1.7648e-05, 8.0649e-07, 7.3100e-06, 2.2784e-05, 2.9633e-06,
        9.5088e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1900: [tensor([3.6542e-05, 1.0591e-06, 2.0212e-06, 1.9162e-06, 2.1375e-06, 1.4031e-06,
        4.2226e-06, 1.4074e-06, 2.4351e-07, 1.2719e-06, 1.5378e-06, 3.2133e-06,
        1.8835e-06, 2.1020e-06, 1.6073e-06, 4.1096e-06, 7.1972e-07, 1.9801e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.4373e-05, 1.3040e-06, 6.9046e-07, 1.0146e-06, 1.2881e-06, 1.5429e-06,
        4.6430e-06, 1.0447e-06, 5.8115e-07, 9.9490e-07, 3.8170e-06, 1.6833e-06,
        1.1906e-06, 1.5398e-06, 1.0316e-05, 2.0177e-05, 2.6747e-06, 5.0831e-07,
        6.7216e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.4996e-05, 7.5790e-06, 3.1220e-07, 1.3528e-06, 9.8805e-07, 2.8319e-06,
        6.3751e-06, 4.5100e-07, 4.3572e-07, 5.7045e-07, 7.0159e-07, 1.7095e-06,
        3.1700e-07, 6.9017e-07, 1.2262e-06, 1.9070e-06, 3.6415e-06, 6.3327e-07,
        4.8351e-07, 1.2999e-05, 6.4079e-07, 2.0665e-06, 5.4211e-07, 1.3462e-06,
        4.2770e-08, 7.6008e-07, 9.9552e-08, 4.1144e-06, 2.7817e-06, 1.0091e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6719e-05, 6.7114e-06, 3.5205e-07, 1.3588e-06, 7.4766e-07, 2.4928e-06,
        4.4632e-06, 4.0906e-07, 5.9529e-07, 4.8858e-07, 5.4918e-07, 1.2955e-06,
        5.7326e-07, 7.3173e-07, 6.0560e-07, 1.9025e-06, 2.5454e-06, 8.8959e-07,
        5.1590e-07, 9.9417e-06, 2.7617e-07, 1.6350e-06, 9.8968e-07, 1.3778e-06,
        1.7738e-07, 3.4310e-07, 2.6299e-06, 9.0761e-06, 2.5868e-06, 6.2509e-07,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.3342e-05, 7.5766e-06, 2.9029e-08, 1.4071e-06, 1.2194e-06, 2.9159e-06,
        7.6219e-06, 3.4215e-07, 4.7737e-07, 5.8363e-07, 8.6169e-07, 1.1364e-06,
        5.5484e-07, 8.5445e-07, 7.3169e-07, 1.6380e-06, 2.3247e-06, 7.7607e-07,
        2.5904e-07, 1.7675e-05, 4.8740e-07, 2.0101e-06, 1.0253e-06, 1.4474e-06,
        1.0452e-07, 8.0320e-07, 2.5941e-06, 2.5843e-06, 1.3750e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.6210e-05, 4.5568e-06, 9.5266e-07, 3.5999e-06, 6.8767e-07, 1.2371e-06,
        4.7625e-06, 1.1397e-06, 3.4379e-07, 7.8046e-07, 4.2683e-07, 4.1836e-06,
        7.3982e-07, 3.9896e-07, 5.6941e-07, 8.5543e-07, 1.3538e-06, 4.0307e-07,
        5.6872e-07, 6.9730e-07, 4.1940e-07, 4.8942e-07, 8.9674e-07, 9.0125e-06,
        1.2896e-06, 4.3980e-07, 3.1154e-07, 7.9197e-07, 2.4965e-07, 4.4075e-07,
        1.4636e-05, 1.1763e-06, 5.8147e-06, 4.9914e-07, 1.4797e-06, 5.5317e-07,
        4.2161e-07, 8.4095e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2282e-05, 4.5996e-06, 1.1513e-06, 3.9664e-06, 8.6976e-07, 1.3484e-06,
        5.1588e-06, 1.0089e-06, 3.0424e-07, 7.6598e-07, 5.6238e-07, 5.7838e-06,
        6.2923e-07, 5.4268e-07, 3.3814e-07, 8.5614e-07, 2.2851e-06, 2.3413e-07,
        7.5079e-07, 8.9422e-07, 5.5689e-07, 7.5399e-07, 1.3111e-06, 1.0151e-05,
        2.0085e-06, 6.9239e-07, 2.9740e-07, 9.5494e-07, 2.7456e-07, 5.3751e-07,
        8.8605e-06, 1.4165e-06, 2.4716e-06, 2.0854e-05, 1.7570e-06, 2.5020e-06,
        1.5236e-06, 4.5770e-07, 1.1244e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.6189e-05, 5.8611e-06, 8.9819e-07, 2.4963e-06, 5.5132e-07, 1.3399e-06,
        3.3825e-06, 1.0539e-06, 1.8906e-07, 9.0608e-07, 6.2506e-07, 5.3952e-06,
        6.5274e-07, 4.1715e-07, 7.2713e-07, 7.2154e-07, 1.7255e-06, 3.2721e-07,
        5.1935e-07, 7.2190e-07, 3.1231e-07, 6.2312e-07, 1.0646e-06, 7.4566e-06,
        1.5346e-06, 4.6022e-07, 3.0686e-07, 6.4994e-07, 2.5770e-07, 3.7563e-07,
        3.7842e-06, 7.2782e-07, 1.2414e-06, 4.4028e-06, 1.8920e-06, 5.4680e-07,
        6.9521e-07, 1.4257e-06, 4.8558e-06, 1.5981e-06, 2.5186e-07, 9.4439e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.9025e-05, 3.5215e-06, 7.4136e-07, 3.2400e-06, 8.1923e-07, 9.5870e-07,
        4.4434e-06, 7.9004e-07, 3.6748e-07, 9.4886e-07, 6.2761e-07, 4.1069e-06,
        5.3972e-07, 1.6382e-07, 6.8305e-07, 5.2302e-07, 1.3535e-06, 1.4568e-07,
        6.2766e-07, 4.9032e-07, 2.7561e-07, 5.1152e-07, 1.0110e-06, 7.5878e-06,
        1.0970e-06, 3.9133e-07, 3.1731e-07, 9.0766e-07, 1.9466e-07, 3.8923e-07,
        1.4635e-06, 4.0595e-07, 6.3655e-07, 1.0460e-06, 2.6637e-06, 1.7114e-06,
        9.2638e-06, 1.1423e-06, 4.9470e-06, 1.0911e-06, 4.3510e-07, 6.8206e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.6007e-05, 9.7780e-06, 1.6385e-06, 2.6610e-06, 7.0506e-07, 1.4092e-06,
        4.7335e-06, 1.1812e-06, 3.7290e-07, 7.9433e-07, 4.9961e-07, 4.7370e-06,
        7.6733e-07, 3.1669e-07, 8.0189e-07, 1.0115e-06, 1.9491e-06, 3.2946e-07,
        5.3138e-07, 8.3474e-07, 4.0169e-07, 9.5065e-07, 1.2085e-06, 1.0387e-05,
        1.5483e-06, 4.2409e-07, 4.3426e-07, 5.8944e-07, 5.3368e-07, 3.7458e-07,
        1.2054e-05, 1.3031e-06, 5.4945e-06, 1.2616e-06, 1.0024e-05, 8.6883e-07,
        7.5017e-07, 1.0055e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.3453e-05, 9.3269e-06, 1.9809e-06, 4.9016e-06, 1.0801e-06, 1.8545e-06,
        5.7982e-06, 1.4671e-06, 5.2502e-07, 1.0639e-06, 8.3082e-07, 4.8737e-06,
        7.0872e-07, 6.9833e-07, 9.4692e-07, 9.7503e-07, 2.2270e-06, 2.9826e-07,
        1.0088e-06, 7.2875e-07, 4.6201e-07, 6.3224e-07, 1.7456e-06, 8.8334e-06,
        3.1722e-06, 6.3663e-07, 6.7128e-07, 1.2962e-06, 1.5559e-07, 8.0119e-07,
        1.8018e-05, 1.7559e-06, 8.0141e-06, 1.1506e-06, 1.2315e-05, 8.9545e-07,
        3.1338e-05, 1.6397e-06, 7.9055e-07, 1.0922e-06, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.6024e-05, 6.7448e-06, 9.8091e-07, 4.0090e-06, 5.8785e-07, 1.1229e-06,
        4.3704e-06, 9.9644e-07, 3.1968e-07, 6.0078e-07, 4.9658e-07, 4.0339e-06,
        5.4341e-07, 5.6772e-07, 3.8708e-07, 7.0230e-07, 1.2000e-06, 2.3460e-07,
        5.2399e-07, 7.4075e-07, 3.1424e-07, 5.0496e-07, 9.5573e-07, 4.6648e-06,
        1.4739e-06, 3.7218e-07, 2.9498e-07, 7.3962e-07, 1.8439e-07, 3.6996e-07,
        3.2205e-06, 1.1374e-06, 8.8238e-06, 9.7010e-07, 5.0517e-06, 1.1692e-06,
        2.6114e-05, 9.3140e-07, 9.3902e-07, 4.4976e-07, 7.5225e-07, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1950: [tensor([4.3582e-05, 4.8824e-06, 9.2018e-07, 6.4182e-07, 1.6466e-06, 7.7399e-07,
        9.0920e-06, 1.3972e-06, 1.6753e-06, 4.4442e-06, 3.2929e-06, 5.5863e-07,
        4.0084e-07, 1.0118e-06, 1.0323e-06, 2.4446e-06, 6.9017e-06, 1.6755e-06,
        1.2290e-06, 5.5817e-06, 6.1315e-06, 1.2953e-06, 1.0500e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8691e-05, 1.0047e-05, 6.3033e-07, 9.1651e-07, 1.5669e-06, 9.8057e-07,
        8.6682e-06, 1.2922e-06, 2.0864e-06, 5.1311e-06, 3.7927e-06, 1.0083e-06,
        3.5880e-07, 9.3151e-07, 2.0658e-06, 1.5763e-05, 1.7003e-06, 9.2850e-06,
        5.1866e-06, 2.4792e-06, 2.1097e-06, 9.2836e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.1834e-05, 9.0951e-06, 7.9006e-07, 1.1779e-06, 1.5634e-06, 1.7626e-06,
        1.4914e-05, 1.6988e-06, 1.8649e-06, 8.5755e-06, 4.7187e-06, 6.6735e-07,
        6.4077e-07, 9.9865e-07, 1.6702e-05, 2.4895e-06, 1.9574e-05, 1.6682e-06,
        2.4469e-06, 3.5355e-05, 2.2050e-06, 2.9306e-06, 1.2918e-06, 8.2527e-07,
        1.8629e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.9418e-05, 1.1508e-05, 1.5270e-06, 1.8119e-06, 4.2902e-06, 1.3811e-06,
        5.4842e-07, 9.2734e-07, 1.4011e-05, 1.5424e-06, 1.5087e-07, 9.9286e-07,
        2.0951e-05, 2.6284e-06, 2.6552e-06, 1.2443e-05, 2.5951e-06, 1.4801e-06,
        1.9588e-06, 2.2853e-06, 1.4543e-06, 9.4437e-06, 7.3958e-07, 7.9304e-07,
        1.8763e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7186e-05,  9.7966e-06,  1.8834e-06,  2.5201e-06,  7.2515e-06,
         2.0122e-06,  7.9972e-07,  1.0820e-06,  9.6759e-06,  1.8372e-06,
        -3.5938e-08,  9.9734e-07,  2.0929e-05,  2.2275e-06,  4.0681e-06,
         1.8752e-05,  2.6852e-06,  2.6951e-06,  4.6278e-06,  6.7740e-07,
         1.9666e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4525e-05, 9.9485e-06, 2.7164e-06, 2.4297e-06, 6.5590e-06, 1.9186e-06,
        8.6239e-07, 4.4108e-07, 1.4052e-05, 1.7186e-06, 4.2640e-07, 1.5578e-06,
        2.5904e-05, 2.7968e-06, 3.8221e-06, 2.0344e-05, 4.1741e-06, 2.5689e-06,
        1.9742e-06, 2.2899e-06, 1.6261e-05, 1.7560e-06, 5.8832e-07, 1.6316e-06,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.5545e-05, 2.5672e-06, 8.2403e-07, 5.0509e-07, 1.5358e-06, 5.7677e-06,
        8.5179e-06, 2.6804e-06, 2.1400e-06, 4.4038e-07, 1.4788e-06, 2.9525e-06,
        4.3540e-06, 8.7647e-06, 1.5823e-06, 2.7938e-06, 5.1922e-05, 5.1107e-06,
        3.0025e-06, 9.0093e-07, 2.4697e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8657e-05, 1.3779e-06, 6.1020e-07, 4.6212e-07, 8.5631e-07, 3.2103e-06,
        1.0035e-05, 9.8674e-07, 9.3913e-07, 1.8846e-07, 6.1034e-07, 6.3691e-06,
        2.5531e-06, 9.8336e-07, 9.4267e-07, 9.8136e-07, 1.3380e-06, 2.9730e-07,
        1.0844e-06, 1.6207e-06, 2.0146e-06, 3.4600e-07, 1.0043e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2091e-05, 2.4012e-06, 9.5284e-07, 1.3714e-06, 1.6164e-06, 4.5483e-06,
        1.7924e-05, 2.4239e-06, 1.3844e-06, 4.9718e-07, 1.1802e-06, 8.2101e-06,
        2.2828e-06, 1.7033e-06, 3.0382e-06, 6.1446e-06, 9.2867e-07, 2.9943e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5085e-05, 3.5428e-06, 1.1454e-06, 6.9965e-06, 2.8038e-06, 6.9916e-06,
        6.1502e-07, 7.9320e-07, 3.9306e-07, 1.0438e-06, 3.9015e-06, 1.6932e-06,
        1.7357e-05, 1.2679e-06, 6.7913e-06, 1.0340e-05, 6.5755e-07, 1.5373e-06,
        7.5491e-07, 1.5483e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2890e-05, 3.7079e-06, 1.2579e-06, 6.8344e-06, 2.5778e-06, 5.6093e-06,
        3.7671e-07, 1.0271e-06, 3.5037e-07, 9.5303e-07, 4.0339e-06, 1.6459e-06,
        8.1083e-06, 3.7799e-06, 1.6529e-05, 1.9724e-06, 1.9825e-06, 6.6231e-06,
        1.8419e-06, 1.0492e-06, 5.7554e-07, 2.5225e-05, 5.4477e-07, 4.9725e-07,
        1.5425e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9982e-05, 5.1514e-06, 2.6212e-06, 8.7717e-06, 3.7806e-06, 9.3493e-06,
        5.5277e-07, 1.3285e-06, 5.3545e-07, 1.1697e-06, 7.3299e-06, 2.9730e-06,
        8.4488e-06, 4.7841e-06, 2.5860e-05, 3.1581e-06, 3.0732e-06, 9.9316e-07,
        1.9367e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2000: [tensor([3.4470e-05, 1.5727e-06, 1.4748e-06, 4.2591e-06, 2.1540e-06, 6.3260e-06,
        1.4601e-06, 6.1293e-07, 1.5669e-06, 1.5325e-06, 5.2976e-07, 1.1159e-06,
        1.9268e-06, 3.5493e-06, 6.4660e-06, 2.9428e-06, 1.3075e-06, 1.6851e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.6281e-05, 2.4063e-06, 2.2585e-06, 6.1036e-06, 2.1140e-06, 8.2630e-06,
        2.1623e-06, 1.2358e-06, 2.4836e-06, 1.7403e-06, 3.2596e-07, 1.8569e-06,
        3.1811e-06, 4.2560e-06, 6.9836e-06, 2.2994e-06, 1.4241e-06, 3.1585e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.5814e-05, 1.3879e-05, 1.9754e-06, 3.9517e-06, 5.0541e-06, 1.4832e-06,
        2.4068e-06, 6.1433e-06, 2.0010e-06, 6.7075e-06, 1.0220e-05, 1.4633e-06,
        5.9235e-07, 1.3153e-06, 2.1859e-06, 2.6798e-06, 3.4291e-06, 7.9577e-06,
        2.3561e-06, 1.8766e-06, 1.2233e-06, 2.7602e-06, 2.3595e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.0675e-05, 7.0765e-06, 2.7088e-06, 2.4552e-06, 2.8797e-06, 1.2250e-06,
        1.3955e-06, 4.7428e-06, 9.3797e-07, 6.2860e-06, 8.6125e-06, 1.6023e-06,
        6.3729e-07, 9.5768e-07, 1.6360e-06, 2.6492e-06, 2.3857e-06, 1.3418e-05,
        2.1330e-06, 6.8784e-06, 1.2906e-05, 5.8146e-07, 2.0736e-06, 3.4397e-06,
        1.5824e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.0404e-05, 1.0176e-05, 2.3004e-06, 3.4429e-06, 4.9771e-06, 1.5494e-06,
        9.3986e-07, 5.7115e-06, 2.4890e-06, 6.2685e-06, 1.0863e-05, 1.5262e-06,
        7.6510e-07, 1.6604e-06, 2.8377e-06, 2.5880e-06, 2.7696e-06, 1.4130e-05,
        3.0720e-06, 2.0379e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7890e-05, 5.7474e-07, 4.7444e-07, 2.7652e-07, 5.8445e-07, 5.3049e-07,
        8.1381e-07, 3.7052e-07, 6.7531e-07, 1.4525e-06, 3.0920e-07, 6.7099e-07,
        4.8024e-07, 6.2861e-07, 4.3620e-07, 3.4852e-07, 4.1321e-07, 6.5845e-07,
        4.5691e-06, 5.8949e-07, 2.9142e-07, 6.3564e-08, 3.8550e-07, 1.0785e-06,
        9.0814e-07, 1.5493e-06, 4.5231e-06, 1.1951e-06, 1.2979e-06, 7.2192e-07,
        6.7701e-07, 6.4294e-07, 6.4220e-07, 3.9586e-07, 1.6411e-06, 8.3892e-07,
        4.9698e-07, 3.8738e-07, 5.5731e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.3713e-05, 7.9281e-07, 1.2260e-06, 6.9351e-07, 1.2704e-06, 7.8641e-07,
        1.2000e-06, 7.3032e-07, 8.8389e-07, 1.8605e-06, 7.1622e-07, 1.1538e-06,
        1.0811e-06, 1.0058e-06, 6.2966e-07, 7.9570e-07, 7.6779e-07, 1.4598e-06,
        8.6132e-06, 6.8941e-07, 7.3814e-07, 2.2313e-07, 6.3461e-07, 9.7731e-07,
        1.6950e-06, 5.9966e-06, 3.1459e-06, 1.9528e-06, 1.0204e-06, 1.6332e-06,
        5.5643e-07, 1.0939e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.0236e-05, 1.0723e-06, 7.3774e-07, 4.6289e-07, 9.9584e-07, 9.5550e-07,
        8.1511e-07, 7.8936e-07, 9.6559e-07, 1.4081e-06, 6.1407e-07, 1.0137e-06,
        1.0414e-06, 8.6729e-07, 5.4291e-07, 6.3743e-07, 6.2001e-07, 2.0830e-06,
        1.2801e-05, 8.0794e-07, 6.7411e-07, 1.6928e-07, 6.8758e-07, 1.5416e-06,
        9.8709e-07, 8.8588e-07, 1.5843e-06, 1.6457e-06, 1.4855e-06, 1.4955e-06,
        1.0657e-06, 1.1186e-06, 5.4019e-07, 9.2756e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7213e-05, 3.6783e-06, 8.6629e-07, 5.6967e-07, 5.0398e-07, 6.2939e-07,
        3.1146e-06, 3.7193e-06, 8.3461e-07, 1.2861e-06, 5.6543e-07, 6.2208e-07,
        6.5226e-06, 2.0326e-07, 8.3828e-07, 4.5304e-06, 9.2960e-07, 5.5328e-07,
        7.7493e-07, 8.2693e-07, 7.0662e-07, 1.0721e-06, 1.1510e-05, 4.3118e-07,
        6.3621e-06, 1.0425e-05, 3.5940e-07, 8.2226e-07, 4.2183e-07, 1.9401e-08,
        7.0617e-07, 6.0526e-07, 6.3615e-07, 3.4907e-07, 1.2907e-05, 6.7015e-07,
        4.3987e-08, 6.8628e-07, 8.1932e-06, 8.6018e-07, 2.8475e-07, 1.3455e-05,
        1.2360e-06, 8.0721e-07, 2.8595e-07, 6.5449e-07, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.4590e-05, 4.6811e-06, 5.8883e-07, 2.9476e-07, 2.9159e-07, 3.8402e-07,
        2.3896e-06, 3.3665e-06, 4.9459e-07, 1.2170e-06, 5.8185e-07, 4.3817e-07,
        3.6107e-06, 2.0016e-07, 6.3607e-07, 4.0806e-06, 5.8285e-07, 5.9249e-07,
        5.3324e-07, 9.5653e-07, 5.2487e-07, 6.7139e-07, 7.1876e-06, 3.4302e-07,
        2.8391e-06, 5.2416e-06, 1.3193e-07, 4.5467e-07, 2.5342e-07, 6.7522e-08,
        4.1613e-07, 4.8373e-07, 3.8372e-07, 2.5331e-07, 7.6769e-06, 6.3451e-07,
        1.2455e-07, 4.2850e-07, 5.9144e-06, 7.0588e-07, 1.9923e-07, 1.0210e-06,
        2.3363e-06, 1.1253e-06, 2.8571e-07, 6.7059e-07, 1.3939e-06, 6.0395e-07,
        3.9216e-07, 7.4696e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.8510e-05, 3.9011e-06, 7.0906e-07, 3.6211e-07, 3.3052e-07, 3.7689e-07,
        1.9994e-06, 2.6839e-06, 5.9627e-07, 8.6932e-07, 2.9748e-07, 4.9825e-07,
        5.9750e-06, 2.4571e-07, 8.3259e-07, 3.0150e-06, 5.4492e-07, 6.0079e-07,
        5.0527e-07, 7.3904e-07, 4.3893e-07, 8.6829e-07, 5.4221e-06, 2.8791e-07,
        3.2540e-06, 6.0669e-06, 9.5010e-08, 4.3561e-07, 2.2208e-07, 3.6563e-08,
        3.7074e-07, 2.4145e-07, 4.0292e-07, 1.6455e-07, 5.9035e-06, 6.4956e-07,
        3.4654e-08, 4.5117e-07, 7.5510e-06, 6.3079e-07, 1.6643e-07, 9.6311e-07,
        7.0377e-07, 1.0630e-06, 7.8684e-06, 9.6348e-07, 7.4573e-07, 2.4466e-07,
        7.1796e-07, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4104e-05, 1.3055e-06, 1.8039e-06, 2.4888e-06, 6.9789e-06, 1.0218e-05,
        1.9965e-06, 1.1067e-06, 2.0259e-06, 1.6388e-06, 2.3683e-06, 2.8107e-06,
        3.5596e-06, 1.4422e-06, 8.7018e-06, 2.2151e-06, 2.1561e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2050: [tensor([5.4900e-05, 5.1390e-06, 1.5276e-06, 1.0149e-06, 1.0635e-06, 1.5308e-06,
        5.9902e-07, 7.8655e-07, 1.0663e-05, 1.5767e-06, 1.1881e-06, 1.0297e-06,
        9.0201e-07, 7.3556e-07, 6.1099e-06, 1.4018e-06, 7.4052e-06, 1.2306e-06,
        2.1708e-06, 1.5723e-05, 6.8745e-07, 1.6506e-06, 2.7642e-07, 2.1859e-06,
        2.1120e-06, 4.7007e-07, 1.5246e-06, 8.2446e-07, 3.8585e-07, 1.9915e-06,
        1.6958e-06, 3.3596e-07, 7.4173e-07, 5.8841e-06, 2.0331e-06, 1.0851e-05,
        1.4260e-06, 1.0076e-06, 7.7593e-07, 9.1257e-07, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.7832e-05, 6.3329e-06, 1.1345e-06, 8.1528e-07, 6.2734e-07, 1.2953e-06,
        2.9744e-07, 7.1196e-07, 7.4928e-06, 1.3184e-06, 9.5925e-07, 6.8664e-07,
        7.4627e-07, 2.0041e-07, 4.1786e-06, 1.3402e-06, 6.0227e-06, 1.2104e-06,
        1.6152e-06, 8.7450e-06, 4.4819e-07, 1.0021e-06, 3.5693e-07, 2.2687e-06,
        1.1944e-06, 3.3620e-07, 9.4938e-07, 3.6957e-07, 3.5383e-07, 1.6643e-06,
        1.4080e-06, 3.6142e-07, 5.2662e-07, 5.9181e-06, 1.4398e-06, 1.7120e-05,
        2.0547e-06, 4.6085e-06, 1.1322e-06, 5.9477e-07, 4.5214e-07, 7.2452e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.8474e-05, 7.3016e-06, 1.6932e-06, 7.6414e-07, 7.0125e-06, 9.0421e-07,
        4.9805e-07, 1.1653e-06, 3.9096e-06, 1.3479e-05, 1.2795e-06, 1.0578e-06,
        9.3174e-07, 1.8529e-06, 1.1299e-06, 3.0423e-06, 1.8075e-05, 1.1712e-06,
        3.4787e-07, 8.4708e-07, 1.8870e-05, 1.3915e-06, 2.2095e-06, 1.4126e-05,
        2.0470e-06, 8.9945e-07, 1.0606e-06, 1.2811e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.1163e-05, 6.9807e-06, 1.5035e-06, 1.0756e-06, 4.5016e-06, 4.0551e-07,
        4.9376e-07, 1.1448e-06, 2.4311e-06, 1.0994e-05, 1.1947e-06, 8.5286e-07,
        1.3605e-06, 1.1733e-06, 1.2043e-06, 2.7097e-06, 1.7462e-05, 9.2230e-07,
        6.4427e-08, 7.1532e-07, 7.1865e-06, 1.3714e-06, 2.1265e-06, 1.4632e-05,
        1.2203e-06, 7.5570e-07, 2.3830e-06, 6.8894e-07, 2.1926e-06, 3.6783e-06,
        1.3011e-06, 4.2387e-07, 9.1447e-07, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.6374e-05, 6.3829e-06, 1.7643e-06, 6.8876e-07, 7.4088e-06, 6.5765e-07,
        4.2631e-07, 8.8082e-07, 2.6983e-06, 8.6997e-06, 1.3150e-06, 6.7508e-07,
        1.0408e-06, 1.7977e-06, 9.4087e-07, 2.7609e-06, 1.2009e-05, 9.6095e-07,
        3.5971e-07, 9.9513e-07, 1.3452e-05, 1.8681e-06, 2.5712e-06, 1.8903e-05,
        3.9748e-06, 1.2329e-06, 1.4211e-06, 5.5888e-07, 1.3716e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7409e-05, 8.1751e-06, 2.3798e-06, 6.8025e-06, 1.3534e-06, 1.8663e-06,
        1.1656e-06, 4.5767e-06, 1.3755e-06, 2.4872e-06, 8.2507e-07, 5.0749e-06,
        1.8314e-06, 1.7846e-06, 9.7032e-07, 2.4718e-07, 2.0454e-06, 1.9641e-05,
        2.0643e-06, 1.4147e-05, 2.1686e-06, 1.4147e-05, 3.3201e-05, 1.9040e-06,
        3.5383e-06, 1.1136e-06, 2.1055e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.0741e-05, 7.3688e-06, 1.9425e-06, 7.3690e-06, 1.6516e-06, 2.5581e-06,
        1.5789e-06, 9.0980e-06, 1.6938e-06, 2.9026e-06, 8.0433e-07, 6.3752e-06,
        2.4798e-06, 1.5875e-06, 1.6260e-06, 5.4053e-07, 1.5050e-06, 1.9942e-05,
        2.9100e-06, 1.7431e-05, 2.7623e-06, 8.8477e-06, 5.3583e-05, 3.3905e-06,
        1.1361e-06, 1.0609e-06, 1.1155e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.8713e-05, 4.9722e-06, 1.5165e-06, 4.5813e-06, 1.0757e-06, 1.0108e-06,
        8.2105e-07, 3.2303e-06, 1.0150e-06, 1.8448e-06, 3.8633e-07, 3.5402e-06,
        1.4350e-06, 8.8704e-07, 8.7970e-07, 4.5846e-07, 1.2363e-06, 2.7566e-05,
        1.7841e-06, 7.3652e-06, 1.9984e-06, 5.0450e-06, 2.1454e-05, 1.3762e-06,
        1.3269e-06, 6.7219e-07, 1.2183e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.7916e-05, 1.2944e-05, 1.7299e-06, 7.3076e-06, 1.3719e-05, 1.4048e-06,
        7.0609e-07, 4.5847e-06, 1.9319e-06, 6.0318e-06, 1.5369e-06, 3.9325e-06,
        1.3154e-05, 9.0542e-07, 9.6236e-07, 2.2274e-07, 1.4386e-06, 2.9842e-05,
        2.5043e-06, 1.3568e-05, 2.1344e-06, 9.2477e-06, 5.8039e-05, 1.7939e-06,
        1.7787e-06, 2.0782e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.8420e-05, 6.9056e-06, 1.8836e-06, 6.6124e-06, 1.1433e-05, 9.5469e-07,
        5.0896e-07, 2.8108e-06, 2.0511e-06, 3.8566e-06, 1.2292e-06, 4.6588e-06,
        1.3031e-05, 9.4420e-07, 9.1554e-07, 7.4127e-07, 1.5903e-06, 2.4288e-05,
        2.7120e-06, 1.2539e-05, 2.2739e-06, 5.9382e-06, 3.0602e-05, 1.6276e-06,
        1.5232e-06, 1.7094e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6775e-05, 8.5093e-06, 1.6659e-06, 7.1257e-06, 1.0899e-05, 7.8891e-07,
        6.0550e-07, 3.8356e-06, 1.8888e-06, 4.6892e-06, 1.1407e-06, 3.7783e-06,
        8.4455e-06, 1.0587e-06, 8.1909e-07, 4.4928e-07, 1.6624e-06, 1.5742e-05,
        2.6728e-06, 1.8482e-05, 2.2109e-06, 8.4801e-06, 3.3325e-05, 1.4795e-06,
        1.1712e-06, 2.7578e-06, 4.1110e-05, 1.9199e-06, 1.7015e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2374e-05, 1.0987e-05, 1.8983e-06, 1.9875e-06, 1.1985e-05, 8.1905e-07,
        1.7511e-06, 1.7274e-06, 2.9242e-06, 7.1159e-07, 9.7307e-07, 1.6321e-05,
        2.7528e-06, 1.6553e-05, 2.9767e-06, 2.3623e-06, 2.8193e-06, 3.1001e-05,
        1.7544e-06, 2.1493e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2100: [tensor([4.2591e-05, 1.1431e-06, 2.1639e-07, 9.2285e-07, 1.2532e-06, 1.3510e-06,
        1.2878e-06, 1.0328e-06, 1.4692e-06, 1.5893e-06, 2.2634e-06, 1.7537e-06,
        6.5559e-07, 1.8514e-06, 7.4299e-06, 2.2871e-06, 4.6157e-07, 9.0521e-07,
        5.8800e-06, 2.9902e-06, 4.5642e-06, 1.0823e-06, 8.7220e-06, 1.5505e-06,
        6.3905e-07, 1.1484e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.5712e-05, 6.6102e-06, 1.2609e-06, 5.8806e-06, 1.3596e-06, 4.2611e-06,
        1.4013e-06, 1.0232e-06, 9.7440e-07, 1.8490e-06, 6.0771e-07, 4.8913e-07,
        7.9404e-06, 2.1591e-06, 8.0562e-07, 4.3715e-07, 1.8027e-06, 8.9734e-07,
        5.9455e-06, 1.2175e-06, 9.9977e-08, 9.0117e-07, 3.2158e-05, 2.1422e-06,
        1.9224e-06, 2.9731e-06, 1.0081e-05, 2.5559e-06, 1.7899e-06, 2.2826e-06,
        1.5225e-05, 3.4688e-06, 7.2223e-07, 1.0043e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([7.2885e-05, 1.3124e-05, 2.2204e-06, 1.0685e-05, 2.7570e-06, 1.2240e-05,
        2.6951e-06, 8.0136e-07, 1.5647e-06, 2.7863e-06, 6.9093e-07, 6.8066e-07,
        1.8563e-05, 2.0134e-06, 1.8540e-06, 8.6085e-07, 4.6876e-06, 9.0320e-07,
        1.0775e-05, 2.5105e-06, 1.0886e-08, 1.1238e-06, 1.5063e-06, 3.1702e-06,
        8.2642e-06, 4.0277e-06, 1.9949e-06, 1.0324e-06, 2.2167e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.3874e-05, 7.7480e-06, 1.7917e-06, 4.6949e-06, 1.5440e-06, 7.6464e-06,
        1.5623e-06, 8.9315e-07, 1.1249e-06, 1.9131e-06, 2.2023e-07, 7.4646e-07,
        1.1223e-05, 1.7310e-06, 1.0167e-06, 3.9348e-07, 2.2396e-06, 1.1336e-06,
        7.7971e-06, 1.3619e-06, 2.9869e-07, 9.1442e-07, 1.3938e-06, 1.6210e-06,
        7.9387e-06, 1.1263e-06, 2.2079e-05, 2.4773e-06, 2.3783e-06, 1.5452e-06,
        9.4408e-07, 7.0673e-07, 1.4033e-06, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.1003e-05, 8.3706e-06, 1.0599e-06, 1.8562e-06, 9.3852e-07, 9.0822e-07,
        3.1597e-06, 3.7070e-07, 4.9320e-07, 1.5793e-06, 3.5627e-06, 4.6857e-07,
        4.1782e-07, 8.7088e-07, 5.5255e-06, 2.6031e-06, 4.2815e-06, 1.8827e-06,
        4.6959e-06, 8.5101e-07, 1.7269e-06, 7.9305e-06, 6.4298e-07, 7.4256e-07,
        1.1319e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.7491e-05, 1.1799e-05, 9.8712e-07, 3.4944e-06, 1.9625e-06, 1.2469e-06,
        4.3773e-06, 1.1403e-06, 1.5846e-06, 1.2278e-06, 6.0471e-06, 5.8210e-07,
        5.5114e-07, 1.4189e-06, 1.0286e-05, 4.9864e-06, 1.3019e-05, 3.3513e-06,
        9.9837e-06, 1.1788e-05, 1.0555e-06, 6.2056e-07, 2.8388e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.8381e-05, 7.3464e-06, 1.5029e-06, 2.1878e-06, 1.4406e-06, 9.6826e-07,
        3.1088e-06, 8.8958e-07, 7.7834e-07, 1.9947e-06, 4.8990e-06, 2.1143e-07,
        5.7661e-07, 9.4847e-07, 6.5662e-06, 3.4802e-06, 8.5850e-06, 2.8775e-06,
        4.2992e-06, 2.1470e-06, 2.5387e-06, 9.1084e-07, 9.2506e-07, 1.7033e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([6.5900e-05, 2.2874e-06, 2.1267e-06, 1.5826e-05, 8.6792e-07, 1.8846e-06,
        1.4885e-05, 2.3125e-06, 8.8610e-06, 7.6576e-07, 3.6663e-06, 5.8948e-07,
        1.7374e-06, 2.8841e-05, 1.6493e-06, 2.3084e-05, 2.0824e-06, 3.3895e-06,
        2.7787e-05, 6.0756e-06, 1.0200e-05, 3.6993e-07, 5.1625e-06, 1.4130e-06,
        2.7275e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([7.5518e-05, 1.8946e-06, 2.0791e-06, 1.0782e-05, 5.3011e-07, 8.6674e-07,
        1.2725e-05, 2.1499e-06, 7.9407e-06, 3.6340e-07, 2.7786e-06, 4.7299e-07,
        1.7468e-06, 2.8028e-05, 3.0174e-06, 3.0636e-06, 2.7473e-05, 1.5483e-06,
        2.7708e-06, 3.0974e-05, 2.6744e-06, 1.7180e-05, 1.4813e-06, 6.6966e-06,
        7.4780e-07, 1.8305e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.7190e-05, 1.8235e-06, 1.8300e-06, 8.2464e-06, 6.1796e-07, 8.5129e-07,
        9.1719e-06, 1.8073e-06, 6.8173e-06, 6.6906e-07, 2.3673e-06, 5.1343e-07,
        1.5861e-06, 1.8351e-05, 1.8427e-06, 1.2835e-05, 9.0100e-07, 2.1438e-06,
        1.8000e-05, 2.6087e-06, 6.2990e-06, 5.7624e-07, 4.5502e-06, 8.8692e-07,
        1.5607e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.7025e-05, 1.7202e-06, 2.3436e-06, 2.0173e-06, 1.3525e-06, 1.4971e-06,
        1.4045e-06, 2.0099e-06, 2.6030e-06, 2.9548e-07, 1.2905e-06, 2.1816e-05,
        2.6444e-06, 4.0547e-06, 4.6664e-06, 2.4104e-06, 1.1641e-06, 2.8046e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.8478e-05, 2.8065e-06, 3.5443e-06, 2.8599e-06, 2.6292e-06, 4.1228e-06,
        2.5786e-06, 3.8392e-06, 3.5834e-06, 1.0601e-06, 2.3297e-06, 7.3717e-06,
        5.6750e-06, 8.5689e-06, 1.4427e-06, 3.2080e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2150: [tensor([5.2993e-05, 2.8291e-06, 3.1145e-06, 1.0659e-05, 4.3062e-06, 6.1676e-06,
        9.1718e-06, 2.0253e-06, 8.3714e-07, 2.7005e-06, 2.0684e-05, 2.6172e-06,
        6.0764e-06, 6.6720e-06, 1.1910e-05, 1.8680e-06, 2.2347e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.3453e-05, 1.1301e-06, 1.5079e-06, 7.6000e-06, 4.2179e-06, 3.5547e-06,
        5.4942e-06, 1.7962e-06, 4.4754e-07, 1.1873e-06, 1.8334e-05, 9.6868e-07,
        2.8909e-06, 3.2928e-05, 2.5387e-06, 3.0667e-06, 2.8025e-06, 1.0918e-06,
        2.4901e-06, 2.4969e-06, 5.5967e-07, 1.9532e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.4525e-05, 1.1034e-05, 1.2915e-06, 8.8298e-06, 2.5477e-06, 9.1876e-07,
        1.4067e-05, 2.4735e-06, 3.5186e-06, 7.2926e-07, 8.5410e-07, 1.7193e-05,
        2.8862e-06, 1.6803e-05, 1.7116e-06, 2.5130e-06, 6.0650e-05, 2.8944e-06,
        4.5728e-06, 1.0678e-06, 1.6877e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0916e-05, 7.5289e-06, 1.8522e-06, 9.1996e-06, 2.0662e-06, 9.5089e-07,
        1.2721e-05, 1.4128e-06, 2.8566e-06, 3.9095e-07, 1.1226e-06, 1.7410e-05,
        2.3717e-06, 1.2004e-05, 1.9399e-06, 5.3153e-06, 4.7720e-05, 1.7596e-06,
        2.0580e-06, 1.0901e-06, 1.6838e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.4211e-05, 1.2453e-05, 2.1590e-06, 1.1353e-05, 2.0882e-06, 1.4275e-06,
        8.8281e-06, 2.1500e-06, 3.1098e-06, 7.5776e-07, 1.2860e-06, 1.5865e-05,
        3.2924e-06, 1.7235e-05, 1.7337e-06, 3.2407e-06, 2.0886e-06, 1.8672e-05,
        3.0136e-06, 2.1018e-06, 1.7717e-05, 3.7937e-06, 6.0758e-06, 7.0171e-07,
        1.6374e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4229e-05, 1.1205e-06, 1.1706e-05, 1.7153e-05, 7.2455e-07, 1.3689e-06,
        1.1329e-05, 5.8230e-07, 1.5912e-06, 7.9889e-07, 1.5650e-06, 1.0406e-06,
        2.6234e-06, 8.5126e-07, 2.9567e-06, 1.0706e-06, 2.3938e-06, 1.2383e-06,
        5.9982e-06, 2.1521e-05, 5.4258e-07, 2.9379e-07, 1.1348e-06, 8.6932e-06,
        2.3445e-06, 1.2345e-05, 9.8771e-06, 3.0071e-05, 1.4729e-06, 6.2265e-07,
        1.7042e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.6978e-05, 1.1458e-06, 1.0229e-05, 1.2895e-05, 3.0969e-07, 1.6242e-06,
        7.6907e-06, 6.1686e-07, 9.7939e-07, 6.4887e-07, 9.3406e-07, 1.2246e-06,
        3.5223e-06, 8.9304e-07, 3.2929e-06, 1.6690e-06, 2.1713e-06, 1.3369e-06,
        7.6817e-06, 1.2337e-05, 8.8688e-07, 7.3440e-07, 8.7315e-07, 6.5881e-06,
        1.6654e-06, 1.7437e-05, 3.6818e-05, 1.7146e-06, 1.1726e-06, 1.7746e-06,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.8585e-05, 6.9700e-07, 1.0394e-05, 1.1925e-05, 3.7995e-07, 1.2956e-06,
        9.8629e-06, 4.1351e-07, 1.5126e-06, 7.9335e-07, 1.0293e-06, 1.0072e-06,
        2.6835e-06, 7.4891e-07, 2.7033e-06, 9.2485e-07, 2.3428e-06, 1.7003e-06,
        5.0169e-06, 1.4401e-05, 8.3377e-07, 3.7336e-07, 9.6886e-07, 6.6107e-06,
        1.9920e-06, 9.6238e-06, 9.3143e-06, 2.6480e-05, 3.8807e-06, 7.2541e-07,
        1.7385e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.7546e-05, 1.4167e-06, 1.3784e-05, 2.2402e-05, 1.9058e-06, 1.2284e-06,
        7.7822e-07, 2.8997e-06, 1.9497e-06, 5.8415e-06, 1.2200e-05, 1.1657e-06,
        9.2932e-07, 3.3561e-07, 1.2496e-06, 8.0617e-06, 1.4718e-05, 7.7877e-06,
        2.2729e-06, 5.3126e-06, 2.4010e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.3237e-05, 1.5178e-06, 3.0763e-05, 1.9165e-05, 1.8150e-06, 1.4513e-06,
        1.3668e-06, 2.7098e-06, 2.0079e-06, 4.9847e-06, 1.2216e-05, 1.5203e-06,
        1.2899e-06, 2.9425e-07, 1.3536e-06, 6.5134e-06, 2.4883e-05, 4.3515e-05,
        1.8498e-06, 2.1131e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.3947e-05, 1.0327e-06, 1.4424e-05, 2.1615e-05, 1.5974e-06, 1.3374e-06,
        1.2445e-06, 2.3651e-06, 2.7512e-06, 3.5384e-06, 1.3802e-05, 1.3563e-06,
        7.4587e-07, 7.0828e-07, 1.2615e-06, 6.8489e-06, 3.4416e-05, 3.0418e-06,
        2.0532e-06, 1.3858e-06, 1.9308e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.4122e-05, 8.9778e-06, 1.0903e-06, 1.0137e-06, 1.0705e-06, 1.1877e-06,
        1.3258e-06, 2.2706e-06, 1.6720e-06, 1.0515e-06, 1.6151e-06, 1.3031e-06,
        2.3149e-06, 2.4684e-07, 7.5640e-07, 7.5716e-06, 1.8255e-06, 5.7238e-06,
        1.7085e-05, 1.5360e-06, 1.4520e-06, 2.6682e-06, 3.4707e-06, 1.0347e-06,
        2.4088e-06, 8.9526e-07, 2.5570e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2200: [tensor([3.1762e-05, 4.0158e-06, 6.9242e-06, 2.8459e-06, 2.0053e-06, 4.1865e-06,
        2.7609e-06, 1.2390e-06, 2.5151e-06, 2.1766e-06, 2.1828e-06, 9.1573e-06,
        5.9673e-06, 1.6095e-06, 3.1020e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.5613e-05, 1.9094e-06, 3.6730e-06, 1.9117e-06, 9.6828e-07, 2.1027e-06,
        1.9830e-06, 9.7954e-07, 1.6438e-06, 1.6774e-06, 3.1598e-06, 7.4526e-06,
        1.0058e-05, 4.6864e-05, 3.0428e-06, 4.3890e-06, 1.8834e-06, 2.6782e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.5338e-05, 7.0911e-06, 1.4664e-06, 5.6809e-06, 1.3495e-06, 1.0421e-06,
        1.0837e-06, 7.9950e-07, 1.1707e-06, 2.3595e-06, 4.8274e-06, 1.3667e-06,
        5.0547e-06, 1.0902e-06, 2.2102e-06, 1.4173e-05, 3.6998e-06, 9.2160e-07,
        3.9158e-07, 9.8782e-07, 9.5768e-07, 3.4888e-07, 7.7159e-07, 1.2921e-05,
        2.1845e-06, 1.6953e-06, 3.4758e-05, 4.9346e-06, 1.3111e-06, 1.6078e-06,
        2.5224e-06, 7.7141e-07, 1.7149e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.5121e-05, 1.1648e-05, 1.8074e-06, 6.5857e-06, 1.5343e-06, 1.4961e-06,
        1.7496e-06, 8.4940e-07, 1.6262e-06, 3.1370e-06, 5.1233e-06, 1.5725e-06,
        5.5523e-06, 1.2032e-06, 2.2181e-06, 1.4970e-05, 4.2613e-06, 1.3246e-06,
        5.0232e-07, 1.1902e-06, 8.9014e-07, 2.0947e-07, 1.2514e-06, 2.7839e-05,
        2.3037e-06, 1.1366e-06, 2.1373e-06, 1.9046e-06, 2.9264e-06, 3.0691e-06,
        5.4705e-07, 1.3365e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.7561e-05, 6.9676e-06, 1.2490e-06, 7.2546e-06, 1.7441e-06, 1.2176e-06,
        1.2390e-06, 9.2763e-07, 1.5582e-06, 3.1364e-06, 4.1582e-06, 1.3234e-06,
        6.3691e-06, 1.1704e-06, 2.6352e-06, 1.4263e-05, 3.5801e-06, 1.0870e-06,
        6.0060e-07, 1.4061e-06, 7.2660e-07, 3.8204e-07, 8.1955e-07, 3.3945e-05,
        1.7422e-06, 1.7113e-06, 1.1403e-06, 2.4340e-06, 1.8377e-06, 1.6997e-06,
        3.0923e-06, 1.3280e-06, 2.5469e-06, 7.0092e-06, 6.0272e-06, 6.1713e-07,
        1.6874e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.9789e-05, 1.5485e-05, 2.1215e-06, 3.8612e-06, 2.2312e-06, 4.5617e-06,
        5.6215e-06, 8.7849e-06, 1.5578e-06, 1.5847e-06, 1.3866e-05, 1.6656e-06,
        8.2554e-07, 1.4807e-06, 1.2281e-05, 4.7298e-06, 3.2149e-06, 1.3630e-05,
        6.0500e-06, 5.3461e-06, 4.1287e-06, 1.7770e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([7.4967e-05, 1.4523e-05, 2.0473e-06, 4.7712e-06, 2.7577e-06, 4.3057e-06,
        7.3010e-06, 1.0107e-05, 2.4494e-06, 1.1371e-06, 1.0525e-05, 1.7843e-06,
        6.7192e-07, 1.9436e-06, 1.1983e-05, 4.0239e-06, 3.9814e-06, 2.3139e-05,
        3.0862e-06, 2.6852e-06, 2.7070e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2533e-05, 1.1877e-05, 1.3176e-06, 3.5207e-06, 2.1663e-06, 2.4012e-06,
        6.0338e-06, 9.3284e-06, 1.2979e-06, 1.1256e-06, 1.1514e-05, 9.7210e-07,
        5.5571e-07, 1.4553e-06, 9.1913e-06, 2.8934e-06, 2.5405e-06, 1.1161e-05,
        4.2650e-06, 2.1171e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([7.6596e-05, 2.6571e-05, 3.5804e-06, 2.9405e-06, 6.3552e-06, 1.3231e-05,
        5.1952e-06, 2.7979e-06, 2.5960e-05, 2.8536e-06, 2.6365e-06, 1.0606e-06,
        2.2014e-06, 5.6137e-05, 5.7553e-06, 5.0044e-06, 2.9502e-05, 5.4086e-05,
        6.4731e-06, 4.8381e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.2654e-05, 1.1336e-05, 3.4409e-06, 3.6548e-06, 5.8935e-06, 8.9502e-06,
        1.9861e-06, 1.8711e-06, 1.8705e-05, 2.8090e-06, 2.7804e-06, 6.1906e-07,
        1.7384e-06, 4.0384e-05, 3.2541e-06, 5.1400e-06, 1.6661e-05, 5.1041e-05,
        3.6075e-06, 2.6259e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([7.6473e-05, 1.2256e-05, 3.1075e-06, 3.2825e-06, 1.1366e-05, 1.0326e-05,
        3.4380e-06, 1.9399e-06, 2.0896e-05, 2.9755e-06, 2.9678e-06, 5.7638e-07,
        2.3112e-06, 4.0190e-05, 3.7661e-06, 5.6534e-06, 2.9423e-05, 5.8440e-05,
        2.4060e-06, 5.3109e-06, 2.7965e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.6308e-05, 1.8727e-06, 9.2541e-07, 1.7600e-06, 2.5848e-06, 1.8360e-06,
        1.0057e-06, 2.2736e-06, 7.2174e-06, 8.7639e-07, 5.7102e-07, 1.6282e-06,
        8.6175e-06, 4.0785e-06, 8.9338e-06, 4.2882e-06, 6.7005e-05, 4.5210e-06,
        3.2282e-06, 1.5419e-06, 1.1993e-06, 2.7892e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2250: [tensor([4.9785e-05, 2.7918e-06, 1.1471e-06, 2.7624e-06, 1.2823e-06, 2.5847e-06,
        6.1294e-07, 1.6782e-06, 3.3351e-05, 4.2621e-06, 1.1165e-06, 9.1979e-06,
        1.1819e-05, 2.4031e-06, 3.7640e-06, 1.8086e-06, 5.3193e-06, 2.0599e-06,
        3.3547e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.9394e-05, 2.0365e-06, 2.9367e-06, 8.1224e-07, 1.6465e-05, 1.7260e-06,
        2.4226e-06, 8.0579e-07, 2.3932e-06, 9.8591e-07, 9.1457e-06, 1.7532e-06,
        5.5382e-06, 3.2614e-06, 3.7538e-07, 1.6813e-06, 3.0278e-06, 1.2931e-06,
        2.6380e-06, 1.7662e-06, 3.3408e-05, 3.4651e-06, 5.7216e-06, 3.4837e-06,
        2.4182e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.0767e-05, 1.7607e-06, 2.7777e-06, 5.8221e-07, 1.6539e-05, 1.6953e-06,
        2.0293e-06, 1.5146e-06, 1.6721e-06, 1.4208e-06, 1.6002e-05, 1.5764e-06,
        3.4300e-06, 3.3678e-06, 1.4166e-07, 1.5695e-06, 3.3105e-06, 2.1585e-06,
        8.1893e-06, 3.7303e-06, 2.2721e-06, 3.6312e-07, 3.5288e-06, 2.2526e-06,
        3.2114e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.2328e-05, 1.6024e-06, 1.9673e-06, 6.4819e-07, 1.0806e-05, 2.3648e-06,
        1.5941e-06, 5.8750e-07, 1.3552e-06, 1.2201e-06, 1.2901e-05, 1.6549e-06,
        2.8538e-06, 1.9782e-06, 5.2693e-07, 1.7432e-06, 5.0927e-06, 8.4047e-06,
        3.5681e-06, 1.5125e-06, 1.9464e-05, 3.4802e-06, 1.7408e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.9102e-05, 1.9898e-06, 4.2883e-06, 1.0050e-06, 7.5396e-06, 8.9156e-07,
        4.0271e-06, 1.6688e-05, 2.1771e-06, 2.0295e-06, 2.7877e-07, 2.5683e-06,
        3.0079e-06, 1.0504e-05, 4.0545e-06, 9.3352e-06, 4.5032e-06, 1.9282e-06,
        1.9759e-06, 3.9768e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.8859e-05, 2.8922e-06, 5.8611e-06, 2.9383e-06, 1.2376e-05, 8.4770e-07,
        5.5798e-06, 1.6480e-05, 3.1875e-06, 1.8392e-06, 1.9687e-07, 2.4387e-06,
        1.6871e-05, 2.1660e-06, 1.8137e-05, 3.2217e-06, 2.6787e-06, 1.4071e-06,
        1.8646e-06, 3.1085e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.9989e-05, 3.2898e-06, 4.8032e-06, 2.5011e-06, 1.1057e-05, 6.5013e-07,
        5.2349e-06, 1.0780e-05, 2.5664e-06, 2.3599e-06, 1.4075e-08, 1.8937e-06,
        1.6241e-05, 3.0826e-06, 3.5170e-06, 3.4272e-06, 6.7621e-05, 3.7095e-06,
        6.2291e-06, 1.3943e-06, 2.4778e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.5436e-05, 7.5960e-06, 1.5357e-06, 7.4357e-06, 2.2301e-06, 3.2695e-07,
        1.5921e-06, 3.6928e-06, 5.7994e-06, 1.8224e-06, 2.0804e-06, 4.4558e-06,
        1.1129e-06, 8.4522e-07, 9.3351e-07, 1.7056e-06, 4.8515e-06, 2.3924e-06,
        6.6877e-07, 1.2181e-05, 1.5712e-06, 1.2468e-06, 1.9070e-06, 5.5245e-06,
        2.0350e-06, 1.4231e-06, 7.1179e-05, 2.5295e-06, 1.8335e-06, 1.3560e-06,
        2.0372e-05, 2.2615e-06, 1.3079e-06, 1.2504e-06, 4.0752e-05, 2.5523e-06,
        1.2951e-06, 1.6805e-06, 2.3006e-06, 3.8129e-06, 2.1667e-06, 1.3387e-06,
        1.7583e-06, 1.1288e-05, 1.4922e-06, 6.6862e-07, 2.1143e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.8586e-05, 1.6047e-05, 1.7689e-06, 5.4934e-06, 1.9024e-06, 4.0605e-07,
        1.5327e-06, 4.6877e-06, 5.1289e-06, 1.8665e-06, 2.2470e-06, 5.3152e-06,
        1.0223e-06, 6.7493e-07, 8.0108e-07, 1.4962e-06, 3.3480e-06, 2.5484e-06,
        6.6518e-07, 1.2746e-05, 1.4786e-06, 1.4589e-06, 1.4436e-06, 5.8541e-06,
        1.6178e-06, 9.2174e-07, 3.0140e-05, 2.5328e-06, 8.3344e-07, 2.5468e-06,
        3.4663e-06, 1.7951e-06, 1.3386e-06, 6.3846e-06, 1.0766e-05, 1.9144e-06,
        7.3957e-07, 1.3513e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.1805e-05, 6.1837e-06, 1.6332e-06, 8.0690e-06, 2.1462e-06, 4.0047e-07,
        1.4180e-06, 4.3332e-06, 4.8109e-06, 2.3251e-06, 1.7323e-06, 4.6187e-06,
        1.0235e-06, 9.4478e-07, 9.4080e-07, 1.5200e-06, 4.3830e-06, 3.4357e-06,
        7.7578e-07, 1.4686e-05, 1.6936e-06, 1.5309e-06, 2.1561e-06, 5.2746e-06,
        1.7546e-06, 1.2829e-06, 1.7723e-06, 4.8243e-06, 1.0431e-06, 1.3479e-06,
        2.4421e-05, 2.5786e-06, 3.2428e-06, 1.0489e-05, 2.5389e-06, 3.7232e-06,
        1.3218e-05, 1.9905e-06, 5.8133e-07, 1.4410e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.9511e-05, 3.3218e-06, 7.3449e-06, 6.1731e-07, 1.5449e-05, 2.4594e-06,
        2.6618e-06, 1.9089e-06, 3.0609e-06, 1.4274e-06, 4.7982e-07, 2.1891e-06,
        4.1907e-06, 2.3131e-05, 1.6703e-06, 3.0897e-05, 2.2333e-06, 4.9782e-06,
        2.5737e-06, 1.5829e-06, 2.0563e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.3725e-05, 1.9622e-06, 7.6524e-06, 7.6618e-07, 1.5734e-05, 1.7794e-06,
        3.2646e-06, 1.5237e-06, 2.6344e-06, 1.8593e-06, 4.4223e-07, 1.0458e-06,
        2.5887e-06, 2.5899e-06, 2.4380e-06, 7.0702e-06, 3.1460e-06, 6.1416e-06,
        3.4148e-06, 3.3642e-06, 3.7281e-06, 8.7817e-07, 8.8445e-07, 2.4940e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2300: [tensor([5.8292e-05, 1.6163e-06, 1.6987e-06, 7.6380e-06, 1.6300e-06, 1.5209e-06,
        3.4664e-06, 2.0116e-06, 9.4213e-07, 2.0675e-06, 2.1071e-06, 1.4541e-06,
        2.1339e-05, 2.5114e-06, 3.1339e-06, 9.6855e-07, 2.3997e-06, 1.8773e-06,
        2.8192e-07, 1.4807e-06, 1.2139e-05, 1.7887e-06, 3.1341e-06, 8.4459e-06,
        3.2111e-06, 1.1559e-06, 4.0134e-06, 1.3892e-06, 2.4883e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.3954e-05, 2.3448e-05, 4.5046e-06, 7.8656e-06, 2.1270e-05, 3.0646e-06,
        3.3407e-06, 5.0617e-06, 4.2671e-06, 2.6470e-06, 9.2067e-07, 2.9558e-06,
        1.7342e-05, 6.4208e-06, 3.2123e-06, 5.4011e-05, 9.4973e-06, 7.2889e-06,
        5.4926e-06, 3.2641e-06, 1.6419e-06, 4.5708e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.3809e-05, 1.3617e-05, 4.4247e-06, 7.5621e-06, 1.7868e-05, 1.5092e-06,
        2.7230e-06, 2.1926e-06, 3.6520e-06, 2.1195e-06, 4.5420e-07, 2.3779e-06,
        3.1736e-05, 2.6605e-06, 3.4288e-06, 3.4229e-06, 8.8187e-06, 5.7371e-06,
        3.7949e-06, 4.2949e-06, 1.3709e-06, 2.1458e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.0363e-05, 2.1753e-05, 4.5397e-06, 6.8183e-06, 1.9975e-05, 2.3440e-06,
        2.9316e-06, 3.2340e-06, 3.4368e-06, 2.6362e-06, 6.3158e-07, 2.6285e-06,
        6.0553e-05, 2.1642e-06, 3.4818e-06, 6.1149e-06, 4.6754e-06, 2.3087e-06,
        3.2077e-06, 2.9214e-06, 1.4275e-06, 3.4663e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([8.2562e-05, 2.2545e-05, 3.4914e-06, 3.7289e-06, 2.6745e-05, 3.0457e-06,
        2.8212e-06, 4.9612e-06, 1.9023e-05, 2.3879e-06, 1.2158e-06, 1.9037e-06,
        3.1569e-05, 3.8437e-06, 4.0279e-06, 6.9719e-06, 3.9966e-06, 5.2128e-06,
        1.4544e-06, 3.0071e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.6646e-05, 1.8996e-05, 2.9633e-06, 2.6963e-06, 1.2539e-05, 2.1380e-06,
        2.2305e-06, 4.2143e-06, 1.7633e-05, 2.2509e-06, 1.6421e-06, 2.4081e-06,
        4.5618e-05, 3.7099e-06, 3.3429e-06, 1.3621e-05, 5.6389e-05, 4.1986e-06,
        4.4113e-06, 1.5520e-06, 2.4858e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([8.5183e-05, 2.4229e-05, 3.8619e-06, 4.1735e-06, 3.1244e-05, 2.8957e-06,
        4.6571e-06, 6.8406e-06, 2.4280e-05, 4.5098e-06, 1.9948e-06, 2.7111e-06,
        4.4568e-05, 5.3679e-06, 5.0293e-06, 8.0430e-05, 6.6592e-06, 7.2140e-06,
        1.0139e-05, 3.7495e-06, 3.6723e-06, 1.6321e-06, 3.8510e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([6.9014e-05, 2.3607e-06, 3.4908e-06, 6.2981e-07, 1.1524e-06, 2.1159e-06,
        1.5516e-05, 9.0903e-07, 3.4102e-06, 1.6235e-05, 1.3695e-06, 1.0169e-06,
        1.7109e-06, 3.1227e-06, 4.7680e-06, 2.6266e-06, 6.0274e-05, 3.0894e-06,
        1.8286e-06, 2.3644e-06, 2.0726e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.6744e-05, 2.1219e-06, 3.2400e-06, 6.8155e-07, 1.7034e-06, 1.7425e-06,
        1.5247e-05, 1.3970e-06, 3.1750e-06, 1.3028e-05, 1.1616e-06, 1.8569e-07,
        1.8887e-06, 2.9590e-05, 3.1042e-06, 5.8454e-06, 4.0639e-06, 5.8076e-06,
        2.9165e-06, 2.2825e-06, 8.9933e-07, 2.3505e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.5611e-05, 2.3498e-06, 3.4929e-06, 6.6940e-07, 1.9627e-06, 2.6769e-06,
        2.1269e-05, 1.0048e-06, 4.3943e-06, 1.4683e-05, 1.2239e-06, 6.8777e-07,
        2.8197e-06, 2.6209e-06, 4.3619e-06, 2.6597e-06, 6.8077e-06, 2.4685e-06,
        4.5239e-05, 4.8931e-06, 3.0095e-06, 5.9664e-07, 3.7543e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([7.2122e-05, 1.4524e-05, 9.6911e-07, 1.4149e-06, 3.5260e-06, 5.1822e-07,
        2.7466e-06, 3.6483e-06, 1.8468e-06, 9.0075e-06, 1.3373e-05, 3.2852e-06,
        4.2310e-06, 3.3679e-06, 8.6986e-07, 4.6670e-06, 2.4445e-06, 2.9278e-06,
        3.0903e-06, 7.1894e-07, 1.4452e-06, 4.7665e-05, 2.4512e-06, 2.6376e-06,
        5.8232e-06, 5.0627e-06, 8.0249e-06, 2.3916e-06, 3.8561e-06, 5.9610e-06,
        1.0083e-05, 4.7200e-05, 3.9479e-06, 4.6216e-06, 2.0062e-06, 2.7494e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([8.0076e-05, 1.2592e-05, 8.7194e-07, 1.4386e-06, 3.7736e-06, 5.1381e-07,
        1.8832e-06, 2.9572e-06, 1.5846e-06, 1.1477e-05, 8.3046e-06, 2.4045e-06,
        3.5104e-06, 3.5447e-06, 2.4122e-07, 2.3236e-06, 2.1109e-06, 2.0074e-06,
        3.2183e-06, 9.2351e-08, 1.0264e-06, 2.6634e-05, 2.6837e-06, 6.1211e-06,
        4.4204e-06, 6.9154e-06, 4.4063e-06, 2.5386e-06, 2.7447e-06, 4.0479e-06,
        1.6806e-06, 9.8819e-07, 2.1994e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2350: [tensor([8.6163e-05, 2.8842e-06, 5.7118e-06, 8.3169e-06, 5.4486e-06, 1.4291e-05,
        1.8614e-06, 4.0829e-06, 2.5802e-06, 3.0333e-06, 3.2262e-06, 4.7156e-06,
        7.1248e-05, 4.7203e-06, 3.4567e-08, 3.0522e-06, 1.7405e-05, 6.9288e-06,
        1.2785e-05, 5.8198e-06, 3.9299e-06, 1.4390e-06, 6.1363e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([7.5879e-05, 4.0705e-06, 4.0547e-06, 8.7890e-06, 5.6806e-06, 2.7091e-05,
        7.5462e-06, 3.0774e-06, 3.1375e-07, 2.9415e-06, 1.6552e-05, 6.6129e-06,
        2.2255e-05, 1.0114e-05, 1.9326e-06, 6.0066e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([9.4062e-05, 5.6305e-06, 7.9347e-06, 5.7119e-06, 8.0851e-06, 3.2612e-05,
        7.7720e-06, 5.2285e-06, 2.4814e-06, 5.1107e-06, 2.6468e-05, 6.2607e-06,
        1.8839e-05, 5.8987e-06, 3.8544e-06, 1.0233e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.0546e-04, 4.1419e-06, 8.0212e-06, 6.4355e-06, 7.7426e-06, 2.6025e-05,
        8.1814e-06, 3.5152e-06, 2.1806e-06, 4.2386e-06, 2.6506e-05, 6.4589e-06,
        1.9588e-05, 4.5600e-06, 2.9886e-06, 4.4862e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([9.2775e-05, 2.5406e-05, 2.8102e-06, 4.9826e-06, 2.8393e-05, 2.6070e-06,
        1.9213e-06, 4.9544e-06, 1.3483e-06, 3.6869e-06, 5.8048e-06, 2.3213e-05,
        6.2779e-06, 6.9370e-06, 7.9080e-06, 5.2951e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.1364e-04, 2.6745e-05, 4.6230e-06, 4.7253e-06, 4.9260e-05, 2.0694e-06,
        2.4507e-06, 5.2683e-06, 3.1726e-06, 3.8999e-06, 9.2003e-06, 3.2944e-05,
        7.5419e-06, 6.8130e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([8.6492e-05, 3.1834e-05, 5.4753e-06, 6.6320e-06, 4.4097e-05, 3.1737e-06,
        2.2175e-06, 4.8953e-06, 2.1314e-06, 4.1306e-06, 7.4887e-06, 3.9056e-05,
        1.1707e-05, 5.0001e-06, 4.9917e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([9.0334e-05, 1.7086e-05, 4.2742e-06, 3.0029e-06, 9.9240e-06, 7.3979e-06,
        3.9881e-06, 5.1242e-06, 3.3912e-06, 3.8026e-06, 3.0864e-06, 3.3454e-06,
        1.8690e-06, 1.6672e-06, 3.9143e-06, 4.4892e-05, 2.6831e-06, 2.6719e-06,
        4.7713e-06, 1.8961e-06, 2.2174e-07, 2.1287e-06, 1.6571e-05, 3.6110e-06,
        4.5328e-05, 6.3720e-06, 1.7852e-06, 4.0639e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([8.2610e-05, 2.3275e-05, 5.3290e-06, 2.5968e-06, 8.1142e-06, 8.3008e-06,
        3.0138e-06, 3.3827e-06, 2.9341e-06, 4.3292e-06, 3.1013e-06, 2.0429e-06,
        1.7111e-06, 1.8781e-06, 4.0108e-06, 2.3286e-05, 2.2943e-06, 2.5965e-06,
        3.4159e-06, 2.3458e-06, 5.5413e-07, 2.2167e-06, 1.7689e-05, 2.3158e-06,
        3.4989e-05, 2.6694e-06, 1.4159e-06, 4.4263e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.5220e-05, 1.5221e-05, 2.8872e-06, 1.3463e-06, 7.3165e-06, 5.5052e-06,
        2.1082e-06, 2.4146e-06, 1.8654e-06, 2.2666e-06, 1.6668e-06, 1.4815e-06,
        1.0929e-06, 7.8620e-07, 2.4851e-06, 1.5993e-05, 2.1965e-06, 1.3008e-06,
        2.3966e-06, 9.6752e-07, 3.5424e-07, 1.4556e-06, 8.3687e-06, 2.1510e-06,
        2.6520e-05, 1.0711e-06, 1.4810e-06, 2.0913e-06, 3.0657e-05, 5.8235e-06,
        1.4773e-06, 1.4304e-06, 2.2308e-06, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([7.8002e-05, 1.4507e-05, 1.2662e-06, 1.7867e-06, 1.2606e-06, 2.2387e-06,
        2.4206e-06, 1.1927e-06, 9.2633e-07, 9.7670e-06, 1.0409e-06, 7.1146e-06,
        1.6316e-05, 1.6564e-06, 3.1767e-06, 1.3627e-06, 1.8512e-06, 2.3504e-06,
        1.9595e-06, 6.7481e-06, 2.2125e-06, 3.5494e-06, 7.7602e-07, 1.7353e-06,
        3.9020e-05, 2.2140e-06, 1.9445e-06, 6.0152e-06, 3.7702e-06, 4.9394e-06,
        3.2640e-06, 2.1922e-06, 1.0445e-06, 2.0059e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([7.0110e-05, 1.5069e-05, 1.3905e-06, 2.1895e-06, 1.7652e-06, 1.9509e-06,
        1.7249e-06, 1.4249e-06, 1.0471e-06, 1.1628e-05, 1.9191e-06, 6.7437e-06,
        1.2925e-05, 2.2501e-06, 2.8090e-06, 1.5390e-06, 1.4273e-06, 2.0746e-06,
        3.1912e-06, 6.0858e-06, 1.5729e-06, 4.0550e-06, 4.9865e-07, 1.3579e-06,
        7.4747e-05, 2.0830e-06, 2.8725e-06, 3.3279e-06, 2.2153e-06, 5.0131e-06,
        1.7826e-06, 1.9219e-06, 1.0886e-06, 1.9461e-06], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2400: [tensor([8.1235e-05, 3.5015e-05, 4.2895e-06, 4.6871e-06, 3.5304e-06, 3.4002e-06,
        2.9158e-05, 2.3399e-06, 4.6901e-06, 2.6073e-06, 2.9661e-06, 3.2369e-06,
        8.2345e-07, 3.4331e-06, 4.2673e-05, 8.1725e-06, 5.2629e-05, 5.2359e-06,
        2.7990e-06, 5.5316e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.4330e-05, 2.2689e-05, 3.8886e-06, 2.2748e-06, 1.6734e-06, 1.9245e-06,
        1.4229e-05, 1.2245e-06, 3.8503e-06, 2.4160e-06, 1.9368e-06, 2.0859e-06,
        9.4573e-07, 1.8658e-06, 4.8957e-05, 3.9895e-06, 4.2916e-06, 3.5930e-06,
        3.5733e-05, 3.1263e-06, 4.3589e-06, 9.6624e-07, 3.4794e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.5111e-04, 2.2052e-05, 4.9706e-06, 3.5733e-06, 3.6067e-06, 2.4437e-06,
        4.3567e-05, 1.7064e-06, 6.7419e-06, 4.3883e-06, 3.1417e-06, 2.4479e-06,
        1.2860e-06, 3.3782e-06, 4.9053e-05, 7.5126e-06, 5.0624e-05, 6.7119e-06,
        4.4589e-06, 6.9192e-05, 3.9695e-06, 6.3468e-06, 3.1571e-06, 2.5961e-06,
        5.4664e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.8013e-05, 3.5668e-05, 5.9267e-06, 5.2665e-06, 3.5953e-06, 2.8171e-06,
        1.8195e-05, 1.8924e-06, 6.2750e-06, 2.9341e-06, 1.7318e-06, 3.1464e-06,
        1.5673e-06, 3.2544e-06, 5.3194e-05, 6.7446e-06, 6.1693e-05, 5.4060e-06,
        3.8713e-06, 8.4784e-05, 8.1960e-06, 2.9505e-06, 6.6795e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([7.9966e-05, 2.5298e-05, 3.9159e-06, 3.9764e-06, 3.8379e-06, 3.1777e-06,
        1.7753e-05, 2.7572e-06, 5.7080e-06, 3.5970e-06, 2.3300e-06, 3.4961e-06,
        1.1789e-06, 2.7879e-06, 3.5620e-05, 7.9516e-06, 5.7898e-05, 5.2982e-06,
        5.0951e-05, 2.6719e-06, 6.5257e-06, 6.6803e-07, 3.5363e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.2339e-04, 3.9379e-05, 1.0724e-05, 5.0685e-06, 5.1197e-06, 5.8270e-06,
        4.0437e-05, 2.7186e-06, 7.9326e-06, 4.1699e-06, 4.4500e-06, 3.6253e-06,
        3.5327e-06, 5.3634e-06, 7.3233e-05, 9.4577e-06, 7.0082e-06, 9.6127e-06,
        3.0561e-05, 5.9752e-06, 4.2721e-06, 1.8721e-06, 6.4016e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.3662e-05, 2.1268e-05, 4.2943e-06, 3.7635e-06, 3.4313e-06, 3.1089e-06,
        1.7049e-05, 1.7452e-06, 4.2117e-06, 3.1426e-06, 2.7478e-06, 2.0232e-06,
        2.2056e-06, 1.9661e-06, 2.3550e-05, 6.0550e-06, 5.2075e-05, 8.5889e-06,
        4.8759e-06, 4.7594e-06, 3.6409e-06, 4.2949e-06, 1.7510e-06, 4.6276e-06,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.1084e-04, 2.7064e-05, 7.1836e-06, 6.2278e-06, 4.7219e-06, 4.1020e-06,
        3.2802e-05, 1.8975e-06, 6.5113e-06, 3.2383e-06, 1.8680e-06, 2.6583e-06,
        1.1412e-06, 2.8078e-06, 6.9951e-05, 7.1206e-06, 1.1342e-05, 2.0671e-06,
        6.5327e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.0445e-04, 4.7413e-05, 7.3884e-06, 4.7800e-06, 4.3820e-06, 2.8016e-06,
        3.5330e-05, 2.3525e-06, 9.1725e-06, 5.2489e-06, 3.0770e-06, 3.4964e-06,
        3.3857e-07, 4.2429e-06, 6.5059e-05, 1.1287e-05, 6.9714e-05, 8.2735e-06,
        4.0653e-06, 1.2922e-05, 3.0215e-06, 4.9964e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.1102e-04, 4.1734e-06, 5.2888e-06, 8.4089e-06, 4.4880e-06, 1.4197e-05,
        2.8258e-06, 2.1571e-06, 3.2967e-06, 4.4909e-06, 2.0318e-06, 1.3326e-05,
        3.7873e-06, 5.8817e-07, 2.5675e-06, 2.2980e-05, 9.7454e-06, 4.0274e-05,
        6.7668e-06, 6.3338e-06, 3.9063e-06, 2.0485e-06, 5.1046e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.4114e-04, 5.8564e-06, 6.1011e-06, 1.0205e-05, 5.5802e-06, 1.8357e-05,
        4.6231e-06, 4.5447e-06, 5.2507e-06, 5.5765e-06, 3.5594e-06, 1.9654e-05,
        5.5248e-06, 1.0273e-06, 4.1630e-06, 3.1757e-05, 1.0733e-05, 4.5217e-05,
        8.0309e-06, 1.0250e-04, 1.0003e-05, 2.7025e-06, 7.7299e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.1878e-04, 3.9196e-06, 3.6937e-06, 7.3793e-06, 3.6980e-06, 1.0524e-05,
        2.6996e-06, 3.3640e-06, 3.3289e-06, 4.4441e-06, 3.0438e-06, 1.4448e-05,
        6.0751e-06, 1.2489e-06, 2.3897e-06, 7.3466e-05, 1.1952e-05, 3.3799e-05,
        2.1117e-05, 4.0347e-06, 4.4733e-06, 5.5683e-06, 2.7141e-06, 4.6892e-06,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2450: [tensor([4.2507e-05, 4.1431e-06, 6.9567e-06, 2.0372e-06, 3.7378e-06, 3.7613e-06,
        6.5112e-06, 3.3225e-06, 3.9159e-06, 6.5242e-06, 1.3973e-05, 1.1273e-06,
        5.4767e-06, 3.8401e-06, 2.0749e-06, 4.5695e-05, 2.7560e-06, 5.0999e-06,
        2.2224e-06, 4.4054e-06, 1.5401e-06, 2.6654e-06, 5.3955e-06, 8.8168e-06,
        1.2156e-05, 6.1150e-06, 5.6147e-06, 4.2154e-06, 3.2338e-06, 4.6409e-06,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.0071e-04, 3.8273e-06, 4.0727e-06, 1.0367e-06, 2.7523e-06, 3.6295e-06,
        4.0731e-06, 2.3957e-06, 6.1654e-06, 4.6488e-06, 1.1052e-05, 2.0392e-06,
        3.6964e-06, 4.8117e-06, 2.2157e-06, 3.1192e-05, 2.2859e-06, 4.1107e-06,
        1.7395e-06, 2.9487e-06, 7.4001e-07, 2.4980e-06, 5.5171e-06, 6.5492e-06,
        6.2849e-05, 2.1816e-06, 3.7634e-06, 1.9280e-05, 9.2636e-06, 1.4808e-06,
        4.0412e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.6873e-05, 3.1311e-06, 5.6000e-06, 3.2410e-06, 4.8099e-06, 3.3558e-06,
        6.2423e-06, 2.7899e-06, 4.2528e-06, 3.2083e-05, 3.1527e-06, 5.9752e-06,
        1.0049e-06, 3.3581e-06, 6.6171e-06, 1.1907e-05, 2.2147e-05, 2.4270e-05,
        2.2763e-05, 4.1281e-06, 1.0726e-05, 1.3654e-04, 1.0434e-05, 2.5947e-06,
        5.5148e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.0465e-04, 5.4254e-06, 6.3413e-06, 3.2431e-06, 5.4905e-06, 5.3616e-06,
        5.9608e-06, 3.9616e-06, 4.1838e-06, 4.4064e-05, 2.3543e-06, 8.3890e-06,
        2.4479e-06, 4.0706e-06, 7.7113e-06, 2.7925e-05, 1.2926e-05, 1.0625e-04,
        5.9497e-06, 1.0424e-05, 3.6937e-06, 7.2879e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([9.5603e-05, 4.4078e-06, 6.7853e-06, 1.7582e-06, 3.2772e-06, 5.9109e-06,
        6.1929e-06, 3.0945e-06, 3.5460e-06, 3.4982e-05, 2.4169e-06, 5.6458e-06,
        1.7181e-06, 3.7405e-06, 1.9775e-06, 9.7795e-06, 2.5097e-05, 1.0350e-05,
        4.7884e-06, 1.2806e-05, 2.5842e-05, 1.0956e-05, 3.7973e-06, 6.1947e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.9050e-04, 5.7522e-06, 1.0694e-05, 2.7408e-06, 4.8618e-06, 1.7718e-06,
        1.0100e-05, 9.4313e-06, 8.4398e-06, 8.7612e-06, 2.9332e-05, 2.5223e-06,
        7.7364e-06, 3.0294e-06, 4.4452e-06, 1.1380e-04, 1.1659e-05, 8.5334e-06,
        7.6481e-05, 6.5076e-06, 1.9144e-05, 1.1015e-05, 1.3963e-05, 7.9438e-05,
        1.4749e-05, 4.5006e-06, 8.8728e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.8509e-05, 3.7298e-06, 7.5963e-06, 2.9120e-06, 4.6398e-06, 1.7771e-06,
        7.9956e-06, 9.5321e-06, 4.2446e-06, 6.0563e-06, 2.1050e-05, 2.1355e-06,
        6.1314e-06, 1.2254e-06, 2.9501e-06, 2.4740e-05, 9.8579e-06, 1.5640e-05,
        2.1463e-05, 7.6659e-06, 3.1203e-06, 4.1358e-06, 7.1538e-06, 1.0296e-05,
        2.1759e-06, 6.0459e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.3517e-04, 4.7112e-06, 8.1649e-06, 2.5530e-06, 3.5633e-06, 2.1629e-06,
        7.7398e-06, 5.6506e-06, 6.1679e-06, 8.0526e-06, 1.9271e-05, 3.0267e-06,
        9.1836e-06, 1.3415e-06, 1.9918e-06, 5.3985e-06, 8.7259e-06, 4.6444e-06,
        4.1633e-06, 9.0679e-06, 1.2870e-05, 1.2467e-05, 1.6668e-06, 6.1696e-05,
        4.6271e-06, 4.4181e-06, 1.2949e-05, 3.1320e-06, 5.3632e-06, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.3955e-04, 3.1768e-05, 6.3264e-06, 3.2017e-06, 5.8725e-06, 3.4335e-06,
        6.3377e-06, 4.5276e-06, 5.8862e-06, 7.0390e-06, 4.1604e-07, 5.8296e-06,
        7.2315e-06, 5.6391e-05, 1.5002e-04, 1.4883e-05, 9.3322e-06, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.1501e-04, 3.9473e-05, 6.7806e-06, 3.6766e-06, 6.3374e-06, 3.4466e-06,
        5.0897e-06, 4.2057e-06, 6.7271e-06, 1.0373e-05, 4.8075e-07, 4.5121e-06,
        5.2161e-06, 4.1598e-05, 8.6214e-05, 2.2572e-05, 2.6973e-06, 1.2284e-05,
        5.5610e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0794e-04,  3.8053e-05,  7.3284e-06,  3.3573e-06,  7.2860e-06,
         3.5490e-06,  5.1967e-06,  2.6528e-06,  5.9894e-06,  9.4783e-06,
        -4.9763e-07,  4.7291e-06,  4.3018e-06,  3.4447e-05,  1.7848e-05,
         2.0982e-06,  1.4864e-05,  6.0889e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([9.2439e-05, 8.7841e-05, 6.8691e-06, 8.2956e-06, 3.8416e-06, 7.5334e-06,
        1.7968e-05, 3.8865e-06, 6.3934e-06, 2.4674e-06, 1.8677e-06, 1.2529e-06,
        5.0884e-06, 3.7512e-05, 8.5172e-06, 8.2779e-05, 1.2268e-05, 1.2517e-04,
        6.4827e-06, 2.3655e-06, 7.6322e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2500: [tensor([1.0550e-04, 3.3502e-05, 7.6209e-06, 5.1095e-06, 6.3766e-06, 3.4306e-06,
        1.1133e-05, 5.9551e-06, 5.3449e-06, 1.6818e-05, 3.6932e-05, 1.4168e-06,
        2.3742e-06, 3.8357e-06, 6.5459e-05, 5.9802e-06, 8.9231e-06, 6.5040e-06,
        1.7545e-05, 8.4972e-06, 2.5774e-05, 9.7537e-06, 3.0542e-06, 6.6463e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.6461e-04, 4.8092e-05, 1.1726e-05, 7.4873e-06, 8.9578e-06, 2.7000e-06,
        1.5671e-05, 5.3264e-06, 6.2509e-06, 2.8628e-05, 3.9477e-05, 1.8559e-06,
        1.8708e-06, 6.0913e-06, 4.6988e-05, 8.8855e-06, 9.8658e-06, 3.7373e-05,
        1.3011e-05, 6.0803e-06, 3.7783e-06, 7.5415e-06, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.1914e-04, 7.1793e-05, 1.1311e-05, 4.7759e-06, 1.1525e-05, 3.3578e-06,
        1.6099e-05, 5.8381e-06, 8.1062e-06, 3.6522e-05, 5.8039e-05, 2.1899e-06,
        1.6980e-06, 5.6410e-06, 4.4411e-05, 1.0628e-05, 9.1534e-06, 3.8902e-05,
        6.7297e-06, 5.9209e-06, 8.5577e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.7671e-04, 2.5501e-05, 1.6287e-06, 9.6942e-06, 5.3096e-06, 1.3919e-05,
        2.5262e-05, 4.1664e-06, 3.4771e-06, 5.6368e-06, 3.7210e-06, 5.8326e-06,
        3.2267e-05, 6.7365e-06, 4.5055e-06, 1.2200e-04, 6.5840e-06, 1.8378e-06,
        3.8333e-06, 4.2101e-05, 7.6890e-06, 7.7963e-05, 1.5629e-05, 8.3366e-06,
        1.4206e-05, 1.3224e-05, 4.3410e-06, 5.0033e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0908e-04,  2.2280e-05, -5.9355e-08,  7.9733e-06,  6.4470e-06,
         2.0396e-05,  3.3140e-05,  5.4095e-06,  2.6734e-06,  5.1271e-06,
         2.5694e-06,  5.6075e-06,  4.6910e-05,  6.8340e-06,  6.0579e-06,
         5.7313e-05,  5.7097e-06,  2.4469e-06,  4.5814e-06,  2.9682e-05,
         7.0181e-06,  7.0083e-05,  6.9296e-06,  6.2115e-06,  4.9095e-05,
         9.5812e-06,  2.0402e-06,  7.5780e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1263e-04,  4.5471e-05, -1.1881e-07,  8.9647e-06,  5.1120e-06,
         1.9322e-05,  4.5933e-05,  4.1689e-06,  3.4238e-06,  4.4630e-06,
         3.4274e-06,  5.3940e-06,  4.9352e-05,  8.9633e-06,  3.3384e-06,
         8.4135e-05,  6.4881e-06,  2.1238e-06,  4.7850e-06,  3.5018e-05,
         1.1070e-05,  4.5853e-05,  8.8769e-06,  4.6239e-05,  1.5758e-05,
         6.2240e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.0839e-04, 6.1530e-05, 4.5356e-06, 1.3264e-05, 4.9137e-06, 7.1988e-06,
        3.9039e-06, 5.5234e-06, 9.4343e-06, 4.3157e-06, 7.5624e-06, 3.3362e-06,
        7.8063e-05, 1.7301e-06, 7.5716e-07, 9.0236e-06, 2.5065e-06, 6.6218e-06,
        3.0656e-05, 6.2139e-05, 1.4053e-04, 6.4480e-06, 1.5535e-05, 1.2205e-05,
        6.1974e-06, 9.3037e-06, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7775e-04,  4.6278e-05,  5.3428e-06,  1.1033e-05,  7.6743e-06,
         6.5328e-06,  4.2868e-06,  4.9249e-06,  1.0587e-05,  4.1761e-06,
         3.5522e-06,  1.7895e-06,  4.3037e-05,  2.0090e-06, -4.7311e-08,
         9.4987e-06,  3.0303e-06,  7.0782e-06,  8.4095e-06,  1.7398e-05,
         4.0072e-05,  7.5071e-06,  2.9796e-05,  1.2603e-05,  2.4452e-05,
         6.4500e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3940e-04,  4.0088e-05,  8.3842e-06,  1.2727e-05,  6.2920e-06,
         7.0926e-06,  4.7936e-06,  5.8884e-06,  1.1798e-05,  3.5762e-06,
         5.1350e-06,  2.5600e-06,  4.7848e-05,  1.2135e-06, -2.0813e-07,
         8.0458e-06,  2.9531e-06,  6.7230e-06,  1.1218e-05,  2.8233e-05,
         9.7038e-06,  5.1279e-06,  6.6768e-06,  2.8311e-05,  1.0510e-05,
         2.6867e-06,  7.9047e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.1551e-04, 4.6068e-05, 5.8639e-06, 9.7896e-06, 4.3903e-05, 7.8696e-06,
        6.7771e-05, 9.8223e-06, 8.1549e-06, 1.4515e-05, 7.6107e-06, 7.5889e-06,
        4.9979e-06, 8.2319e-06, 2.1544e-07, 8.2942e-06, 8.5549e-05, 1.1176e-05,
        1.5101e-05, 5.6202e-05, 1.0489e-05, 1.3821e-05, 1.0568e-05, 1.2294e-05,
        6.2564e-06, 1.2750e-05, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.0523e-04, 3.4301e-05, 2.6712e-06, 7.6454e-06, 3.2900e-05, 4.8467e-06,
        6.4304e-05, 8.1085e-06, 8.2856e-06, 1.5247e-05, 5.6464e-06, 8.0816e-06,
        4.9509e-06, 8.9049e-06, 1.1287e-07, 8.4548e-06, 5.6020e-05, 1.6621e-05,
        1.0699e-05, 1.5174e-04, 9.7215e-06, 1.4089e-05, 6.4055e-06, 9.3049e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.2984e-04, 5.3083e-05, 2.7377e-06, 4.8253e-06, 3.1433e-05, 4.8270e-06,
        3.5539e-05, 6.9878e-06, 6.4706e-06, 7.2472e-06, 5.3479e-06, 5.9600e-06,
        4.1251e-06, 8.3019e-06, 1.2446e-06, 5.7211e-06, 5.9026e-05, 1.5334e-05,
        2.4740e-05, 8.2488e-06, 7.3637e-05, 3.5004e-06, 5.2505e-06, 8.2319e-06,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2550: [tensor([2.0933e-04, 9.6619e-05, 1.1463e-05, 2.1114e-05, 1.1338e-05, 2.7385e-06,
        9.1401e-05, 7.9472e-06, 1.3647e-05, 4.3616e-06, 6.2168e-06, 4.1260e-05,
        1.2939e-04, 1.5843e-05, 1.0466e-05, 2.7449e-05, 1.9712e-05, 1.3102e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([8.4055e-05, 8.5241e-06, 5.8136e-06, 6.9174e-06, 3.0461e-06, 9.3325e-06,
        4.9135e-05, 1.0680e-05, 2.4953e-06, 7.6795e-06, 8.2373e-06, 1.1083e-05,
        1.2956e-05, 1.6247e-05, 1.1455e-05, 1.5009e-05, 1.8260e-05, 1.0692e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.2198e-04, 1.7179e-05, 8.0530e-06, 1.6510e-05, 9.2202e-06, 1.4936e-05,
        1.0739e-04, 1.5749e-05, 6.3299e-06, 1.5137e-05, 3.7889e-05, 2.4828e-05,
        7.7640e-05, 1.5066e-05, 1.8133e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8034e-04, 1.1315e-05, 8.9397e-06, 9.3164e-06, 7.0699e-06, 1.0752e-05,
        9.4727e-05, 1.3498e-05, 3.9087e-06, 9.1203e-06, 1.3159e-05, 1.3203e-05,
        2.8181e-05, 1.5734e-05, 1.1110e-05, 1.0672e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.8038e-04, 7.3632e-05, 1.7960e-05, 1.8345e-05, 1.2896e-04, 1.1066e-05,
        1.0212e-05, 1.9822e-05, 1.0768e-05, 8.0483e-05, 2.0485e-06, 1.7839e-05,
        1.5386e-05, 3.5189e-05, 8.3724e-06, 2.7608e-06, 1.3003e-05, 3.3664e-04,
        3.2557e-05, 1.1456e-04, 1.4532e-05, 1.4289e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2579e-04,  8.5919e-05,  1.0830e-05,  1.1535e-05,  8.7556e-05,
         9.4004e-06,  1.2901e-05,  1.8849e-05,  1.6221e-05,  8.5470e-05,
         3.7882e-06,  1.3633e-05,  1.3675e-05,  5.6001e-05,  6.9761e-06,
        -8.8521e-07,  1.0975e-05,  2.0143e-04,  2.5696e-05,  8.3937e-05,
         1.5577e-05,  2.4903e-04,  1.5631e-05,  1.5710e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([2.7204e-04, 8.2500e-05, 2.4101e-05, 1.6844e-05, 1.0909e-04, 1.7236e-05,
        1.1392e-05, 2.7498e-05, 8.8663e-06, 1.2907e-04, 5.7246e-06, 2.0303e-05,
        1.8418e-05, 8.8169e-05, 8.2851e-06, 3.3343e-06, 1.3014e-05, 3.4681e-04,
        2.5865e-05, 1.3783e-04, 1.6974e-05, 1.4448e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8019e-04, 6.9793e-06, 8.2495e-06, 1.2526e-05, 2.9391e-05, 6.0765e-06,
        1.1750e-05, 5.8405e-06, 4.4936e-06, 7.3310e-06, 8.0554e-06, 2.3413e-06,
        9.8928e-06, 8.7639e-06, 1.4472e-05, 9.0634e-06, 1.0852e-04, 1.0162e-05,
        8.6996e-06, 1.9020e-05, 2.3515e-05, 6.2172e-05, 6.1046e-06, 5.6407e-05,
        4.7786e-05, 1.0711e-05, 1.9887e-05, 7.1888e-06, 1.1341e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([5.7849e-05, 5.3242e-06, 4.4893e-06, 1.1376e-05, 1.7888e-05, 4.2241e-06,
        9.5314e-06, 3.7596e-06, 2.9246e-06, 5.6567e-06, 7.9691e-06, 2.6611e-06,
        6.6176e-06, 5.5383e-06, 4.6488e-06, 7.6060e-06, 7.9608e-05, 4.1656e-05,
        9.2559e-06, 1.2429e-05, 8.1062e-06, 1.0876e-05, 1.4855e-05, 7.0908e-06,
        7.2108e-06, 7.0311e-06, 3.4380e-06, 7.0607e-06, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.6899e-04, 9.5857e-06, 1.0844e-05, 1.6698e-05, 2.7771e-05, 9.4063e-06,
        1.7498e-05, 7.7955e-06, 6.3523e-06, 6.5592e-06, 1.4166e-05, 3.6946e-06,
        1.1479e-05, 1.1294e-05, 1.2791e-05, 1.4937e-05, 2.4373e-05, 6.7868e-06,
        1.4009e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8861e-04, 2.1485e-05, 6.3103e-05, 1.3682e-05, 2.0718e-05, 1.7652e-05,
        7.3854e-05, 2.0790e-05, 8.1705e-06, 1.3649e-05, 7.2392e-05, 4.3112e-05,
        6.4742e-05, 3.0516e-05, 1.4937e-05, 4.3235e-05, 1.2070e-04, 3.3365e-05,
        1.8828e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2843e-04, 1.5737e-05, 3.7256e-05, 1.1168e-05, 2.1473e-05, 1.4676e-05,
        5.2753e-05, 1.3704e-05, 6.1383e-06, 1.0929e-05, 5.2723e-05, 5.9241e-06,
        8.6929e-06, 2.3549e-05, 2.0219e-05, 1.5176e-04, 3.3812e-05, 1.0289e-05,
        2.0063e-05, 1.7546e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2600: [tensor([3.6557e-04, 3.5687e-05, 3.6241e-05, 1.2170e-05, 1.6016e-05, 4.2542e-05,
        1.0902e-05, 7.9562e-06, 1.0629e-05, 4.3661e-05, 1.5060e-05, 5.6107e-06,
        1.3100e-05, 7.3043e-06, 4.4951e-06, 1.1106e-05, 1.6588e-05, 2.9702e-05,
        7.1746e-06, 1.8789e-05, 5.7634e-06, 1.0932e-05, 5.7497e-06, 6.8856e-06,
        4.6633e-06, 4.8704e-05, 3.6142e-06, 1.5034e-05, 2.1212e-05, 8.3328e-06,
        1.0517e-05, 1.2507e-05, 1.6744e-05, 3.0304e-06, 4.1630e-05, 4.4759e-05,
        1.4862e-05, 2.1629e-06, 3.5968e-06, 7.6541e-06, 7.0408e-05, 1.6137e-05,
        2.1073e-04, 1.0942e-05, 2.9585e-05, 1.4934e-04, 2.2705e-05, 8.8000e-06,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.8736e-04, 3.7745e-05, 3.0100e-05, 1.4596e-05, 1.5733e-05, 6.6021e-05,
        1.3123e-05, 8.3771e-06, 1.1582e-05, 3.9865e-05, 1.1790e-05, 3.1451e-06,
        1.0166e-05, 8.9046e-06, 2.1940e-06, 1.2358e-05, 1.9018e-05, 3.6615e-05,
        7.3496e-06, 1.8280e-05, 7.2606e-06, 1.3031e-05, 4.2731e-06, 6.9995e-06,
        5.0216e-06, 5.2940e-05, 1.0836e-06, 1.3134e-05, 1.6125e-05, 3.8018e-06,
        1.1465e-05, 8.9629e-06, 1.8466e-05, 2.4889e-06, 5.7277e-05, 3.2584e-05,
        1.1554e-05, 4.1525e-06, 4.0663e-06, 7.2129e-06, 6.1666e-05, 1.4417e-05,
        2.8483e-04, 1.7981e-05, 9.8883e-06, 1.4111e-05, 1.2902e-05, 7.7540e-06,
        1.0208e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0632e-04, 1.5698e-05, 2.7428e-05, 6.0441e-05, 1.3910e-05, 1.9504e-05,
        1.9501e-05, 3.2918e-05, 2.8474e-05, 4.0443e-05, 1.9215e-05, 2.3456e-06,
        1.2904e-05, 2.2346e-05, 2.5425e-05, 9.9953e-05, 3.8000e-05, 2.8423e-05,
        2.2617e-05, 4.2880e-05, 2.2356e-05, 3.2549e-05, 1.9197e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0115e-04, 1.3358e-05, 2.3995e-05, 5.3936e-05, 1.2737e-05, 1.3833e-05,
        1.7350e-05, 2.6680e-05, 1.6884e-05, 2.3318e-05, 3.6579e-05, 2.5952e-06,
        1.8059e-05, 1.7834e-05, 2.0044e-05, 9.8662e-05, 4.1967e-05, 2.2219e-05,
        1.7488e-05, 2.0110e-05, 1.8677e-05, 2.6981e-05, 6.1727e-05, 3.4845e-05,
        2.0657e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7912e-04, 1.6347e-05, 3.1048e-05, 5.2546e-05, 2.4642e-05, 2.3786e-05,
        1.6747e-05, 3.2236e-05, 2.0011e-05, 2.9949e-05, 2.3972e-05, 7.1768e-06,
        1.4454e-05, 1.9288e-05, 2.6744e-05, 6.5710e-05, 4.8816e-05, 2.8690e-05,
        2.5280e-05, 7.2666e-05, 3.3345e-05, 2.2990e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2994e-04, 8.4364e-05, 2.6536e-05, 2.4096e-05, 1.9827e-05, 8.1960e-06,
        1.3398e-05, 7.6339e-05, 1.0268e-05, 6.8896e-05, 1.2036e-05, 4.4211e-06,
        1.3021e-05, 2.3332e-04, 3.1200e-05, 1.4759e-04, 3.2253e-05, 2.9946e-05,
        2.5540e-04, 2.5162e-05, 1.4043e-05, 7.4629e-05, 2.4072e-05, 1.1098e-05,
        1.7788e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.3455e-04, 1.4132e-04, 3.5249e-05, 2.5350e-05, 3.4611e-05, 1.8312e-05,
        2.2803e-05, 1.9642e-04, 2.3915e-05, 1.5963e-04, 1.6574e-05, 7.0316e-06,
        1.8035e-05, 4.1229e-04, 3.6941e-05, 1.7985e-04, 3.4206e-04, 1.3330e-04,
        2.3878e-05, 1.0486e-05, 2.6431e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.5952e-04, 7.5806e-05, 1.7671e-05, 1.6603e-05, 1.8807e-05, 8.3937e-06,
        1.2670e-05, 5.6030e-05, 1.2372e-05, 9.1456e-05, 8.2714e-06, 3.1811e-06,
        9.6547e-06, 1.9831e-04, 1.5589e-05, 1.0636e-04, 1.5234e-05, 4.5948e-05,
        1.6021e-05, 2.6661e-04, 8.6732e-06, 1.0958e-04, 9.4168e-05, 2.3334e-05,
        3.0546e-06, 1.3907e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8610e-04,  2.0374e-05, -5.2525e-07,  1.5800e-05,  2.7030e-05,
         4.0808e-05,  8.0540e-06,  1.3099e-05,  3.8570e-05,  1.3592e-05,
         1.6254e-05,  1.7408e-05,  3.2710e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.2616e-04, 2.6947e-05, 1.6865e-05, 2.5355e-05, 3.9754e-05, 5.2353e-05,
        1.4036e-05, 1.8340e-05, 4.2328e-05, 2.6795e-05, 2.1555e-05, 3.5529e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([3.4682e-04, 2.2510e-05, 1.0883e-05, 1.7235e-05, 4.1063e-05, 5.0034e-05,
        1.3736e-05, 3.1097e-05, 3.9344e-05, 1.9703e-05, 3.1658e-05, 2.2081e-05,
        4.4537e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9672e-04, 1.3182e-04, 6.8806e-06, 1.8986e-05, 3.3808e-05, 1.4604e-05,
        2.8473e-05, 1.5845e-05, 9.6995e-06, 1.6110e-05, 2.3346e-04, 1.1964e-05,
        1.7913e-04, 2.6403e-05, 2.1056e-05, 1.3568e-04, 2.1321e-04, 1.4711e-05,
        5.0607e-05, 1.5418e-05, 2.8149e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2650: [tensor([3.9685e-04, 2.9919e-04, 1.3931e-05, 9.1462e-06, 6.7921e-05, 3.7779e-05,
        1.9979e-05, 4.4292e-05, 2.2895e-04, 3.6046e-05, 4.9397e-05, 1.6341e-05,
        2.5221e-05, 1.0253e-05, 2.1750e-05, 1.6011e-04, 6.4996e-05, 1.4745e-04,
        4.9692e-05, 4.1214e-05, 3.4597e-05, 1.8959e-05, 3.9544e-05, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.5166e-04, 1.3554e-04, 5.8373e-06, 3.8221e-06, 3.5108e-05, 2.6436e-05,
        1.9675e-05, 2.7558e-05, 3.2821e-04, 2.6917e-05, 5.4429e-05, 1.4368e-05,
        3.1984e-05, 1.1247e-05, 1.8120e-05, 5.5218e-04, 1.8570e-05, 8.5004e-05,
        5.5649e-05, 2.8367e-04, 7.3555e-05, 1.4620e-05, 3.9446e-05, 2.0813e-05,
        2.6790e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.0471e-04, 2.6266e-04, 1.4610e-05, 7.3487e-06, 5.0193e-05, 4.5010e-05,
        1.5526e-05, 4.6073e-05, 3.9499e-04, 4.1566e-05, 6.9429e-05, 1.0877e-05,
        3.1340e-05, 1.3801e-05, 3.1909e-05, 7.0188e-05, 2.1993e-04, 1.2378e-04,
        5.2751e-05, 2.3628e-05, 3.1630e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.0098e-04, 2.0185e-04, 4.7982e-05, 6.1349e-05, 1.7516e-05, 1.6286e-05,
        2.1574e-05, 2.7299e-05, 3.4945e-05, 2.1565e-05, 8.8483e-06, 3.0904e-05,
        4.3328e-04, 4.0777e-05, 6.2257e-05, 3.5162e-05, 6.5735e-05, 6.9999e-05,
        1.3519e-04, 6.8635e-05, 1.3901e-05, 3.7857e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.7764e-04, 3.3473e-04, 3.9640e-05, 5.1594e-05, 1.7375e-05, 1.4522e-05,
        1.4420e-05, 2.3408e-05, 4.5448e-05, 2.4271e-05, 5.4830e-06, 2.0017e-05,
        5.2212e-04, 6.2591e-05, 1.1545e-05, 2.4884e-05, 5.1162e-05, 5.9825e-05,
        3.0058e-05, 5.7447e-05, 3.1747e-05, 1.1899e-05, 2.5169e-05, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.7588e-04, 1.7553e-04, 4.2320e-05, 7.0133e-05, 1.4135e-05, 1.3097e-05,
        1.5615e-05, 2.8087e-05, 3.7123e-05, 1.8881e-05, 4.9841e-06, 2.4664e-05,
        5.5930e-05, 7.2489e-05, 3.9096e-05, 2.2583e-05, 3.7361e-05, 3.1570e-05,
        4.6611e-04, 4.2557e-05, 6.7706e-05, 8.9269e-06, 3.9673e-05, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.2086e-04, 2.4150e-04, 3.8250e-05, 1.4313e-05, 3.1947e-05, 6.5709e-05,
        2.3294e-04, 3.3837e-05, 2.0586e-04, 2.7765e-04, 3.7801e-05, 3.5663e-05,
        1.3666e-04, 3.5817e-05, 1.4641e-05, 2.8821e-05, 2.0028e-04, 1.5638e-04,
        4.4662e-05, 3.3565e-05, 3.8526e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.7125e-04, 2.8748e-04, 2.4734e-05, 2.0621e-05, 3.2861e-05, 4.5159e-05,
        2.1122e-04, 2.5222e-05, 1.6058e-04, 2.0144e-04, 1.9524e-05, 1.9338e-05,
        1.3026e-04, 3.5093e-05, 7.1803e-06, 2.2283e-05, 2.4628e-04, 2.8330e-05,
        2.3892e-04, 2.2361e-05, 3.4297e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.7642e-04, 2.8888e-04, 2.3094e-05, 1.6183e-05, 3.1391e-05, 4.2027e-05,
        1.6223e-04, 2.9781e-05, 1.1444e-04, 3.4511e-04, 3.5003e-05, 1.5287e-05,
        1.5421e-04, 2.8703e-05, 8.6130e-06, 2.6012e-05, 1.3840e-04, 3.9508e-04,
        4.2724e-05, 5.1582e-05, 3.6907e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.4879e-04, 8.2055e-05, 1.1065e-05, 1.6279e-04, 2.4804e-05, 6.5183e-05,
        1.2371e-05, 6.9013e-06, 9.7833e-05, 1.9515e-05, 7.7622e-06, 2.2012e-05,
        1.6443e-04, 1.1888e-05, 4.7878e-05, 1.6475e-04, 5.2771e-05, 9.9755e-05,
        1.3996e-04, 1.4351e-05, 4.2353e-05, 3.7712e-05, 4.3641e-05, 2.2057e-05,
        4.1573e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.6267e-04, 1.7393e-04, 2.9015e-05, 2.0008e-04, 4.6506e-05, 1.2505e-04,
        1.8603e-05, 1.8331e-05, 1.6129e-04, 5.2520e-05, 6.5656e-06, 4.5946e-05,
        4.8912e-04, 3.7811e-05, 2.8841e-04, 7.8863e-05, 2.2392e-03, 3.5360e-05,
        5.7855e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.2403e-04, 1.0106e-04, 1.2989e-05, 1.5999e-04, 2.3273e-05, 9.6253e-05,
        1.0587e-05, 6.2148e-06, 1.0048e-04, 2.8456e-05, 4.7972e-06, 1.7390e-05,
        1.8488e-04, 1.7868e-05, 1.7927e-04, 3.0338e-05, 4.8512e-05, 5.5749e-05,
        5.7284e-05, 3.6589e-05, 1.5449e-05, 3.4591e-05, 1.7404e-05, 3.2458e-05,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2700: [tensor([4.9534e-04, 1.7745e-04, 3.8641e-05, 1.3341e-04, 2.0184e-05, 1.7396e-05,
        9.9912e-05, 2.2222e-05, 5.2700e-05, 1.0972e-04, 1.5378e-05, 3.7803e-05,
        2.2189e-05, 2.1371e-05, 2.6150e-05, 2.2194e-05, 1.3929e-05, 3.8069e-05,
        2.7046e-05, 3.1866e-04, 2.7217e-05, 3.3398e-05, 4.7911e-05, 7.5915e-05,
        8.7463e-05, 5.1373e-04, 3.2285e-05, 7.1046e-06, 2.9741e-05, 3.8733e-05,
        1.0158e-04, 3.5001e-05, 5.1537e-05, 1.9361e-04, 1.0823e-04, 3.1118e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4775e-04,  2.0256e-04,  2.5752e-05,  1.8560e-04,  1.1111e-05,
         1.6514e-05,  1.1040e-04,  1.9019e-05,  4.7669e-05,  1.1563e-04,
         1.1064e-05,  4.4812e-05,  2.4391e-05,  2.3983e-05,  2.0849e-05,
         1.0961e-05,  1.4816e-05,  4.0516e-05,  2.1312e-05,  1.9860e-04,
         2.3883e-05,  2.2759e-05,  5.2284e-05,  5.0982e-05,  8.8562e-05,
         3.7175e-04,  1.8691e-05, -4.1884e-07,  2.2049e-05,  3.9456e-04,
         3.6229e-05,  1.2288e-04,  3.1645e-05,  1.3871e-05,  5.5239e-05,
         1.2922e-05,  3.5510e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([5.4628e-04, 2.2559e-04, 8.6399e-05, 1.8482e-04, 2.0420e-05, 2.0637e-05,
        1.7852e-04, 2.9921e-05, 6.4700e-05, 1.3252e-04, 1.7119e-05, 4.0078e-05,
        3.1566e-05, 2.7834e-05, 3.3399e-05, 2.0009e-05, 2.4656e-05, 5.5458e-05,
        4.3703e-05, 2.5356e-04, 4.7747e-05, 3.3056e-05, 6.9443e-05, 5.6364e-05,
        1.6596e-04, 3.3392e-04, 1.5638e-05, 6.1066e-06, 2.5991e-05, 5.6189e-05,
        2.0528e-04, 7.1707e-04, 4.2998e-05, 3.2150e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4621e-04, -3.0441e-04, -4.2913e-05, -1.5256e-04, -2.7276e-05,
        -2.2555e-05, -1.7076e-04, -1.9844e-05, -5.0310e-05, -1.3033e-04,
        -2.0203e-05, -3.7437e-05, -2.7939e-05, -3.3014e-05, -3.0461e-05,
        -2.0498e-05, -2.8613e-05, -6.3607e-05, -3.2898e-05, -4.9934e-04,
        -2.9771e-05, -3.4869e-05, -4.7070e-05, -6.3142e-05, -1.5762e-04,
        -3.6372e-04, -1.6600e-05, -4.0188e-06, -3.3387e-05, -2.4019e-04,
        -6.1193e-05, -1.5644e-04, -3.7149e-05, -7.9589e-05, -5.7276e-04,
        -5.7102e-05, -4.8529e-05, -2.8037e-05, -4.5557e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([6.6426e-04, 1.4945e-04, 3.8921e-05, 1.4193e-04, 2.1619e-05, 2.1975e-05,
        1.2759e-04, 2.4345e-05, 3.8601e-05, 1.0347e-04, 1.6641e-05, 5.1499e-05,
        3.0590e-05, 1.3109e-05, 1.8450e-05, 2.0104e-05, 1.8854e-05, 4.5852e-05,
        3.3714e-05, 2.7397e-04, 1.7922e-05, 1.9920e-05, 3.9008e-05, 6.7714e-05,
        1.3629e-04, 2.7760e-04, 1.7758e-05, 1.2390e-05, 2.4610e-05, 1.4231e-04,
        4.3629e-05, 4.5012e-05, 2.4630e-04, 2.2096e-05, 4.2672e-05, 5.8787e-05,
        8.4990e-04, 7.9436e-05, 1.0629e-05, 2.8585e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.9604e-04, 1.4736e-04, 3.2780e-05, 1.2309e-04, 1.0163e-05, 2.1274e-05,
        1.1790e-04, 1.8218e-05, 4.5189e-05, 9.7891e-05, 1.3757e-05, 4.7772e-05,
        1.9777e-05, 1.8261e-05, 2.3885e-05, 2.1335e-05, 1.5753e-05, 3.6225e-05,
        2.6513e-05, 3.2047e-04, 2.6447e-05, 2.4609e-05, 4.1544e-05, 8.0008e-05,
        1.2896e-04, 2.3025e-04, 2.1371e-05, 7.5952e-06, 2.7147e-05, 1.4730e-04,
        5.1804e-05, 1.2760e-04, 2.5444e-05, 5.9208e-04, 2.4478e-05, 4.6847e-05,
        1.9942e-05, 1.8001e-05, 4.6507e-05, 2.9998e-05, 3.4123e-05, 1.5854e-05,
        4.2167e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6934e-04, -2.1789e-04, -4.5659e-05, -1.5631e-04, -1.8971e-05,
        -3.3682e-05, -1.8215e-04, -2.4660e-05, -6.7948e-05, -1.2401e-04,
        -2.0062e-05, -6.7465e-05, -3.1981e-05, -2.9262e-05, -4.0782e-05,
        -2.0015e-05, -3.0302e-05, -5.0737e-05, -3.6692e-05, -5.4143e-04,
        -4.2434e-05, -3.2693e-05, -4.4744e-05, -6.1740e-05, -1.7185e-04,
        -3.2166e-04, -3.4556e-05, -1.6404e-05, -2.8308e-05, -1.2048e-04,
        -1.0587e-04, -1.0459e-04, -4.6265e-05, -2.4109e-04, -3.3517e-05,
        -3.8926e-05, -2.7072e-05, -3.8794e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([4.3352e-04, 1.8352e-04, 3.4012e-05, 1.2158e-04, 1.8516e-05, 2.4074e-05,
        9.8110e-05, 2.1712e-05, 3.9794e-05, 1.0672e-04, 2.1662e-05, 5.8828e-05,
        2.3913e-05, 2.0959e-05, 2.2871e-05, 2.4955e-05, 1.8684e-05, 4.4596e-05,
        2.5388e-05, 2.2400e-04, 2.9449e-05, 3.7616e-05, 6.1850e-05, 5.3779e-05,
        1.1184e-04, 2.8703e-04, 2.6402e-05, 6.6031e-06, 2.1541e-05, 3.6207e-04,
        3.8891e-05, 3.4140e-05, 2.0326e-04, 3.7431e-05, 3.4261e-05, 3.8637e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.1404e-04, 2.1411e-04, 3.4440e-05, 1.0862e-04, 1.4736e-05, 1.8860e-05,
        1.0527e-04, 2.4198e-05, 5.1725e-05, 1.3822e-04, 1.5209e-05, 5.9178e-05,
        3.7072e-05, 1.9412e-05, 2.5374e-05, 1.5546e-05, 2.5345e-05, 3.4473e-05,
        3.2071e-05, 3.0672e-04, 3.0684e-05, 3.0515e-05, 5.8878e-05, 6.2624e-05,
        1.0968e-04, 2.7466e-04, 1.8322e-05, 1.4883e-05, 2.7493e-05, 4.3322e-04,
        4.9954e-05, 1.4526e-04, 1.7788e-05, 6.7246e-05, 3.2564e-05, 4.3211e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6820e-04, -2.1368e-04, -4.9401e-05, -1.4797e-04, -8.7568e-06,
        -3.8048e-05, -1.0492e-04, -3.3648e-05, -5.6724e-05, -1.4163e-04,
        -2.1611e-05, -6.2484e-05, -4.1899e-05, -2.4917e-05, -3.6720e-05,
        -1.8719e-05, -1.6252e-05, -5.3018e-05, -3.3463e-05, -5.0535e-04,
        -3.1360e-05, -4.4304e-05, -5.8000e-05, -5.6773e-05, -1.6064e-04,
        -7.8167e-04, -2.3041e-05, -9.7845e-06, -2.3663e-05, -9.2845e-05,
        -1.8846e-04, -7.6557e-05, -3.4911e-05, -1.7273e-04, -1.3786e-03,
        -9.5112e-05, -3.7415e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.3022e-04, 3.6293e-04, 5.5929e-05, 8.1733e-05, 2.8964e-04, 6.2306e-05,
        4.9879e-05, 2.4943e-05, 5.0299e-05, 6.2511e-05, 9.0685e-05, 1.8817e-04,
        8.3090e-05, 6.9662e-05, 4.6867e-04, 7.2847e-05, 1.5968e-04, 5.8157e-05,
        6.2176e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.6553e-04, 3.2224e-04, 5.9148e-05, 1.1187e-04, 2.0287e-04, 6.9504e-05,
        7.1914e-05, 2.6474e-05, 4.6425e-05, 7.0009e-05, 7.2169e-05, 1.5694e-04,
        1.2121e-04, 1.0887e-04, 3.7281e-05, 7.7461e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2750: [tensor([9.5796e-04, 4.1168e-04, 6.5632e-05, 6.4613e-05, 5.3503e-04, 4.8004e-05,
        2.9396e-05, 1.7545e-04, 1.0499e-04, 5.6184e-04, 5.6382e-05, 5.0506e-05,
        5.4541e-05, 5.1602e-05, 8.6084e-05, 1.5916e-04, 1.0506e-04, 1.0001e-04,
        1.5667e-05, 1.8866e-05, 4.6653e-05, 1.1381e-03, 8.4954e-05, 5.1434e-05,
        2.0201e-03, 8.6427e-05, 5.6681e-05, 6.2124e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9343e-05, -3.2708e-04, -4.7936e-05, -4.7966e-05, -4.1540e-04,
        -3.3443e-05, -3.4771e-05, -1.2981e-04, -1.0170e-04, -5.3225e-04,
        -4.9099e-05, -3.7252e-05, -3.4361e-05, -3.8254e-05, -7.7766e-05,
        -9.7763e-05, -7.2565e-05, -6.8771e-05, -2.3728e-05, -1.4946e-05,
        -2.8445e-05, -7.0080e-04, -6.4853e-05, -5.7185e-05, -1.2861e-04,
        -9.9963e-05, -3.8658e-04, -7.5548e-05, -3.2576e-04, -2.6012e-04,
        -3.9100e-05, -2.9771e-05, -4.3386e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7615e-04, -1.6447e-04, -4.2467e-05, -3.7178e-05, -2.1657e-04,
        -3.7488e-05, -1.4837e-05, -4.1040e-05, -4.4089e-05, -4.6448e-05,
        -2.8439e-05, -2.1619e-04, -1.3044e-05, -3.2672e-05, -3.2152e-05,
        -5.1436e-04, -5.3839e-05, -6.9304e-05, -1.7045e-05, -5.6171e-05,
        -2.5067e-05, -4.7356e-06, -1.0221e-05, -6.3350e-05, -4.4437e-07,
        -2.9085e-05, -1.7656e-03, -4.8148e-05, -5.3385e-05, -5.4099e-04,
        -3.4095e-05, -1.9405e-05, -5.1445e-04, -3.1179e-05, -9.3431e-05,
        -2.4507e-05, -7.9275e-05, -5.0880e-05, -3.1423e-05, -3.8511e-05,
        -8.8857e-05, -3.2549e-05, -4.4002e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1984e-04, -1.6541e-04, -3.9903e-05, -2.5719e-05, -2.3842e-04,
        -2.5066e-05, -1.8918e-05, -3.4175e-05, -3.8859e-05, -2.7665e-05,
        -1.4957e-05, -2.0197e-04, -2.3799e-05, -1.7295e-05, -2.7522e-05,
        -4.2729e-04, -4.1935e-05, -7.0524e-05, -1.4663e-05, -2.7664e-05,
        -2.3086e-05, -1.0225e-05, -1.5006e-05, -4.4984e-05, -1.2685e-05,
        -1.8640e-05, -1.0489e-04, -1.9981e-04, -5.6329e-05, -2.2642e-05,
        -2.1296e-04, -4.0483e-05, -3.0737e-05, -1.0133e-05, -6.8415e-05,
        -2.7728e-05, -3.7038e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.0367e-03, 3.0045e-04, 6.4269e-05, 4.5184e-05, 2.5584e-04, 4.6516e-05,
        2.0500e-05, 5.0695e-05, 4.0798e-05, 6.7554e-05, 2.6763e-05, 3.4340e-04,
        2.5921e-05, 3.5646e-05, 4.8923e-05, 4.0771e-04, 6.7074e-05, 9.4089e-05,
        1.4252e-05, 8.7960e-05, 4.6463e-05, 9.1889e-06, 3.1639e-05, 6.0718e-05,
        8.2905e-08, 3.4855e-05, 1.9501e-03, 5.0712e-05, 1.3272e-04, 8.0138e-05,
        8.7993e-05, 3.4046e-05, 9.1795e-05, 1.8982e-05, 6.3611e-05, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4572e-04, -2.2725e-04, -5.0636e-05, -2.6146e-05, -2.9279e-04,
        -3.1959e-05, -1.4621e-05, -4.5014e-05, -4.6267e-05, -3.7457e-05,
        -2.6530e-05, -2.3084e-04, -1.9424e-05, -2.7840e-05, -3.5863e-05,
        -4.2496e-04, -5.0315e-05, -6.4595e-05, -1.5171e-05, -5.5624e-05,
        -3.6453e-05, -5.3773e-06, -2.2431e-05, -4.5988e-05, -8.0682e-06,
        -3.0118e-05, -5.3493e-05, -4.8710e-05, -5.8217e-05, -5.0916e-05,
        -3.4578e-05, -4.9523e-05, -5.6042e-05, -1.9815e-05, -6.2920e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([9.2636e-04, 2.5641e-04, 3.7054e-05, 4.1418e-05, 2.2480e-04, 3.1252e-05,
        1.7601e-05, 4.9360e-05, 4.6039e-05, 4.0377e-05, 2.7175e-05, 2.1274e-04,
        2.0302e-05, 2.9212e-05, 4.6832e-05, 2.8150e-04, 5.3756e-05, 5.9168e-05,
        1.4979e-05, 5.3311e-05, 2.8833e-05, 1.0180e-05, 2.1813e-05, 4.7779e-05,
        9.6473e-06, 2.9571e-05, 1.0033e-03, 5.7123e-05, 5.4538e-04, 4.8758e-05,
        4.5904e-05, 7.4967e-04, 3.4731e-05, 4.5849e-05, 2.6203e-05, 8.3649e-05,
        2.4154e-05, 4.0326e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.9009e-04, 1.8359e-04, 4.3177e-05, 3.4478e-05, 1.9163e-04, 2.5638e-05,
        9.6956e-06, 4.0119e-05, 3.3213e-05, 3.6368e-05, 1.4015e-05, 2.2535e-04,
        2.5756e-05, 1.7002e-05, 3.7913e-05, 4.0944e-04, 3.2313e-05, 5.9650e-05,
        1.1093e-05, 3.8201e-05, 2.9383e-05, 3.9047e-06, 1.8632e-05, 4.5849e-05,
        7.2844e-06, 2.3609e-05, 2.8021e-04, 4.9330e-05, 4.4352e-05, 1.7268e-04,
        4.3540e-05, 4.0854e-05, 1.7116e-05, 2.7421e-05, 2.2561e-04, 8.3796e-05,
        5.2806e-05, 3.9938e-05, 4.3835e-05, 2.9423e-05, 3.6046e-05, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3010e-04, -2.4374e-04, -5.3717e-05, -3.7292e-05, -3.2796e-04,
        -3.8143e-05, -2.9585e-05, -5.3385e-05, -5.1697e-05, -4.1976e-05,
        -2.8805e-05, -2.3140e-04, -1.6286e-05, -3.5673e-05, -3.9766e-05,
        -5.9972e-04, -7.7517e-05, -8.0886e-05, -1.4347e-05, -6.0467e-05,
        -2.8113e-05, -3.2978e-06, -2.3775e-05, -6.0148e-05, -1.4396e-05,
        -3.4727e-05, -1.1122e-04, -1.0731e-04, -6.8426e-04, -5.8155e-05,
        -1.8450e-05, -3.9213e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([3.8830e-04, 2.9268e-04, 5.2308e-05, 4.4302e-05, 1.8923e-04, 4.1512e-05,
        1.8998e-05, 4.8299e-05, 3.7522e-05, 3.3282e-05, 2.4513e-05, 2.7544e-04,
        1.4641e-05, 2.8202e-05, 4.0921e-05, 4.1345e-04, 5.1870e-05, 7.1435e-05,
        1.5531e-05, 5.0413e-05, 4.6607e-05, 1.6295e-05, 2.2105e-05, 5.5635e-05,
        9.4049e-06, 3.1151e-05, 1.5027e-03, 3.3780e-05, 3.0928e-05, 3.4143e-04,
        1.2073e-04, 1.0319e-04, 4.0469e-05, 4.1266e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([7.3590e-04, 3.3240e-04, 4.6994e-05, 3.1108e-05, 2.9335e-04, 2.4967e-05,
        1.5137e-05, 4.8850e-05, 2.8207e-05, 5.3633e-05, 2.6124e-05, 1.5140e-04,
        1.7032e-05, 2.6221e-05, 3.4957e-05, 3.2124e-04, 7.0723e-05, 9.1310e-05,
        2.3974e-05, 5.2123e-05, 3.7102e-05, 4.1637e-06, 2.8405e-05, 6.5987e-05,
        3.1138e-06, 2.7734e-05, 1.5542e-03, 8.0160e-05, 3.3685e-04, 7.6385e-05,
        8.4170e-05, 2.1178e-05, 4.6802e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.1635e-04, 1.8791e-04, 2.5858e-05, 3.3330e-05, 2.4276e-04, 2.4779e-05,
        1.7724e-05, 3.5952e-05, 3.4551e-05, 3.5110e-05, 2.1799e-05, 1.4026e-04,
        1.5631e-05, 2.2781e-05, 4.2435e-05, 3.4308e-04, 3.7623e-05, 6.7311e-05,
        9.8226e-06, 4.5713e-05, 2.0513e-05, 1.0488e-05, 1.5612e-05, 4.1413e-05,
        5.4617e-06, 2.6337e-05, 6.6264e-04, 4.5758e-05, 7.3967e-05, 1.9441e-05,
        6.4502e-05, 4.8467e-05, 4.0367e-04, 2.9869e-05, 7.3836e-05, 1.5100e-03,
        2.3876e-05, 2.7304e-05, 4.5706e-05, 7.1570e-05, 1.2856e-05, 3.8481e-05,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2800: [tensor([2.7257e-04, 4.0830e-05, 8.9257e-05, 2.6095e-04, 3.5076e-05, 1.1472e-05,
        6.8137e-05, 5.0658e-05, 4.3531e-04, 7.3633e-05, 3.0174e-05, 1.6527e-05,
        3.9399e-05, 5.4776e-05, 7.6578e-05, 4.1838e-04, 9.1824e-05, 2.9115e-05,
        9.0534e-05, 8.3145e-05, 8.3141e-04, 4.3230e-06, 5.4916e-05, 7.5717e-05,
        2.2085e-05, 5.5048e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.1312e-03, 7.2205e-05, 5.3707e-05, 3.0831e-04, 5.0959e-05, 2.9818e-05,
        8.2300e-05, 8.6626e-05, 4.8085e-04, 8.2547e-05, 5.1823e-05, 2.2426e-05,
        6.3090e-05, 1.0314e-04, 1.2209e-04, 6.8189e-04, 8.3856e-05, 1.3268e-04,
        9.4120e-05, 2.2260e-03, 9.4390e-05, 9.5216e-05, 5.9470e-05, 9.0018e-05,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2066e-04, 9.9947e-05, 8.5835e-04, 7.0833e-05, 8.1822e-05, 4.8711e-05,
        5.9860e-05, 1.9696e-05, 5.7738e-05, 4.5413e-04, 9.7235e-05, 4.0122e-04,
        1.4240e-04, 6.1366e-05, 1.2821e-04, 9.9113e-05, 6.9498e-05, 1.0289e-04,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2946e-04, -1.6332e-04, -1.0132e-03, -7.8319e-05, -1.1631e-04,
        -5.4625e-05, -1.2921e-04, -3.7486e-05, -8.0200e-05, -7.6379e-04,
        -1.4602e-04, -7.4658e-04, -1.4232e-04, -2.1850e-04, -6.5122e-05,
        -1.3564e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.1965e-03, 1.2371e-04, 7.6887e-04, 7.0735e-05, 1.0074e-04, 6.6295e-05,
        9.3566e-05, 3.0790e-05, 7.2593e-05, 4.4512e-04, 1.0792e-04, 5.5241e-04,
        1.0608e-03, 1.7778e-04, 1.9000e-04, 1.4873e-03, 9.4497e-05, 5.3859e-05,
        7.1986e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9280e-04, -3.3329e-04, -1.0053e-04, -2.2276e-05, -4.2315e-05,
        -4.0546e-05, -2.8492e-05, -8.0787e-05, -3.9297e-05, -3.6363e-04,
        -5.3687e-05, -6.8655e-05, -2.6150e-04, -2.2585e-04, -8.8504e-05,
        -3.1277e-05, -2.5202e-05, -5.3560e-05, -6.0214e-05, -3.5000e-05,
        -5.7894e-05, -3.9929e-05, -2.1289e-04, -8.5458e-05, -1.3036e-04,
        -5.1124e-05, -6.5281e-05, -2.2572e-05, -4.6004e-05, -1.1640e-03,
        -5.9839e-05, -1.3019e-04, -9.2589e-04, -1.2136e-04, -4.2240e-05,
        -9.7186e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([7.9606e-04, 5.2348e-04, 6.9096e-05, 1.9616e-05, 4.4001e-05, 3.6920e-05,
        2.1526e-05, 7.0986e-05, 3.7249e-05, 5.7224e-04, 3.4415e-05, 4.6307e-05,
        2.7376e-04, 2.2260e-04, 7.9778e-05, 2.4755e-05, 2.9781e-05, 4.9727e-05,
        3.9553e-05, 2.6932e-05, 5.5142e-05, 2.9491e-05, 2.0462e-04, 5.7500e-05,
        1.4728e-04, 3.7249e-05, 3.7138e-05, 1.9266e-05, 4.0066e-05, 8.8480e-04,
        6.5979e-05, 8.7226e-05, 1.1043e-03, 3.9869e-05, 7.3613e-05, 6.2074e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([8.8923e-04, 1.8999e-04, 1.3728e-04, 2.2778e-05, 3.5945e-05, 3.7041e-05,
        1.8548e-05, 5.7287e-05, 2.7500e-05, 4.1914e-04, 3.0889e-05, 5.2673e-05,
        2.3007e-04, 2.0474e-04, 7.8664e-05, 2.3024e-05, 2.3112e-05, 3.3917e-05,
        4.1904e-05, 2.2612e-05, 5.3398e-05, 4.0285e-05, 1.7322e-04, 5.6162e-05,
        9.4306e-05, 4.9480e-05, 3.0440e-05, 1.7754e-05, 2.7850e-05, 5.9844e-04,
        4.5989e-05, 8.7209e-05, 4.4759e-04, 6.3450e-04, 8.5586e-05, 5.5986e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7743e-04, -8.0330e-05, -7.1418e-05, -1.0136e-04, -4.2425e-05,
        -4.6373e-05, -5.0788e-05, -7.0263e-05, -5.6730e-05, -8.7081e-05,
        -6.4796e-05, -2.6917e-05, -6.4014e-05, -5.0434e-04, -8.8909e-05,
        -9.9750e-04, -6.1410e-05, -5.7679e-05, -8.6012e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.6102e-04, 7.8855e-05, 6.4535e-05, 6.3405e-05, 3.1620e-05, 2.9083e-05,
        6.3470e-05, 5.0826e-05, 5.8347e-05, 6.0467e-05, 5.0249e-05, 2.5310e-05,
        4.4459e-05, 4.4258e-04, 4.3195e-05, 9.7332e-04, 1.8393e-04, 5.6433e-05,
        9.0734e-05, 1.5970e-04, 5.2531e-05, 5.8230e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4336e-04, -8.3953e-05, -6.2377e-05, -7.6385e-05, -5.3961e-05,
        -3.8240e-05, -8.1168e-05, -6.8506e-05, -2.5092e-05, -5.4905e-05,
        -5.6981e-05, -3.1496e-05, -6.2721e-05, -4.7142e-04, -9.2763e-05,
        -7.9608e-04, -1.8650e-04, -4.6329e-05, -1.1065e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.5574e-04, 6.0126e-04, 1.4585e-04, 9.5630e-05, 6.1438e-05, 4.7610e-04,
        5.7907e-05, 8.6899e-05, 3.8820e-05, 6.2882e-05, 1.0215e-04, 8.4972e-04,
        1.4985e-04, 1.1050e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2850: [tensor([-9.2688e-04, -8.9260e-04, -7.9691e-05, -1.0492e-04, -6.9379e-05,
        -4.0914e-04, -5.8018e-05, -1.7568e-04, -3.9972e-05, -5.6438e-05,
        -4.5552e-04, -1.2391e-04, -4.2523e-04, -8.1297e-05, -1.7582e-04,
        -2.2832e-03, -2.1096e-04, -2.2059e-04, -9.9533e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5580e-04, -1.1206e-03, -1.1883e-04, -1.2786e-04, -1.7674e-04,
        -4.8940e-04, -8.5110e-05, -1.9344e-04, -7.8371e-05, -1.3025e-04,
        -2.9915e-04, -5.0547e-04, -2.4343e-04, -1.6630e-04, -1.0416e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([9.0141e-04, 2.1703e-05, 6.2837e-05, 6.5058e-05, 7.5608e-05, 1.4117e-04,
        2.5478e-05, 1.5247e-05, 2.6322e-04, 3.4606e-05, 6.8013e-05, 3.0571e-05,
        1.1355e-05, 5.4117e-05, 1.2789e-04, 3.2950e-05, 3.0634e-04, 1.8180e-04,
        6.6471e-05, 1.0696e-04, 3.5009e-04, 2.3844e-05, 7.2228e-06, 3.6534e-05,
        3.1730e-05, 1.6412e-04, 2.5000e-04, 4.3913e-05, 1.3678e-05, 3.1552e-05,
        6.7779e-05, 5.4238e-05, 8.1051e-05, 4.8223e-04, 1.0226e-04, 4.9209e-05,
        2.8089e-04, 6.0703e-05, 5.7340e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5047e-04, -2.9848e-05, -8.4483e-05, -5.4528e-05, -8.1945e-05,
        -1.5430e-04, -2.4286e-05, -2.7460e-05, -2.5785e-04, -6.4033e-05,
        -8.8673e-05, -3.9561e-05, -2.6059e-05, -4.0738e-05, -1.9786e-04,
        -3.8140e-05, -3.9936e-04, -2.0233e-04, -1.0009e-04, -1.2259e-04,
        -4.1001e-04, -2.7253e-05, -1.1606e-05, -4.7042e-05, -5.8933e-05,
        -1.5955e-04, -2.2379e-04, -3.9757e-05, -2.4999e-05, -3.9180e-05,
        -4.1360e-05, -6.7753e-05, -4.9525e-04, -7.6178e-05, -3.2091e-04,
        -4.3260e-04, -8.5991e-05, -6.3218e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1640e-03, -2.5681e-05, -6.3522e-05, -6.2630e-05, -7.2428e-05,
        -9.4522e-05, -3.2335e-05, -1.7045e-05, -1.9205e-04, -4.9819e-05,
        -7.2473e-05, -3.1433e-05, -1.6740e-05, -4.1097e-05, -1.7683e-04,
        -6.3757e-05, -3.7430e-04, -1.8148e-04, -7.2922e-05, -1.1964e-04,
        -2.4092e-04, -2.0421e-05, -9.0783e-06, -3.0494e-05, -4.0210e-05,
        -1.4409e-04, -2.3823e-04, -5.9541e-05, -2.0794e-05, -3.4275e-05,
        -6.9697e-05, -1.1139e-04, -7.4190e-05, -1.9905e-04, -1.0893e-04,
        -8.6175e-05, -1.2131e-04, -2.7107e-05, -5.5737e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6489e-04, -3.4651e-04, -4.0731e-05, -2.4386e-04, -4.7940e-04,
        -2.7391e-05, -3.4857e-05, -3.8666e-05, -2.2804e-04, -4.5257e-05,
        -9.4601e-05, -8.2208e-05, -7.2470e-05, -3.7365e-05, -6.0787e-05,
        -2.6170e-05, -4.6784e-04, -2.4097e-05, -4.9528e-06, -2.4233e-05,
        -4.4454e-05, -3.7790e-04, -5.5828e-05, -4.3075e-04, -1.0456e-04,
        -1.1078e-03, -1.2130e-04, -5.7374e-05, -4.2348e-05, -5.2789e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4303e-04, -5.7697e-04, -5.2510e-05, -3.5406e-04, -4.9216e-04,
        -2.7531e-05, -2.8901e-05, -3.9571e-05, -3.1114e-04, -5.1284e-05,
        -1.0792e-04, -8.6273e-05, -9.2680e-05, -3.6072e-05, -8.4708e-05,
        -3.8757e-05, -5.5522e-04, -2.7681e-05,  3.1925e-07, -2.7831e-05,
        -4.2998e-05, -4.8545e-04, -1.3853e-04, -1.2524e-04, -7.0847e-05,
        -6.2664e-04, -7.0970e-05, -5.0707e-05, -8.9757e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2213e-04, -4.3186e-04, -5.1961e-05, -2.5312e-04, -4.7868e-04,
        -2.7755e-05, -2.4297e-05, -2.3834e-05, -1.7630e-04, -3.7450e-05,
        -9.0231e-05, -7.6778e-05, -7.9284e-05, -7.2456e-05, -7.2547e-05,
        -3.0469e-05, -4.5951e-04, -2.6996e-05, -1.6231e-05, -1.3653e-05,
        -3.7349e-05, -4.9240e-05, -2.2787e-04, -3.2712e-04, -1.3487e-04,
        -3.0651e-04, -1.1895e-04, -4.8610e-04, -1.0703e-04, -1.1847e-04,
        -6.2239e-05, -3.5065e-05, -6.6811e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1540e-03, -7.9690e-04, -1.1050e-04, -1.0860e-04, -4.1660e-05,
        -3.2764e-04, -1.3310e-04, -2.7057e-04, -3.7151e-04, -1.2037e-04,
        -1.1329e-04, -9.9408e-05, -1.0289e-04, -6.9186e-05, -8.1278e-06,
        -4.3480e-05, -9.2958e-05, -2.3766e-03, -8.1927e-05, -2.4440e-04,
        -1.5464e-04, -2.4597e-05, -7.5556e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([8.5041e-04, 5.3410e-04, 1.0870e-04, 6.8996e-05, 1.9034e-05, 2.8273e-04,
        9.3626e-05, 1.8921e-04, 2.3402e-04, 6.8435e-05, 1.0366e-04, 8.7561e-05,
        1.0710e-04, 1.1971e-04, 2.9737e-05, 2.9940e-05, 5.9032e-05, 1.2311e-03,
        5.3162e-05, 1.7565e-04, 1.2813e-04, 1.3384e-05, 4.3390e-04, 5.2022e-05,
        8.5445e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([9.6329e-04, 5.6811e-04, 9.4590e-05, 7.2960e-05, 3.3237e-05, 2.2697e-04,
        9.5537e-05, 2.8692e-04, 1.9446e-04, 8.9713e-05, 1.1617e-04, 8.4914e-05,
        1.0919e-04, 1.0324e-04, 3.0526e-05, 2.5576e-05, 6.9849e-05, 1.2391e-04,
        2.2853e-04, 1.7237e-03, 6.6769e-05, 1.3395e-04, 9.7122e-05, 1.6493e-04,
        6.6608e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2350e-04, -5.9286e-05, -8.1952e-05, -8.4233e-05, -3.8167e-05,
        -6.0542e-05, -1.8117e-04, -4.0481e-05, -7.1815e-05, -2.3090e-04,
        -3.3472e-05, -7.1286e-05, -1.8264e-04, -7.4056e-05, -4.0337e-05,
        -2.3145e-04, -1.3289e-04, -6.0866e-05, -2.1707e-04, -1.0301e-04,
        -3.3588e-05, -1.8319e-05, -4.2503e-05, -4.9133e-04, -5.3996e-05,
        -9.9870e-05, -7.2179e-05, -1.0622e-04, -2.8192e-05, -7.0545e-05,
        -9.8264e-05, -1.6239e-04, -5.5957e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2900: [tensor([8.2402e-04, 6.4999e-04, 8.9147e-05, 7.8931e-05, 7.7675e-05, 3.7794e-05,
        1.3596e-04, 2.7379e-04, 5.5770e-05, 1.1734e-04, 5.3232e-05, 6.2565e-05,
        3.3132e-04, 1.1173e-04, 2.7311e-05, 4.2894e-05, 2.6896e-04, 9.3096e-05,
        1.0068e-04, 1.2307e-04, 1.5695e-04, 5.1350e-05, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([6.7296e-04, 1.2965e-03, 7.9649e-05, 1.6736e-04, 1.1332e-04, 4.9731e-05,
        1.9829e-04, 3.4492e-04, 1.0600e-04, 1.5087e-04, 7.9672e-05, 6.4989e-05,
        4.7366e-04, 1.4747e-04, 2.2890e-05, 9.2746e-05, 2.6195e-04, 6.9012e-05,
        6.6420e-05, 1.3058e-04, 1.2338e-04, 1.0517e-04, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5466e-03, -5.7940e-04, -8.1350e-05, -7.3157e-05, -8.1423e-05,
        -3.3710e-05, -1.1589e-04, -2.0605e-04, -8.4739e-05, -1.2128e-04,
        -4.2569e-05, -5.8930e-05, -2.6111e-04, -1.1539e-04, -1.2889e-05,
        -7.7266e-05, -2.9252e-03, -1.7845e-04, -7.4085e-05, -9.3308e-05,
        -3.7376e-04, -9.4885e-05, -1.9416e-04, -9.6148e-05, -5.2974e-04,
        -8.9744e-05, -5.6167e-05, -9.2612e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.3699e-03, 5.9449e-04, 8.2648e-05, 1.0125e-04, 1.0305e-04, 6.1983e-05,
        1.7606e-04, 4.1140e-04, 1.5608e-04, 1.4880e-04, 7.6225e-05, 6.2273e-05,
        4.0213e-04, 1.2613e-04, 1.8856e-05, 9.6699e-05, 6.9374e-03, 1.7176e-04,
        1.7118e-04, 2.0399e-04, 3.0913e-04, 1.9319e-03, 2.7704e-04, 7.1558e-04,
        4.5987e-03, 3.1240e-04, 7.1343e-05, 1.7162e-04, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1217e-04, -6.6194e-04, -5.9853e-05, -9.9997e-05, -8.2271e-05,
        -3.2204e-05, -1.2585e-04, -3.0073e-04, -8.7839e-05, -6.3044e-05,
        -5.8578e-05, -5.4364e-05, -3.5057e-04, -1.5229e-04, -1.4756e-05,
        -7.7499e-05, -8.8727e-04, -2.0653e-04, -6.7516e-04, -1.0481e-04,
        -4.8853e-05, -1.6489e-04, -3.9345e-03, -1.1689e-04, -1.3526e-04,
        -6.0987e-05, -1.1760e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7535e-03, 7.8990e-04, 1.0613e-04, 1.0631e-04, 7.3115e-05, 3.8891e-05,
        1.0362e-04, 3.0686e-04, 8.7645e-05, 1.4027e-04, 5.8870e-05, 6.3680e-05,
        3.2971e-04, 1.1155e-04, 2.4092e-05, 6.9404e-05, 3.9462e-03, 1.9986e-04,
        1.2859e-04, 1.2619e-04, 6.0797e-05, 1.8882e-04, 1.5237e-04, 1.3364e-04,
        4.6811e-04, 1.1020e-04, 1.1168e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.0988e-03, 1.1510e-03, 8.8192e-05, 1.0090e-04, 8.7312e-05, 4.1676e-05,
        1.6078e-04, 3.6611e-04, 1.2986e-04, 1.5634e-04, 7.4514e-05, 7.3049e-05,
        3.7061e-04, 1.4593e-04, 2.7503e-05, 8.9065e-05, 1.6273e-04, 1.3496e-04,
        1.1120e-04, 2.5138e-04, 8.2305e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0564e-03, -8.4246e-04, -6.8568e-05, -8.5615e-05, -9.3969e-05,
        -2.2108e-05, -1.3787e-04, -3.1628e-04, -1.0324e-04, -1.0567e-04,
        -4.1248e-05, -8.0085e-05, -2.9857e-04, -1.3946e-04, -2.3338e-05,
        -6.9830e-05, -8.5225e-04, -1.4033e-04, -9.1457e-04, -2.8159e-04,
        -1.7716e-03, -1.5392e-04, -6.4321e-05, -9.3616e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2683e-03, -6.2921e-04, -8.6800e-05, -7.3829e-05, -5.3936e-05,
        -2.9487e-05, -1.2600e-04, -3.3620e-04, -9.7306e-05, -1.1504e-04,
        -6.4204e-05, -5.3400e-05, -3.8080e-04, -1.4698e-04, -1.8968e-05,
        -5.8157e-05, -4.5094e-03, -1.2148e-04, -6.0071e-04, -1.3278e-04,
        -4.7106e-05, -8.0867e-05, -1.1511e-04, -2.3330e-04, -9.4936e-04,
        -8.3290e-05, -6.6452e-05, -1.4704e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.8622e-03, 1.0909e-03, 8.1194e-05, 8.5180e-05, 7.8210e-05, 3.1930e-05,
        1.8024e-04, 3.0465e-04, 8.8949e-05, 1.1924e-04, 5.8701e-05, 7.3098e-05,
        3.3449e-04, 1.1320e-04, 2.6799e-05, 9.3199e-05, 6.1448e-03, 1.4075e-04,
        1.2026e-04, 9.7608e-04, 1.4677e-04, 9.1665e-05, 7.8006e-04, 1.9175e-04,
        7.7208e-05, 9.5450e-05, 2.0113e-03, 1.1226e-04, 7.0128e-05, 1.0172e-04,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([8.0888e-04, 7.4040e-04, 8.5556e-05, 9.5421e-05, 8.4749e-05, 4.7028e-05,
        2.1937e-04, 3.4322e-04, 1.1514e-04, 1.5400e-04, 6.3452e-05, 8.9851e-05,
        5.1555e-04, 1.3834e-04, 2.4551e-05, 9.9963e-05, 1.1400e-03, 1.9295e-04,
        7.9807e-04, 4.9447e-04, 2.0883e-04, 3.2391e-05, 1.3579e-04, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3397e-04, -3.8016e-04, -5.0504e-05, -6.1228e-05, -2.5365e-05,
        -1.8265e-05, -8.2042e-05, -1.9951e-04, -7.0601e-05, -5.9824e-05,
        -3.6161e-05, -4.7425e-05, -1.7708e-04, -6.1670e-05, -7.0599e-06,
        -4.3806e-05, -4.4702e-04, -4.0652e-05, -1.2156e-04, -7.5410e-05,
        -2.9779e-05, -6.6632e-05, -4.4149e-04, -2.5257e-04, -7.2748e-05,
        -1.1619e-04, -5.0079e-04, -1.3831e-04, -7.1299e-05, -1.9291e-05,
        -4.0893e-05, -5.2477e-05, -5.4473e-05, -4.2147e-04, -5.6366e-05,
        -5.0364e-05, -7.4927e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2950: [tensor([-1.7557e-03, -7.9304e-04, -1.8105e-04, -5.9176e-04, -7.7704e-05,
        -5.2640e-05, -2.0053e-04, -3.1238e-04, -1.2787e-04, -1.8571e-04,
        -6.7171e-04, -6.3816e-05, -8.1646e-05, -9.3713e-05, -3.0166e-05,
        -1.4035e-04, -1.7785e-04, -7.6154e-05, -4.2254e-04, -1.2451e-03,
        -3.2500e-05, -2.4864e-04, -1.9391e-05, -3.1310e-05, -5.2521e-05,
        -6.1528e-03, -1.9995e-04, -2.3295e-03, -3.6613e-04, -1.4975e-03,
        -1.3914e-04, -4.6794e-05, -1.2122e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1857e-03, -8.8026e-04, -1.0280e-04, -5.6381e-04, -7.9813e-05,
        -3.1033e-05, -1.8911e-04, -2.4244e-04, -9.5485e-05, -1.4564e-04,
        -6.3701e-04, -5.3867e-05, -7.1490e-05, -1.2448e-04, -5.2175e-05,
        -1.1402e-04, -1.4592e-04, -4.5083e-05, -3.6415e-04, -1.0194e-03,
        -2.3950e-05, -2.5331e-04, -4.2377e-05, -3.5779e-05, -6.8844e-05,
        -7.9200e-04, -1.7381e-04, -1.1124e-03, -1.7865e-04, -2.9397e-03,
        -1.6468e-04, -8.8013e-05, -6.7630e-05, -9.3416e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5551e-03, -6.8773e-04, -1.0309e-04, -4.8773e-04, -1.1654e-04,
        -4.1388e-05, -1.5877e-04, -3.2328e-04, -1.4793e-04, -2.0774e-04,
        -7.4087e-04, -6.1509e-05, -8.2144e-05, -9.0517e-05, -6.5865e-05,
        -1.4322e-04, -1.6738e-04, -9.3825e-05, -3.3336e-04, -1.0807e-03,
        -3.5628e-05, -1.9991e-04, -4.3766e-05, -3.4945e-05, -6.9933e-05,
        -1.6594e-04, -1.3703e-03, -1.8885e-04, -9.2105e-05, -2.3085e-04,
        -2.9250e-03, -1.1937e-04, -2.0895e-04, -2.4450e-04, -1.0292e-04,
        -5.0736e-05, -1.1406e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4524e-03, -9.3490e-04, -1.0615e-04, -6.2600e-04, -7.3889e-05,
        -5.0262e-05, -1.3551e-04, -2.6161e-04, -1.1602e-04, -1.5465e-04,
        -5.1431e-04, -4.9613e-05, -1.0105e-04, -7.2090e-05, -2.9217e-05,
        -1.0769e-04, -1.5537e-04, -8.4585e-05, -3.8384e-04, -1.3150e-03,
        -2.2592e-05, -1.8503e-04, -2.2545e-05, -1.5241e-05, -5.2470e-05,
        -2.7237e-03, -1.5729e-04, -1.2810e-03, -6.1911e-05, -1.0127e-04,
        -1.8151e-03, -8.9895e-05, -6.0704e-05, -1.0580e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6215e-03, -7.6560e-04, -1.3451e-04, -8.1212e-04, -6.0684e-05,
        -4.7427e-05, -2.4547e-04, -3.3496e-04, -1.2380e-04, -1.7889e-04,
        -6.9480e-04, -4.2957e-05, -7.3888e-05, -8.5180e-05, -4.4316e-05,
        -1.2167e-04, -1.9203e-04, -8.5792e-05, -5.0305e-04, -2.6957e-03,
        -3.6255e-05, -2.8979e-04, -6.3432e-05, -1.8743e-05, -7.4565e-05,
        -2.4029e-04, -2.0683e-03, -3.1068e-04, -1.5878e-04, -1.3713e-04,
        -1.8091e-04, -2.2793e-04, -1.1523e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1977e-03, -1.3372e-03, -1.1865e-04, -5.5676e-04, -8.0021e-05,
        -4.7818e-05, -1.7193e-04, -3.5361e-04, -1.0967e-04, -1.4399e-04,
        -8.7983e-04, -5.1971e-05, -6.8766e-05, -8.8575e-05, -5.0677e-05,
        -1.3220e-04, -1.0621e-04, -5.4737e-05, -3.5737e-04, -1.7756e-03,
        -3.6989e-05, -1.9897e-04, -3.7506e-05, -3.7627e-05, -7.0033e-05,
        -7.1797e-04, -1.4786e-04, -1.3083e-03, -1.5153e-04, -1.1695e-04,
        -7.6334e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([1.3345e-03, 8.4410e-04, 9.1172e-05, 5.6619e-04, 9.1121e-05, 2.9449e-05,
        1.5103e-04, 2.4689e-04, 1.6494e-04, 1.3179e-04, 4.8091e-04, 3.7380e-05,
        6.4982e-05, 8.2404e-05, 5.4464e-05, 9.8749e-05, 1.2292e-04, 4.2621e-05,
        2.9017e-04, 8.6082e-04, 1.3993e-05, 2.0567e-04, 3.5263e-05, 3.1006e-05,
        6.4150e-05, 6.0246e-04, 1.5015e-04, 1.2370e-03, 6.3393e-05, 1.8828e-04,
        1.5922e-03, 9.9136e-05, 1.0441e-04, 1.0166e-04, 1.6092e-04, 3.7293e-04,
        1.2569e-04, 2.5804e-04, 7.2340e-05, 7.5414e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.4640e-03, 8.0848e-04, 1.4390e-04, 1.5811e-04, 1.3381e-03, 8.9179e-05,
        5.6155e-05, 6.2204e-05, 9.9240e-05, 7.8891e-05, 7.8297e-05, 1.2826e-04,
        1.1478e-04, 5.8037e-05, 6.9964e-05, 4.4481e-04, 6.4685e-04, 2.3051e-04,
        4.8380e-05, 9.8059e-05, 1.3574e-03, 2.0185e-04, 2.1990e-03, 2.5097e-04,
        8.9076e-03, 1.4867e-04, 1.0070e-04, 1.5675e-04, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([1.6209e-03, 8.5211e-04, 1.1643e-04, 2.2750e-04, 9.9808e-04, 1.1166e-04,
        7.7715e-05, 5.9132e-05, 8.1897e-05, 1.3331e-04, 8.4090e-05, 1.1894e-04,
        1.1070e-04, 9.0939e-05, 8.0167e-05, 8.2844e-04, 6.8202e-04, 2.4120e-04,
        6.3994e-05, 9.5989e-05, 1.7477e-03, 1.6833e-04, 1.9011e-03, 2.7399e-04,
        5.0706e-03, 1.7182e-04, 1.4767e-04, 7.4615e-05, 2.3174e-04, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3898e-03, -1.1226e-03, -2.0361e-04, -2.2145e-04, -1.2737e-03,
        -1.7051e-04, -6.3607e-05, -4.4141e-05, -1.0413e-04, -1.1893e-04,
        -1.0598e-04, -1.2971e-04, -1.1047e-04, -1.0813e-04, -6.6160e-05,
        -7.8025e-04, -7.9215e-04, -2.6171e-04, -4.0888e-05, -1.3035e-04,
        -2.3719e-03, -1.8853e-04, -2.5610e-03, -1.3137e-04, -1.0377e-04,
        -2.4840e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5982e-03, -1.0171e-03, -1.1207e-04, -1.1215e-03, -2.4619e-04,
        -6.3284e-04, -1.8302e-04, -1.3148e-04, -5.8106e-04, -2.8172e-04,
        -9.4768e-05, -2.5677e-04, -3.9542e-03, -1.6666e-04, -1.0684e-04,
        -4.9559e-05, -1.2643e-04, -3.1749e-03, -1.2972e-04, -1.8752e-03,
        -3.7727e-04, -3.5258e-03, -1.6684e-04, -2.7632e-04, -1.7090e-04,
        -9.5697e-05, -1.6701e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9169e-03, -1.0064e-03, -1.3508e-04, -8.1595e-04, -2.1535e-04,
        -9.1623e-04, -1.6312e-04, -1.0525e-04, -6.3016e-04, -2.2741e-04,
        -1.1222e-04, -2.4844e-04, -1.3335e-03, -1.4851e-04, -1.0930e-04,
        -8.8741e-05, -1.9999e-04, -5.3071e-03, -1.7673e-04, -2.8207e-03,
        -4.0321e-04, -5.2755e-03, -2.7528e-04, -1.8030e-04, -1.0761e-04,
        -1.5988e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3000: [tensor([-0.0030, -0.0030, -0.0003, -0.0023, -0.0006, -0.0003, -0.0005, -0.0001,
        -0.0003, -0.0220, -0.0004, -0.0033, -0.0009, -0.0004, -0.0003, -0.0005,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9074e-03, -2.3051e-03, -2.8193e-04, -1.9137e-03, -5.0082e-04,
        -2.3351e-04, -3.9823e-04, -7.8000e-05, -2.4516e-04, -1.4951e-02,
        -3.5259e-04, -1.7427e-03, -5.7488e-04, -5.4184e-04, -5.7416e-03,
        -2.7458e-04, -2.7179e-04, -5.1728e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1213e-03, 1.4382e-03, 2.1508e-04, 1.3384e-04, 1.6542e-04, 1.1066e-04,
        1.9766e-04, 4.6635e-05, 1.1872e-04, 2.4514e-03, 1.5602e-04, 3.1480e-04,
        3.6300e-04, 2.0783e-04, 2.7615e-04, 2.3515e-03, 2.3576e-04, 1.6141e-04,
        5.2069e-05, 1.1277e-04, 3.4878e-03, 3.4041e-04, 5.1229e-03, 3.9286e-04,
        9.4845e-05, 1.7391e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2449e-03, -1.0065e-03, -1.6283e-04, -1.6533e-04, -1.5973e-04,
        -1.0258e-04, -2.1062e-04, -3.6865e-05, -1.2466e-04, -1.3390e-03,
        -8.0685e-05, -2.3157e-04, -3.0308e-04, -1.6469e-04, -3.0632e-04,
        -2.5105e-03, -1.9582e-04, -1.4737e-04, -7.1528e-05, -1.0549e-04,
        -3.5447e-03, -2.2172e-04, -2.3977e-03, -1.8430e-04, -1.1692e-02,
        -2.1895e-04, -3.6473e-04, -2.5746e-04, -1.4911e-04, -1.0577e-04,
        -1.8243e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1612e-03, -2.6277e-03, -2.2040e-04, -2.1332e-04, -1.6789e-04,
        -1.6163e-04, -2.6739e-04, -6.3395e-05, -1.9724e-04, -3.2342e-03,
        -1.4259e-04, -4.1192e-04, -3.8892e-04, -3.6864e-04, -2.9519e-04,
        -5.6108e-03, -3.5641e-04, -2.6726e-04, -7.7363e-05, -1.6442e-04,
        -1.2324e-03, -2.5967e-04, -4.1888e-04, -2.4158e-04, -1.6764e-03,
        -4.0557e-04, -1.5951e-04, -2.5373e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2495e-03, -2.2697e-03, -2.3808e-04, -2.5683e-04, -1.7552e-04,
        -1.4607e-04, -2.2824e-04, -5.2883e-05, -1.8964e-04, -1.6247e-03,
        -9.6041e-05, -3.1882e-04, -4.0652e-04, -2.3519e-04, -1.9465e-04,
        -2.5278e-03, -2.8173e-04, -2.5194e-04, -7.2178e-05, -1.0877e-04,
        -1.1272e-03, -1.8577e-04, -3.2852e-04, -2.7278e-04, -3.8342e-04,
        -3.7771e-03, -2.8747e-04, -1.8614e-04, -2.5625e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5846e-03, -1.4531e-03, -2.3129e-04, -2.2352e-04, -1.6279e-04,
        -1.3103e-04, -3.1326e-04, -9.8640e-05, -1.8859e-04, -4.6513e-03,
        -6.4715e-05, -3.3249e-04, -4.1795e-04, -2.5643e-04, -2.3572e-04,
        -3.5531e-03, -3.0166e-04, -2.3629e-04, -3.9648e-05, -1.5246e-04,
        -6.2840e-03, -2.9720e-04, -3.1812e-03, -1.1855e-04, -8.7364e-03,
        -2.5292e-04, -1.7009e-04, -2.3581e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.2396e-03, 2.5504e-03, 2.1240e-04, 2.7399e-04, 1.8887e-04, 1.8666e-04,
        2.9159e-04, 7.5398e-05, 1.4673e-04, 3.6254e-03, 1.6183e-04, 4.5210e-04,
        3.7821e-04, 3.5798e-04, 3.1232e-04, 2.9537e-03, 3.3076e-04, 2.6399e-04,
        1.3848e-05, 1.1165e-04, 7.2701e-03, 4.1872e-04, 2.7201e-04, 1.5296e-02,
        1.2495e-04, 2.6214e-03, 3.2702e-04, 9.7539e-03, 2.8072e-04, 1.2667e-04,
        2.3108e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([5.3346e-03, 2.8911e-03, 2.8337e-04, 2.2880e-04, 2.0595e-04, 2.3496e-04,
        3.6683e-04, 1.0975e-04, 2.9723e-04, 3.0028e-03, 1.8439e-04, 5.1699e-04,
        5.6259e-04, 3.5796e-04, 4.1358e-04, 3.1372e-03, 5.4450e-04, 3.2572e-04,
        8.9894e-05, 2.0710e-04, 1.0427e-02, 3.8353e-04, 4.8415e-03, 6.4277e-04,
        3.6702e-04, 2.5878e-02, 2.5438e-04, 2.0224e-04, 4.9415e-04, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3145e-03, -2.0378e-03, -2.0914e-04, -2.5790e-04, -1.9984e-04,
        -1.8034e-04, -2.6471e-04, -9.5431e-05, -1.4760e-04, -2.4553e-03,
        -9.0657e-05, -4.4630e-04, -5.0774e-04, -2.7910e-04, -4.0074e-04,
        -5.6901e-03, -2.7831e-04, -2.2805e-04, -4.7130e-05, -1.4558e-04,
        -6.3246e-03, -3.6139e-04, -4.1777e-03, -3.6073e-04, -5.6114e-03,
        -1.6884e-03, -5.6546e-04, -1.3631e-04, -3.2007e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([1.7062e-03, 1.7964e-03, 2.8190e-04, 1.1715e-04, 1.1932e-04, 1.4530e-04,
        2.7163e-04, 5.9657e-05, 1.9894e-04, 2.8302e-03, 1.1867e-04, 4.0425e-04,
        2.7771e-04, 2.1903e-04, 2.6076e-04, 3.8387e-03, 3.1977e-04, 2.4056e-04,
        7.8655e-05, 1.4144e-04, 2.6875e-04, 3.0404e-04, 2.8128e-04, 2.4629e-04,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2781e-04, -1.7289e-03, -3.0860e-04, -1.7086e-04, -2.3969e-04,
        -1.8393e-04, -2.1451e-04, -7.4976e-05, -1.6582e-04, -3.1060e-03,
        -1.1300e-04, -3.8403e-04, -4.1917e-04, -2.3467e-04, -2.8049e-04,
        -5.4270e-03, -2.7828e-04, -2.4044e-04, -9.9328e-05, -1.6870e-04,
        -2.8760e-04, -2.8035e-04, -7.1743e-04, -2.5688e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3050: [tensor([6.1579e-03, 6.4391e-03, 8.6051e-04, 2.6852e-04, 4.9711e-04, 2.2341e-03,
        3.2736e-04, 4.9470e-03, 6.7135e-04, 4.7009e-03, 1.6003e-03, 9.7364e-05,
        9.3328e-04, 8.4258e-03, 2.1290e-04, 6.2776e-04, 3.2261e-03, 7.2306e-04,
        3.7757e-04, 2.5078e-05, 2.5784e-04, 1.4505e-02, 4.1280e-04, 1.4422e-03,
        9.9733e-03, 3.0965e-04, 6.7990e-04, 1.4186e-03, 4.8705e-04, 6.1652e-04],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9757e-02, -1.2007e-02, -1.7858e-03, -4.5723e-04, -1.1791e-03,
        -5.9349e-03, -1.3271e-03, -8.1315e-03, -1.0387e-03, -6.4310e-03,
        -3.9962e-03, -2.4099e-04, -2.9148e-03, -2.2833e-02, -1.1632e-03,
        -1.9375e-03, -6.8640e-03, -2.1304e-03, -9.8679e-04,  1.9585e-04,
        -6.5441e-04, -1.0898e-03, -8.8174e-04, -1.0685e-02, -1.2047e-02,
        -3.3376e-01, -2.0041e-03, -1.0963e-03, -2.0070e-03,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3721e-02,  1.1234e-02,  1.5866e-03,  2.7146e-04,  9.0121e-04,
         3.1176e-03,  7.7087e-04,  6.8552e-03,  1.0584e-03,  4.7098e-03,
         2.8764e-03, -7.7233e-05,  1.3807e-03,  1.7998e-02,  7.0998e-04,
         1.4145e-03,  4.5120e-03,  8.4391e-04,  5.4111e-04,  6.4854e-06,
         4.9481e-04,  4.4624e-04,  7.6352e-02,  3.4173e-03,  7.0750e-04,
         4.0815e-04,  1.3114e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.1908e-03, 2.3893e-04, 2.9887e-04, 2.9716e-04, 1.4704e-04, 2.1923e-04,
        3.2410e-04, 1.4086e-04, 4.6296e-04, 7.0936e-04, 5.8370e-05, 2.6894e-04,
        1.9144e-04, 2.2715e-04, 2.0343e-04, 3.0554e-04, 1.4980e-03, 5.6967e-03,
        2.1397e-04, 6.0828e-05, 1.4517e-04, 4.8542e-03, 2.6015e-04, 3.2163e-03,
        1.5421e-03, 2.4308e-04, 9.1261e-04, 3.9338e-04, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5329e-02, -1.0745e-03, -1.1759e-03, -1.5673e-03, -6.6892e-04,
        -1.8567e-03, -1.4131e-03, -1.0709e-03, -3.2866e-03, -4.3208e-03,
        -3.5057e-04, -1.6591e-03, -1.1789e-03, -1.4932e-03, -1.1002e-03,
        -2.7390e-03, -5.6948e-03, -2.8165e-02, -1.8766e-03,  1.8486e-04,
        -6.5858e-04, -3.4649e-03, -1.7598e-03, -1.1993e-02, -2.8480e-01,
        -9.6713e-04, -4.5119e-03, -1.5606e-03,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3204e-02, -4.0717e-04, -3.3929e-04, -6.9620e-04, -3.3550e-04,
        -6.4523e-04, -4.4616e-04, -5.1604e-04, -1.3512e-03, -1.8823e-03,
        -2.2121e-04, -6.1152e-04, -5.2044e-04, -5.1843e-04, -4.2767e-04,
        -8.3993e-04, -2.7883e-03, -9.2972e-03, -6.3180e-04, -1.9345e-05,
        -2.2167e-04, -2.0682e-03, -4.8219e-04, -1.0250e-03, -1.2422e-01,
        -8.4386e-04, -7.4381e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.0346e-03, 3.7031e-03, 3.1538e-04, 2.8548e-04, 6.2805e-04, 1.7962e-04,
        7.9712e-04, 4.7043e-05, 5.3377e-04, 1.7014e-04, 2.1681e-04, 1.6618e-04,
        1.3861e-03, 3.8094e-04, 1.3834e-03, 9.0943e-03, 1.4825e-04, 3.3557e-04,
        9.8013e-05, 1.7256e-04, 7.8739e-04, 8.3310e-04, 1.0504e-03, 1.0014e-03,
        3.9918e-04, 2.9642e-04, 3.7023e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([2.2144e-03, 4.1759e-03, 3.4050e-04, 2.1994e-04, 4.1173e-04, 1.3517e-04,
        8.1469e-04, 2.7257e-06, 4.6335e-04, 1.7932e-04, 2.6018e-04, 2.2824e-04,
        1.3964e-03, 4.2687e-04, 1.2418e-03, 6.6479e-03, 1.7164e-04, 2.6550e-04,
        6.0402e-05, 1.3230e-04, 2.4019e-04, 4.4183e-04, 5.9805e-03, 7.2762e-03,
        8.4589e-04, 4.4621e-04, 4.0141e-05, 9.2946e-04, 2.2299e-04, 4.1319e-04],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0634e-02, -2.4663e-03, -4.3476e-04, -3.7607e-04, -5.9528e-04,
        -2.5380e-04, -9.5545e-04,  6.5550e-05, -8.4058e-04, -3.1074e-04,
        -3.5150e-04, -2.6063e-04, -1.3955e-03, -7.3905e-04, -1.4341e-03,
        -4.5166e-03, -1.8420e-04, -4.0577e-04, -4.4418e-05, -1.7824e-04,
        -5.1431e-04, -4.7546e-04, -6.5697e-03, -2.7380e-03, -8.3688e-02,
        -3.8661e-04, -9.9938e-04, -2.8650e-04, -4.9114e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([0.0020, 0.0002, 0.0003, 0.0002, 0.0001, 0.0006, 0.0002, 0.0001, 0.0048,
        0.0002, 0.0012, 0.0005, 0.0002, 0.0001, 0.0002, 0.0004, 0.0006, 0.0002,
        0.0002, 0.0032, 0.0004, 0.0002, 0.0004, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([4.8173e-03, 2.6818e-04, 2.8724e-04, 2.5255e-04, 1.7037e-04, 5.0618e-04,
        2.5899e-04, 2.2735e-04, 4.6096e-03, 3.0881e-04, 1.1004e-03, 9.9577e-04,
        3.3253e-04, 4.6998e-05, 1.7844e-04, 5.1728e-04, 7.0434e-04, 5.9333e-04,
        3.7590e-04, 1.7304e-03, 4.7280e-04, 1.9521e-03, 5.0899e-03, 7.6642e-04,
        1.9545e-03, 2.9704e-04, 1.2637e-04, 3.7647e-04, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([3.3058e-03, 1.9184e-04, 2.6341e-04, 2.4497e-04, 1.2083e-04, 4.8088e-04,
        1.4383e-04, 1.4561e-04, 4.5516e-03, 2.7790e-04, 1.0376e-03, 7.8506e-04,
        2.1856e-04, 7.6964e-05, 1.3889e-04, 5.3050e-03, 2.8292e-04, 3.8318e-03,
        7.4445e-04, 1.0012e-03, 2.0629e-03, 6.9616e-04, 1.5478e-04, 3.1197e-04,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3100: [tensor([-4.6354e-04, -8.7486e-05, -6.1511e-06, -1.2817e-06, -9.1793e-07,
         2.2590e-07,  4.5880e-06,  4.5359e-06, -1.0337e-03, -8.3049e-06,
        -1.3236e-06,  4.4625e-07, -5.6485e-06, -1.2296e-05, -1.8274e-07,
        -6.3712e-07, -2.2175e-06, -3.5469e-06,  1.5022e-06,  3.8347e-06,
        -6.6737e-04,  2.6450e-06, -1.1383e-03,  4.3415e-06, -3.2115e-08,
        -5.5091e-07, -6.3856e-06, -5.7242e-04, -1.5843e-05, -2.7231e-06,
        -6.2305e-06, -5.5990e-05, -3.9691e-04, -4.0151e-07, -3.5998e-05,
        -1.7312e-05, -4.0004e-04, -6.5521e-05, -1.9085e-05, -1.1190e-05,
        -1.1400e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3311e-04, -3.2950e-05, -4.3505e-06,  2.0255e-06, -1.1275e-06,
        -1.1341e-06,  3.3686e-06,  3.2804e-06, -9.8100e-04, -4.8081e-06,
        -1.4387e-07, -9.7268e-09, -4.1613e-06, -1.0268e-05,  1.1439e-06,
        -8.0366e-07, -1.9933e-06, -5.4605e-06,  3.4039e-06,  4.8445e-06,
        -9.0911e-04, -7.7959e-07, -7.4807e-04,  1.8703e-06,  1.0396e-06,
         9.7054e-07, -3.8676e-06, -2.4118e-04, -6.6561e-05, -2.4538e-04,
        -6.8870e-06, -3.9136e-04,  1.3233e-06, -3.8856e-05, -2.4287e-06,
        -3.3968e-05, -1.3865e-05, -9.7623e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8078e-04, -1.4513e-04, -6.5616e-06,  1.3256e-06, -6.7589e-07,
         1.5126e-07,  9.3474e-07,  1.2114e-06, -4.8151e-05, -4.2761e-06,
        -3.6041e-07,  1.1809e-07, -2.7385e-06, -7.9197e-06,  2.6369e-07,
        -1.0926e-06, -3.9382e-06, -3.6807e-06,  2.3863e-06,  6.6149e-06,
        -7.3621e-04,  1.7273e-06, -4.2889e-04,  3.1565e-06,  9.5364e-07,
         4.7120e-07, -4.7366e-06, -2.4740e-04, -3.2282e-05, -2.2978e-04,
        -1.5777e-06, -4.0335e-04, -2.0865e-05, -2.6176e-04, -7.0244e-05,
        -2.1525e-05, -1.2759e-05, -7.7834e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9590e-04, -1.2288e-05, -8.7889e-06, -7.4496e-07, -3.4400e-06,
        -1.9882e-06, -5.2261e-06,  1.3879e-06, -1.3090e-04, -6.9078e-06,
        -4.5236e-06, -7.8308e-07, -4.6344e-06, -9.4071e-06,  3.0667e-07,
        -4.3167e-06, -5.7592e-06, -3.0759e-06,  1.4198e-06,  2.4589e-06,
        -8.3342e-04,  1.5164e-06, -8.3317e-04, -7.6446e-07, -2.5406e-07,
        -3.2840e-06, -6.8705e-06, -3.5417e-04, -5.6052e-05, -7.6171e-06,
        -3.1603e-06, -2.3096e-04, -3.7358e-06, -4.8042e-04, -3.1169e-05,
        -3.3452e-04, -9.7503e-05, -1.0987e-05, -6.1279e-06, -1.2328e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7281e-04, -2.1040e-05, -5.6804e-06, -6.6524e-07, -2.1145e-06,
        -4.1702e-07, -1.9673e-07,  3.8156e-06, -9.7294e-04, -5.2717e-06,
        -9.3866e-07, -1.3358e-06, -2.5880e-06, -9.7878e-06, -2.1245e-07,
        -1.4173e-06, -3.6616e-06, -4.0388e-06,  8.3573e-07,  3.2042e-06,
        -6.7315e-04,  1.3305e-06, -8.2225e-04,  2.1422e-06, -6.3236e-07,
        -1.0429e-06, -4.9809e-06, -2.7641e-04, -1.7693e-05, -1.6405e-04,
        -1.0680e-06, -9.9308e-06, -2.7815e-05, -4.1266e-04, -8.2671e-05,
        -2.8604e-05,  2.7559e-06, -4.3326e-06,  2.7857e-06, -6.6846e-06,
        -2.4137e-05, -3.3148e-04, -2.1090e-06, -8.5403e-06, -1.0269e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6172e-04, -3.1561e-04, -4.9184e-06,  9.8343e-07, -6.5598e-07,
         1.3929e-06,  3.1063e-06,  4.1570e-06, -1.8824e-03, -7.3810e-06,
        -4.6278e-07,  1.3243e-06, -4.6967e-06, -1.0192e-05,  4.4389e-07,
        -1.1797e-06, -1.5212e-06, -2.0292e-06,  4.7189e-06,  4.9803e-06,
        -6.5927e-04,  8.5491e-07, -6.2669e-04,  4.1194e-06, -2.1389e-06,
         1.5061e-06, -4.8211e-06, -3.5439e-04, -3.1361e-05, -1.9443e-04,
        -1.0274e-05, -7.2130e-06, -2.3773e-06, -4.7229e-04, -6.9698e-05,
        -2.7557e-05, -9.6944e-06, -7.8621e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1484e-04,  4.3053e-05, -8.0644e-06, -3.7426e-07, -2.7938e-06,
        -1.5194e-06,  2.2345e-06,  3.0487e-06, -1.3365e-03, -5.2620e-06,
        -1.3630e-06, -1.0347e-06, -3.4573e-06, -8.8195e-06,  2.0553e-06,
        -3.1673e-06, -6.7952e-06, -7.8384e-06, -3.0734e-06,  4.2537e-06,
        -9.3536e-04,  1.8070e-06, -9.6738e-04,  1.1697e-06,  2.2794e-06,
         8.6653e-07, -7.5845e-06, -3.9166e-04, -3.1465e-05, -2.5559e-04,
        -7.2777e-05, -4.6749e-04, -2.3938e-04, -1.4843e-05, -1.9180e-05,
        -3.1699e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6327e-04, -1.8066e-04, -4.4064e-06,  1.5567e-06, -1.2836e-06,
        -1.1475e-06,  7.8677e-06,  1.9339e-06, -8.2745e-05, -6.5327e-06,
        -7.7769e-07, -2.2555e-07, -4.0507e-06, -1.3570e-05,  1.9678e-07,
        -1.9081e-06, -4.8287e-06, -4.3168e-06,  1.4378e-05,  6.6679e-06,
        -8.3202e-04,  9.3294e-07, -1.0515e-03,  8.3529e-07,  1.1536e-06,
         1.8081e-06, -3.9669e-06, -3.3091e-04, -8.0409e-05, -2.0262e-04,
        -1.1671e-05, -3.9995e-04, -2.6867e-05, -4.1282e-05, -6.4047e-05,
        -3.2186e-05, -1.1641e-05, -1.1056e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1761e-04, -3.9021e-05, -8.9156e-06,  2.9593e-07, -1.3613e-06,
         1.6661e-07, -1.0683e-06,  4.8043e-06, -3.0010e-03, -1.0489e-05,
        -2.7987e-06,  1.3943e-06, -5.4695e-06, -1.0521e-05,  1.0882e-06,
        -1.9767e-06, -8.3986e-06, -9.3699e-06,  4.0627e-06,  8.0274e-06,
        -1.4912e-03,  2.3014e-06, -1.5229e-03,  1.0566e-05,  4.5776e-06,
         2.1438e-06, -8.6572e-06, -4.4980e-04, -9.7443e-05, -4.1518e-04,
        -8.7973e-04, -7.9158e-06, -8.1181e-05, -2.2386e-05, -7.3817e-06,
        -3.8014e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8678e-04, -9.7961e-05, -2.2300e-05,  9.0247e-07, -3.9378e-05,
        -6.0901e-04, -1.3302e-05, -6.9559e-06, -3.4561e-06, -4.5652e-06,
        -3.7014e-05, -7.2939e-06, -6.7015e-06, -3.2342e-06,  4.0214e-05,
        -4.5089e-06, -7.8135e-07, -1.2981e-05, -1.1274e-05, -1.9393e-06,
        -8.4432e-08, -1.5187e-04, -2.7928e-06, -1.1000e-06, -3.1669e-03,
        -5.9594e-07, -2.5338e-06, -8.8225e-06, -1.3893e-05, -2.3376e-06,
         7.5187e-06, -3.6511e-06,  4.2212e-06, -6.5717e-05, -2.7220e-05,
        -4.1879e-05, -8.4779e-06, -8.5397e-06, -1.3333e-05, -2.5500e-06,
        -8.5971e-06, -5.3341e-06, -2.2740e-03, -2.5570e-05, -1.4633e-03,
        -9.1899e-05, -4.1357e-05, -1.2039e-04, -1.3845e-05, -1.2708e-05,
        -4.6905e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6681e-03, -1.7397e-03, -4.5909e-05, -4.3662e-07, -2.0452e-04,
        -3.4621e-03, -6.0963e-05, -2.6118e-05, -2.2520e-05, -2.2947e-05,
        -5.5843e-05, -8.9310e-06, -3.9341e-05, -5.7842e-05, -2.7419e-05,
        -1.4434e-05, -5.0430e-05, -3.9628e-05, -1.1627e-05,  1.4263e-05,
        -1.9420e-05, -4.8198e-04,  1.6020e-05, -8.4636e-06, -2.3587e-02,
        -6.3087e-06, -2.5714e-05, -6.7873e-06, -1.5133e-05,  4.6320e-06,
         1.7734e-05, -2.5451e-05,  3.0369e-05, -1.1591e-04,  2.1007e-06,
        -3.6726e-05, -4.9893e-05,  2.9640e-06, -1.8916e-05, -9.7091e-06,
         2.5512e-05, -3.5985e-05, -6.3564e-03, -6.3012e-05, -5.8689e-05,
        -5.7624e-03, -4.3482e-04, -1.9195e-03, -4.0518e-04, -6.7700e-05,
        -4.1339e-05, -9.7171e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4056e-04, -3.1549e-04, -2.8620e-05, -1.9400e-06, -1.0353e-05,
        -1.3946e-04, -1.0878e-05, -1.2005e-05, -4.4924e-06, -1.7357e-06,
        -2.0574e-05, -2.2454e-06, -4.6441e-06, -1.2886e-06,  1.3674e-05,
        -2.9076e-06, -2.6725e-06, -9.7166e-06, -1.0050e-05, -1.3063e-06,
         5.3101e-07, -9.4256e-05,  3.1712e-06,  2.1311e-06, -2.2873e-03,
        -1.8962e-07,  1.1367e-06, -6.2814e-06, -8.2427e-06,  1.7916e-06,
         3.6325e-06,  1.0288e-06,  4.8710e-06,  2.8351e-05, -2.5975e-05,
        -1.3640e-05, -6.9087e-06, -5.9275e-06, -2.6071e-06, -6.9127e-06,
        -5.5431e-06, -8.3155e-06, -1.0497e-03, -4.1505e-05, -8.2588e-04,
        -2.3183e-05, -5.7914e-04, -1.8130e-05, -4.8564e-05, -5.0141e-05,
        -9.3356e-05, -4.7488e-05, -3.0718e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3150: [tensor([-1.3074e-04, -5.8154e-05, -2.2459e-07,  1.6828e-05, -3.7505e-06,
         2.2055e-06,  1.3113e-05,  3.8254e-06, -3.0196e-06, -2.9746e-06,
        -2.7862e-06,  1.7012e-06, -1.8067e-06, -1.3937e-04,  1.9775e-06,
        -2.5251e-04, -7.7855e-05,  5.3554e-05, -9.3214e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1957e-04,  9.8654e-06,  6.0208e-06,  1.2228e-04,  5.8974e-07,
         3.4464e-06,  4.5969e-05, -5.9595e-07, -1.2350e-07,  3.8594e-06,
         1.3296e-06, -1.0019e-04, -2.5373e-05,  3.7174e-06, -1.6805e-06,
        -2.4579e-04, -2.6964e-06, -5.9926e-05, -7.5160e-05, -1.2751e-05,
         1.6089e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0313e-04, -1.6638e-06, -5.2647e-07,  5.8707e-06, -1.5069e-06,
         3.9038e-07, -5.9232e-06, -1.7627e-06, -1.7891e-06,  1.6047e-07,
        -1.0785e-06, -1.1413e-04, -2.0861e-05, -1.9682e-05, -3.0558e-06,
        -8.2562e-05, -2.8776e-05,  9.9246e-06, -4.9806e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3984e-04, -8.1687e-06,  8.1398e-07,  2.7301e-05,  1.7377e-07,
         1.1886e-06,  1.0937e-05, -1.2005e-06, -7.7259e-07,  1.5370e-06,
        -5.4419e-07, -1.5216e-04, -8.4041e-05, -5.5076e-05, -3.1778e-06,
        -1.4051e-04, -6.5556e-05, -7.3178e-06, -5.5461e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7491e-03, -4.2177e-04,  1.3841e-04, -3.0629e-04, -1.7404e-04,
        -1.5153e-04, -2.2228e-03, -5.9013e-05, -2.0420e-05,  2.9169e-04,
         8.2258e-05, -7.3175e-04, -6.9969e-03, -1.2880e-03, -3.5570e-04,
        -6.8684e-04, -6.3396e-04, -2.7129e-03, -5.0042e-04, -1.0570e-05,
        -5.9695e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9834e-02,  4.5184e-03, -2.4146e-03,  3.1632e-03,  1.9683e-03,
        -7.2430e-04,  5.2403e-02,  4.2321e-05, -6.8094e-04, -6.4038e-03,
        -4.8686e-04,  1.1037e-02,  1.2070e-02,  3.9009e-02,  1.0653e-03,
         4.4049e-03,  5.7028e-03, -9.7000e-02,  1.1100e-02,  2.7389e-03,
         2.8333e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5442e-04, -2.0163e-06,  3.5890e-06, -2.1528e-06, -3.4077e-06,
         9.7049e-07,  1.0519e-04, -1.0230e-06, -4.2701e-07,  2.6232e-06,
        -1.2417e-06, -2.2423e-05, -3.1986e-05,  1.8090e-05,  2.6052e-06,
        -2.4275e-04, -1.2624e-04, -2.2807e-05, -4.6975e-05, -1.1753e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1581e-04, -3.1935e-06, -1.8798e-06,  8.5722e-05, -2.2352e-06,
        -4.0446e-06, -1.2807e-06, -3.8371e-08, -4.8485e-04,  3.0246e-06,
        -4.6504e-06, -1.9678e-04, -9.4491e-06, -1.4720e-04, -8.7950e-06,
        -6.0622e-06, -1.0906e-05,  2.6916e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5462e-04, -2.9145e-06, -1.9533e-06,  1.1093e-04, -2.9355e-06,
        -3.1461e-06,  6.9817e-08, -2.0726e-06, -3.6820e-04, -4.2398e-06,
        -1.9150e-04, -5.7558e-06, -2.9771e-04, -3.4750e-05, -4.0020e-06,
        -8.6065e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2281e-04, -1.3049e-06, -1.4132e-06, -9.6157e-04, -9.9072e-07,
        -1.0404e-06, -1.4789e-06,  1.8495e-07, -6.0679e-06, -9.0877e-05,
        -7.0954e-06, -2.0334e-04, -1.2227e-04, -1.9980e-05, -7.4776e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0527e-04, -9.5989e-05,  1.1832e-05, -5.0563e-06,  1.1634e-06,
         1.6860e-06,  4.4713e-06, -1.2279e-05, -4.3522e-06, -1.6951e-05,
         6.3021e-06, -1.5658e-04, -3.5772e-06,  1.1745e-05,  6.2667e-06,
        -9.3212e-05, -1.5850e-04,  5.6511e-05, -2.3193e-05, -1.9100e-04,
        -5.0772e-05, -7.3083e-04,  9.7158e-05, -2.9351e-05,  6.9531e-05,
        -1.8232e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7664e-04, -2.4266e-06,  5.1735e-06, -1.7395e-06, -9.4459e-07,
         1.1407e-06,  4.1431e-07, -3.4224e-06, -2.2984e-06, -7.3519e-06,
         1.9861e-06,  8.5269e-06, -2.8176e-06, -1.9039e-07,  5.7378e-07,
        -1.0716e-04, -1.4988e-05, -3.6737e-05,  2.8757e-06,  1.7177e-07,
        -2.2998e-04,  2.7742e-06,  3.5089e-05, -8.2189e-05,  1.5834e-05,
        -9.4578e-06], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3200: [tensor([-1.4863e-04, -1.5657e-05, -3.6612e-06, -1.0130e-05, -1.9860e-06,
        -1.9166e-05, -1.6660e-06, -4.9963e-07, -2.9337e-06, -1.1493e-04,
        -1.9455e-05, -8.7751e-05, -1.6397e-05, -2.1004e-05, -1.0538e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1117e-04, -9.9694e-06, -2.1410e-06, -8.0270e-06, -1.3466e-06,
        -1.2506e-05, -1.9008e-06,  1.4759e-06, -8.1402e-07, -5.8609e-05,
        -7.9850e-06, -5.6656e-05, -1.4595e-04, -7.4638e-05, -3.5407e-06,
        -9.1805e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8540e-04,  5.7151e-05,  2.2080e-06,  3.4538e-06, -5.5179e-06,
         6.5793e-06,  2.8645e-06, -4.0312e-07,  1.9755e-06,  2.2132e-06,
         3.2464e-06,  2.1701e-05,  1.5751e-06,  4.9830e-06,  3.2498e-06,
        -2.5899e-04, -7.1318e-05,  1.0737e-05, -4.2482e-05, -5.0081e-06,
        -6.5283e-05, -5.4081e-05, -4.3101e-06, -9.4191e-05, -6.3053e-06,
        -9.0554e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1797e-05, -5.3167e-04,  1.9704e-07,  1.0153e-06, -2.1337e-06,
         3.0488e-06,  1.1942e-07,  7.0430e-07, -2.9895e-07,  8.0462e-07,
         1.9914e-06,  3.7751e-06,  8.6157e-07,  1.9608e-06,  1.1755e-06,
        -5.1018e-05, -5.1542e-06,  6.0095e-05, -6.8942e-07, -4.4184e-05,
        -3.5538e-05, -4.8302e-06, -7.4982e-06, -4.3782e-06, -1.2465e-06,
        -3.8428e-05, -2.4023e-05, -2.1159e-05,  9.5243e-08, -2.2706e-06,
        -2.6547e-06, -3.9539e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2499e-04,  8.6486e-05,  1.3745e-06,  1.0049e-06, -2.7149e-06,
         1.6567e-06,  2.3362e-07, -2.8537e-07, -3.8309e-07,  7.5934e-07,
        -1.2464e-06,  1.6356e-05,  5.6590e-07,  1.7396e-06,  8.0348e-07,
        -8.9478e-05, -3.2198e-05,  4.9677e-05, -1.0413e-06, -5.1337e-06,
         1.8956e-06,  2.8159e-06, -2.0698e-05, -1.4937e-05, -1.2846e-05,
        -6.9662e-05,  6.3402e-05, -7.3967e-06, -2.1039e-06, -7.2501e-07,
         3.1720e-06, -5.0635e-06, -6.4896e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5110e-05, -1.0079e-05,  1.0822e-05,  6.9769e-07,  2.3748e-06,
         3.1927e-06, -2.6118e-08,  4.0966e-08, -3.0006e-05,  3.6539e-06,
        -4.9250e-06, -1.6542e-06,  1.8186e-06,  4.9028e-06, -3.9501e-07,
        -1.4826e-04, -1.7291e-04,  4.1769e-05, -4.7177e-06, -1.7287e-04,
        -3.4411e-05, -7.2838e-07,  1.7291e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2316e-04, -2.4051e-05, -1.1625e-05, -3.2587e-06, -1.4746e-06,
         3.4196e-06,  2.5455e-06,  9.9441e-07, -7.3369e-05, -4.3919e-06,
        -1.0885e-05, -6.7019e-06,  2.9700e-06,  2.9567e-06,  1.1908e-06,
        -1.1819e-04, -4.9967e-04,  8.8297e-05, -3.1180e-05, -7.2292e-06,
        -4.0467e-05, -8.0593e-06,  4.4408e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2298e-04, -1.3677e-05,  1.9557e-05,  1.4176e-06,  5.8092e-06,
         2.7597e-06,  2.9219e-06,  6.7322e-06, -2.7927e-05,  5.7493e-06,
        -7.3670e-06,  4.9625e-05,  3.6065e-06,  3.2270e-06,  3.9486e-06,
        -6.8186e-05, -3.9876e-04,  4.1783e-05, -3.7594e-05, -1.9298e-05,
        -6.3485e-05,  5.5821e-06,  2.7627e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2599e-04, -1.3813e-03, -1.1156e-07, -2.3555e-06, -4.3515e-06,
        -4.8268e-08, -5.1300e-06,  6.1723e-07,  5.7564e-06,  1.2149e-05,
        -2.1811e-05, -2.7384e-06, -2.7411e-07,  2.1229e-06,  9.4792e-06,
         1.0984e-06, -3.8008e-06,  8.3318e-05, -4.2629e-04, -1.1229e-04,
         5.7797e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2696e-04,  4.2752e-05,  2.9073e-08, -3.5306e-06, -4.2650e-06,
        -2.3057e-06, -2.8077e-06,  4.7219e-06,  5.6402e-06, -3.5707e-04,
        -7.6553e-05, -1.3947e-06,  3.2216e-06,  2.1333e-06,  1.4120e-05,
        -1.9618e-06, -9.1070e-06,  3.6489e-04, -7.4883e-04, -1.5708e-04,
        -3.7289e-05, -2.3412e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5624e-04, -5.1358e-04,  4.1214e-07,  1.0532e-05, -4.7030e-07,
         3.4585e-06, -3.9632e-06, -2.6302e-07,  3.4801e-06, -9.6690e-04,
         2.5339e-05,  1.2830e-06,  2.0834e-06,  4.9274e-06,  5.9310e-06,
        -5.5755e-07, -2.6712e-05, -1.7915e-04, -4.0060e-05, -4.9767e-05,
        -1.6946e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6499e-05, -3.3139e-04, -3.0394e-07,  1.5058e-05,  1.9619e-05,
         1.1701e-06,  3.2675e-06,  2.2655e-06, -8.8772e-07,  7.6742e-05,
         4.9051e-06,  4.8761e-08,  7.1125e-07,  1.7983e-06,  9.9398e-06,
         9.8733e-07, -1.5749e-06,  7.9116e-07,  2.6236e-07, -7.5429e-05,
        -2.1938e-07, -4.2418e-05,  1.2766e-05, -3.9528e-05,  3.9393e-05,
        -1.7075e-05,  2.0365e-05,  1.7848e-05,  9.7121e-07, -1.2298e-05,
        -4.3273e-05, -3.8744e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3250: [tensor([-1.2983e-04, -1.7877e-05, -1.1790e-06, -6.3942e-06, -3.4575e-06,
         3.4130e-07, -3.0422e-06, -2.2524e-04, -7.9401e-06, -1.0049e-04,
        -7.1481e-06,  2.7602e-06, -6.3779e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5539e-04, -2.6694e-04,  2.1530e-06, -3.4363e-07,  6.6773e-06,
         1.2204e-05, -9.5650e-07, -4.1666e-04, -1.0710e-05, -2.1908e-05,
        -7.3679e-05, -5.3218e-05, -3.7375e-05, -2.1396e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3341e-04,  1.3269e-04, -4.9512e-07,  7.3661e-06,  3.0794e-06,
         4.9931e-06,  5.1801e-07,  2.6575e-07,  8.1226e-07, -3.8690e-06,
         2.4163e-08, -3.8304e-04,  3.9088e-06, -4.9003e-06,  3.0966e-06,
        -5.4003e-07, -9.1513e-05,  2.0913e-05, -4.6475e-05, -7.7214e-05,
         1.1466e-05, -5.6220e-05,  1.3942e-05, -3.7069e-06, -1.1258e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5845e-05,  4.2101e-05,  3.8265e-07,  4.0579e-06,  5.4864e-06,
         2.2550e-06, -1.4011e-07,  2.5630e-07,  3.8107e-07, -9.1338e-08,
        -9.8314e-07, -8.3175e-04,  1.2592e-06, -1.9236e-06,  1.7010e-06,
        -4.2421e-07, -4.9399e-05,  1.6239e-05, -2.7335e-06, -4.3073e-05,
        -2.7806e-06, -2.8127e-05, -2.2310e-05, -1.8783e-06, -5.6638e-06,
        -3.0417e-05,  2.0946e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9293e-05, -4.0232e-04,  8.5187e-07,  9.1934e-06,  1.8204e-05,
         4.1095e-06,  7.0080e-07,  1.7532e-06,  9.2606e-07,  5.5468e-07,
        -2.7538e-07, -5.5348e-04,  3.0743e-06, -2.1007e-06,  4.6956e-06,
         1.6568e-06, -7.5722e-05,  1.0616e-05, -4.4794e-07,  1.0849e-05,
        -5.1461e-05, -1.5542e-05,  6.7687e-06, -3.9526e-05, -1.5517e-06,
        -1.7843e-05,  1.4365e-05, -2.2817e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0085e-05, -2.3337e-06, -1.1528e-06, -4.4018e-06, -1.0283e-05,
        -1.3423e-06, -4.8951e-06, -2.1328e-06, -1.8109e-06, -2.1135e-06,
        -1.6552e-06, -4.8057e-05, -5.0166e-06, -4.1950e-05,  1.3020e-05,
         4.4926e-06, -1.1152e-04,  3.5083e-06, -6.4526e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3189e-04, -2.7071e-06, -4.6035e-07, -2.9262e-06, -6.9855e-06,
        -5.8589e-07, -6.9785e-06, -2.4204e-05, -2.1948e-06,  8.0781e-07,
        -2.0452e-06, -9.2093e-05, -7.3492e-06, -7.2140e-05, -1.6955e-04,
        -6.5585e-06, -1.2409e-04, -9.8805e-05, -8.8409e-06, -5.5561e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8844e-05, -1.2629e-06, -1.0993e-06, -2.5303e-06, -6.1329e-06,
        -9.6449e-07, -5.0428e-06, -1.6709e-05, -1.9914e-06, -1.7843e-06,
        -1.5334e-06, -3.1217e-05, -3.7029e-06, -3.3810e-05, -1.9740e-06,
        -4.5265e-05, -3.5413e-06,  5.3553e-06, -9.0199e-05, -1.3902e-06,
        -4.1572e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1883e-04,  8.5739e-07,  6.0293e-06, -3.5058e-07,  6.3255e-06,
        -1.8336e-06,  5.9583e-06,  7.5037e-07, -1.1690e-05, -5.2002e-06,
        -4.3201e-06, -2.1852e-04, -2.3132e-04, -5.6600e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0281,  0.0053,  0.0114,  0.0036,  0.0010,  0.0015,  0.0110,  0.0020,
        -0.0026,  0.1017, -0.0160,  0.1494, -0.0636,  0.0799,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1425e-04, -6.4246e-05, -7.2720e-05,  2.3247e-05,  9.3545e-05,
        -1.5535e-05, -1.7674e-04,  4.8293e-06,  2.8764e-04, -4.2372e-03,
         9.0627e-04, -1.1480e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7380e-04, -2.0010e-05, -4.9047e-06,  8.4875e-06,  9.2165e-06,
         2.1840e-05, -9.2576e-06, -5.2385e-05, -8.0900e-06, -6.2955e-04,
        -4.9894e-06, -4.1327e-05, -1.5578e-03, -3.2406e-05,  1.5694e-05,
        -6.3608e-06, -5.3280e-05,  3.5852e-04, -2.6685e-03, -9.0861e-05,
        -8.1364e-05, -8.4868e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3300: [tensor([-5.0406e-04, -9.4222e-04, -1.9522e-05, -9.7896e-06, -4.9320e-06,
        -6.1319e-06, -3.2978e-06, -2.3973e-05, -1.9704e-05,  4.3090e-06,
        -2.7064e-05, -3.9492e-06, -1.6583e-05, -4.0193e-05, -3.9502e-06,
        -2.5606e-05, -1.9059e-05, -7.1702e-06, -5.6529e-07,  3.5179e-06,
        -2.4072e-07, -7.9671e-04,  1.2645e-04, -9.4719e-05, -1.7570e-04,
        -7.9824e-05,  5.2702e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8320e-04, -3.3550e-05, -1.1491e-05, -8.7611e-06, -3.7834e-06,
        -2.4983e-05, -3.6398e-05, -6.7833e-06, -4.9946e-07, -2.1313e-05,
        -2.5851e-06,  3.7308e-06, -4.6848e-06, -4.2946e-04, -2.2127e-05,
        -2.1199e-05, -5.7806e-06, -2.2811e-04, -4.0174e-04, -4.9504e-05,
        -5.7518e-06, -1.1528e-04,  1.5515e-04, -7.5151e-06, -1.2072e-05,
        -6.3313e-05,  2.6591e-04, -1.3277e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3428e-04, -7.7131e-05, -8.0633e-06, -1.3206e-05, -7.9496e-06,
        -2.5279e-05, -8.6623e-05, -1.0284e-05, -3.0407e-06, -2.6757e-05,
        -4.8499e-06, -5.5283e-06, -7.6649e-06, -3.5526e-04, -2.0602e-05,
        -2.6603e-05, -5.9006e-04,  6.2935e-05, -8.7254e-04, -7.9756e-06,
        -8.7218e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5165e-04, -1.1737e-04, -1.7215e-05, -1.9462e-05, -4.3122e-06,
        -8.3308e-05, -7.4068e-05, -1.2083e-05, -8.1149e-06, -3.0195e-05,
        -1.0538e-05,  1.6399e-06, -1.2803e-05, -1.0327e-03, -4.5191e-05,
        -6.3681e-05, -3.5516e-05, -1.9138e-05, -1.1414e-03, -1.1725e-05,
        -1.3900e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6949e-04, -5.1881e-05, -3.7158e-06, -1.0738e-05, -2.8366e-05,
        -3.0893e-05, -6.0352e-06, -6.9681e-06, -2.3404e-06, -7.0430e-06,
        -5.6486e-06, -4.2440e-06, -1.6738e-06, -7.9699e-06,  2.5526e-07,
        -2.1386e-05, -2.3947e-05, -6.7055e-06, -2.1050e-06, -1.5019e-06,
        -1.3455e-05,  5.0436e-06,  4.7960e-06, -3.2612e-06, -2.0600e-06,
        -7.9088e-06,  4.8789e-07,  1.2520e-06, -1.6273e-06, -5.5703e-04,
        -9.4777e-05, -8.8966e-05, -1.5011e-05, -7.0769e-05, -3.2297e-05,
        -2.5300e-05, -1.6789e-04, -5.1394e-04, -1.1207e-04, -1.1354e-05,
        -3.8182e-04, -4.9204e-04, -4.0248e-05, -1.6976e-05, -1.1209e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3018e-04, -6.0263e-05, -9.9835e-06, -8.9555e-06, -1.6218e-05,
        -7.0941e-05, -3.7178e-06, -1.5919e-05, -1.6898e-06, -9.0776e-06,
        -3.1984e-06, -3.2579e-06,  2.4566e-06, -8.8745e-06, -8.7590e-07,
        -2.5562e-05, -3.4621e-05, -2.7397e-06, -1.7387e-06, -1.8469e-06,
        -1.5692e-05,  3.5363e-06,  7.2180e-06, -3.5469e-06, -3.8759e-06,
        -7.7774e-06,  5.7563e-06,  3.5314e-06, -1.6931e-06, -1.0359e-03,
        -4.7905e-05, -1.9125e-04, -1.6174e-05, -5.1322e-05, -3.0481e-05,
        -4.0908e-04, -1.5411e-05, -1.2183e-05, -3.8256e-04, -6.6073e-05,
        -1.0919e-04, -3.9087e-06, -4.3626e-04, -6.5397e-04, -8.6829e-06,
        -6.3456e-05, -1.7113e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1928e-04, -2.0678e-05, -5.6741e-06, -5.1027e-06, -1.1240e-05,
        -2.5555e-05, -1.7740e-06, -4.4355e-06, -1.3797e-06, -4.7432e-06,
        -2.5454e-06, -6.9265e-07,  3.5927e-07, -3.6916e-06, -1.3109e-06,
        -9.7821e-06, -1.6665e-05, -2.9163e-06, -2.1796e-06, -1.7262e-06,
        -7.0461e-06,  6.9821e-07,  2.1376e-06, -1.4128e-06, -9.1186e-07,
        -1.5190e-06,  1.0330e-06,  1.1132e-06, -3.1868e-07, -2.3038e-04,
        -8.4640e-06, -1.6994e-04, -4.5816e-06, -2.2456e-04, -5.8118e-06,
        -3.9951e-05, -1.4034e-04, -1.0030e-06, -2.9581e-05, -3.3841e-04,
        -1.0862e-04, -3.7596e-05, -9.9611e-05, -5.4576e-06, -1.6091e-06,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9880e-04, -6.6819e-05, -1.0353e-05, -9.2229e-06, -7.0483e-06,
        -1.8184e-05, -4.2388e-05, -2.7090e-06, -5.0676e-06, -1.3477e-06,
        -9.1854e-06, -5.3233e-06,  1.7056e-06, -5.8013e-06,  1.6701e-06,
        -3.2174e-06, -6.0738e-06, -2.2621e-06, -7.4986e-06,  1.4898e-07,
         1.7691e-06, -1.3412e-05, -4.2754e-07,  1.0258e-06, -4.1397e-07,
        -5.1714e-04,  3.2260e-05, -7.6573e-05, -2.6440e-04, -4.7890e-06,
        -1.1956e-05, -4.9114e-04, -1.0857e-05, -5.9890e-06, -1.4062e-05,
         7.8307e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2367e-04, -1.5454e-04, -1.1209e-05, -1.3727e-05, -1.0966e-05,
        -2.9943e-05, -6.8146e-05, -5.0256e-06, -1.9179e-06, -3.5219e-06,
        -1.2017e-05, -5.4872e-06,  3.5987e-06, -7.7488e-06,  2.4101e-06,
        -2.8694e-06, -6.7736e-06, -3.6909e-06, -1.0506e-05,  5.1993e-07,
         4.0473e-06,  1.0701e-05,  1.4778e-06, -5.5133e-07,  2.8223e-06,
        -1.5126e-03,  1.6680e-05, -1.6152e-05, -4.3834e-04, -5.0199e-05,
        -2.2466e-05, -1.3072e-04, -1.1894e-05, -4.7876e-05, -1.2364e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7793e-04, -1.3207e-04, -1.6791e-05, -9.4400e-06, -1.1234e-05,
        -3.7651e-05, -7.0579e-05, -1.1217e-06, -7.2678e-06, -2.7914e-06,
        -8.4284e-06, -7.6332e-06,  1.2276e-06, -7.2876e-06,  3.0620e-06,
        -3.5468e-06, -9.1398e-06, -1.8844e-06, -6.3050e-06, -1.8455e-06,
         7.0100e-06, -2.8811e-05,  1.3784e-06, -8.7074e-07, -2.1658e-06,
        -5.2162e-04,  6.9821e-05, -9.0028e-05, -2.9735e-04,  9.1364e-07,
        -1.2841e-05, -5.9238e-04, -3.1193e-05, -5.8761e-05, -1.3937e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3667e-03, -3.7430e-04, -1.0868e-05, -1.5572e-04, -1.9529e-05,
        -3.4538e-05,  6.6922e-07, -3.0032e-04, -4.5856e-06, -5.7629e-05,
        -4.2448e-05, -3.3748e-05, -3.5178e-05, -4.0127e-05, -3.5013e-05,
        -4.3550e-05, -3.4128e-04, -1.3904e-05, -1.7175e-05, -4.3902e-05,
         2.7141e-06,  2.5210e-05, -2.6973e-06, -4.1318e-03, -3.9658e-05,
        -3.7518e-03,  6.2062e-04, -4.1263e-05,  1.4614e-04, -9.4155e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5461e-04, -1.0156e-04, -5.8317e-06, -5.6128e-05, -8.2335e-06,
        -1.7304e-05, -2.6715e-06, -1.0406e-04, -4.1444e-07, -1.7515e-05,
        -1.4837e-05, -1.4763e-05, -1.3714e-05, -9.0824e-06, -1.5242e-05,
        -1.9855e-05, -1.2058e-04, -1.1084e-05, -1.1732e-05, -5.8000e-06,
        -9.0260e-06, -3.8678e-06,  2.2065e-06, -6.8067e-04,  3.2303e-05,
        -6.0055e-04, -1.1944e-05, -7.7623e-06, -5.0099e-04, -2.2825e-04,
        -5.7531e-05, -1.8693e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3350: [tensor([ 1.3019e-02,  3.8458e-04,  1.6397e-04, -2.3665e-05,  1.3867e-04,
         3.8384e-04,  1.4227e-04,  1.8449e-03,  1.3588e-04, -7.3828e-05,
         9.0311e-05,  5.2981e-02,  4.5688e-04,  4.3361e-04,  1.5003e-04,
         6.0000e-04,  4.8558e-04,  3.6258e-04,  5.6676e-04,  2.0415e-03,
         4.4486e-04,  1.5578e-04,  2.3049e-04,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0423e-03,  1.0024e-04,  1.1867e-05,  5.7971e-07,  5.4512e-05,
         1.0734e-04,  8.5667e-05,  6.4085e-04,  7.1236e-05, -2.9304e-06,
         3.4577e-05,  1.6963e-02,  1.7175e-04,  1.4986e-04,  3.3758e-04,
         1.0736e-04,  1.8506e-04,  1.7236e-04,  1.0317e-04,  4.8373e-05,
         8.4605e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8755e-03, -1.1276e-02, -3.7964e-04, -6.6816e-03, -6.6960e-03,
         4.3284e-04, -1.5086e-03, -1.0837e-03, -6.0904e-04, -2.3366e-04,
        -3.5918e-04, -2.6180e-01, -2.0135e-03, -2.3664e-03, -1.1309e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.4941,  0.1516,  0.0114,  0.0547,  0.0770, -0.0069,  0.0231,  0.0146,
         0.0067, -0.0051,  0.0054,  2.9866,  0.0042,  0.0344,  0.0259,  0.0198,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4590e-07, -1.8846e-06, -7.2650e-08, -9.2112e-07, -5.2239e-07,
         2.4583e-08, -8.7180e-08, -9.9328e-08, -2.3126e-08, -1.1131e-08,
        -3.8246e-08, -4.0587e-07, -7.1293e-06, -2.5884e-07, -1.1435e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4425e-07, -4.9124e-08, -7.0760e-08, -9.2366e-08, -3.3294e-08,
        -4.4366e-08, -2.6273e-08, -2.8833e-08, -1.1754e-08, -1.1243e-07,
        -1.1444e-07, -3.1943e-07, -1.7027e-06, -8.0052e-08, -5.4500e-08,
        -3.6581e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2897e-07, -9.2541e-08, -8.8956e-08, -1.0242e-07, -6.9566e-08,
        -1.0085e-07, -4.3993e-08, -1.0618e-08, -4.9198e-08, -5.0972e-07,
        -2.0615e-07, -5.2588e-06, -1.6432e-07, -2.0577e-07, -6.7447e-08,
        -8.3509e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3399e-07, -1.5325e-07, -1.9190e-07, -1.9948e-07, -1.5176e-07,
        -1.0358e-07, -1.2154e-07, -4.4708e-08, -6.2202e-08, -1.6816e-07,
        -1.4568e-07, -7.3546e-06, -2.4564e-07, -8.4297e-07, -1.4585e-07,
        -2.1562e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.0057e-06, 2.2671e-06, 3.9508e-07, 1.9850e-06, 1.6266e-07, 2.1424e-06,
        2.7465e-07, 2.5534e-07, 2.0238e-06, 2.2191e-07, 2.7287e-08, 2.1251e-07,
        5.9790e-05, 1.1244e-06, 6.0537e-07, 6.3076e-07, 4.4734e-07, 9.5072e-07,
        7.1753e-07, 6.6662e-05, 3.9424e-07, 1.8937e-07, 1.9565e-07, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0329e-07, -5.5542e-07, -5.6594e-08, -3.0182e-07, -4.5203e-08,
        -5.4702e-07, -6.2124e-08, -2.9752e-08, -2.6089e-07, -1.5366e-08,
        -3.5629e-09, -2.1496e-08, -1.8903e-05, -1.2304e-07, -4.5532e-07,
        -4.3950e-08, -1.3568e-06, -1.2686e-07, -1.1724e-07, -4.5313e-08,
        -2.3236e-08, -5.2542e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3984e-06,  1.8215e-06,  1.8914e-07,  1.4765e-06,  1.6619e-07,
         1.0603e-06,  2.1280e-07,  4.0601e-08,  1.2258e-06,  1.1810e-07,
        -4.4666e-08,  1.5903e-07,  2.6715e-05,  5.3693e-07,  2.2036e-07,
         1.7111e-06,  9.7097e-07,  5.8235e-07,  3.6275e-07,  2.2670e-07,
         4.7411e-05,  1.1747e-07,  8.6005e-09,  5.7808e-08,  1.6109e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([4.1597e-04, 1.0489e-04, 9.1516e-06, 2.7978e-05, 8.5798e-06, 1.2805e-05,
        4.5711e-05, 1.2368e-05, 7.5032e-06, 3.8138e-05, 1.0188e-05, 1.3155e-05,
        5.4960e-06, 7.7888e-06, 4.4584e-05, 2.3819e-05, 6.6033e-05, 2.3393e-03,
        1.8593e-05, 2.1603e-05, 1.1469e-05, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3400: [tensor([ 2.9635e-07, -1.1906e-06, -9.4493e-08, -6.6853e-07, -1.0083e-07,
        -1.6407e-08, -6.4338e-08, -9.6226e-07, -8.0574e-08, -5.6491e-08,
        -4.9372e-08, -7.6686e-08, -7.8047e-08, -2.8639e-08, -2.7727e-08,
        -3.0546e-06, -1.1200e-07, -6.3607e-06, -1.0609e-07, -1.9058e-07,
        -5.9248e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0114e-07, -5.6939e-07, -5.8415e-08, -2.7253e-07, -7.0600e-08,
        -1.5466e-08, -2.2863e-08, -6.9859e-07, -3.8017e-08, -4.2256e-08,
        -2.4806e-08, -5.2898e-08, -6.5699e-08, -2.1961e-08, -5.1930e-09,
        -5.6516e-06, -7.3834e-08, -1.5859e-07, -7.5905e-08, -2.1839e-06,
        -8.9631e-08, -3.8254e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2251e-06, -3.9861e-08, -4.5628e-07, -5.9296e-08, -1.2670e-07,
        -4.4255e-08, -5.2211e-08, -5.4294e-08, -5.2566e-08, -6.7045e-08,
        -1.6393e-08, -1.8947e-08, -6.3283e-08, -9.6367e-07, -2.7234e-08,
        -4.9124e-08, -3.8868e-09, -5.3776e-08, -5.2538e-07, -7.7908e-08,
        -9.3380e-07, -1.3536e-06, -2.3710e-08, -9.2523e-08, -3.8046e-06,
        -1.3734e-06, -7.4350e-08, -2.8343e-07, -8.5977e-08, -8.2954e-08,
        -1.1222e-07, -3.4106e-08, -5.9172e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3836e-07, -2.1358e-08, -2.4710e-07, -1.8875e-08, -7.5154e-08,
        -1.0596e-08, -2.5965e-08, -1.9292e-08, -1.3194e-08, -4.4532e-08,
        -1.1644e-08, -1.5683e-08, -3.8554e-08, -1.8622e-06, -8.3307e-09,
        -2.6064e-08, -9.2738e-09, -1.5577e-08, -4.0191e-07, -3.8500e-08,
        -4.1933e-07, -5.7546e-07, -3.5772e-08, -5.3615e-08, -2.3132e-08,
        -3.2848e-07, -1.4508e-08, -1.0049e-07, -2.0183e-06, -2.7115e-08,
        -4.7597e-08, -3.4937e-08, -2.3259e-07, -4.0768e-08, -3.7929e-08,
        -4.3519e-09, -9.5461e-08, -3.2456e-08, -1.5405e-08, -1.6491e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4729e-07, -3.3309e-08, -1.8926e-07, -3.6072e-08, -7.2133e-08,
        -2.1332e-08, -2.0490e-08, -3.4716e-08, -2.6251e-08, -3.4054e-08,
        -2.7749e-08, -1.5845e-08, -3.2308e-08, -5.6963e-07, -1.5859e-08,
        -3.0735e-08, -8.6477e-09, -2.2034e-08, -2.7545e-07, -4.4338e-08,
        -5.2164e-08, -3.8424e-07, -6.0245e-08, -1.0192e-07, -1.5457e-06,
        -6.2704e-08, -3.2605e-06, -6.6961e-08, -1.0762e-07, -2.2616e-08,
        -4.1652e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5111e-07, -7.4674e-08, -5.6714e-08, -4.3908e-07, -3.7951e-08,
        -9.9200e-07, -2.4287e-08, -4.5118e-08, -3.0612e-07, -2.0874e-07,
        -1.2972e-08, -8.9834e-08, -7.0402e-08, -1.4377e-07, -3.3464e-07,
        -9.3035e-08, -2.4526e-07, -7.9176e-08, -7.0189e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9937e-07, -2.6547e-08, -3.2635e-08, -1.6908e-07, -3.1857e-08,
        -3.7532e-07, -1.6260e-08, -3.0088e-08, -1.5017e-07, -1.1010e-07,
        -3.1023e-09, -1.0546e-08, -5.6035e-08, -6.0032e-08, -9.1790e-08,
        -9.7707e-08, -1.5100e-06, -1.4088e-08, -2.6857e-08, -8.8689e-08,
        -7.3369e-08, -1.4306e-08, -2.7692e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5146e-07, -3.4915e-08, -3.1575e-08, -1.4365e-07, -2.3625e-08,
        -5.1382e-07, -2.3702e-08, -3.1465e-08, -1.4670e-07, -1.0224e-07,
        -3.2088e-09, -4.0928e-08, -6.0498e-08, -8.4968e-08, -1.2169e-07,
        -6.3005e-08, -8.5556e-08, -1.5101e-06, -9.2713e-08, -1.2117e-07,
        -3.6974e-06, -8.7421e-08, -3.2309e-08, -3.7138e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5728e-07, -1.6713e-06, -5.8006e-08, -4.3693e-08, -7.2561e-07,
        -2.6902e-08, -4.1161e-08, -6.1396e-08, -3.2943e-08, -3.6549e-07,
        -9.5663e-08, -5.5114e-08, -2.6443e-08, -5.0343e-08, -1.5626e-06,
        -1.2140e-07, -4.2686e-07, -3.5192e-07, -4.7453e-08, -1.0257e-07,
        -5.4729e-08, -8.4063e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5575e-07, -6.4334e-07, -3.5811e-08, -2.3682e-08, -2.3949e-07,
        -6.0495e-09, -2.9589e-08, -3.6548e-08, -1.7008e-08, -2.0891e-07,
        -4.3901e-08, -2.5256e-08, -1.0543e-08, -2.4625e-08, -8.6094e-07,
        -6.5193e-08, -3.9974e-08, -8.2267e-08, -6.7150e-08, -3.1554e-06,
        -4.2193e-08, -3.7757e-07, -2.7988e-07, -2.8810e-08, -5.0985e-09,
        -2.1989e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1037e-07, -4.8043e-07, -2.9604e-08, -2.4475e-08, -2.0944e-07,
        -1.6638e-08, -1.9013e-08, -2.2973e-08, -1.0702e-08, -2.5199e-07,
        -3.6179e-08, -1.8936e-08, -6.5734e-09, -1.4609e-08, -3.8331e-07,
        -5.7530e-08, -2.6374e-08, -1.2421e-07, -4.9745e-08, -5.8866e-08,
        -4.5974e-07, -3.3044e-07,  1.8123e-09, -9.8571e-09, -1.5383e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0835e-07, -2.7777e-08, -5.4592e-08, -2.0240e-07, -1.5755e-08,
        -3.0043e-07, -3.8851e-08, -2.6289e-08, -3.7478e-07, -1.5965e-08,
        -2.7410e-09, -2.7562e-08, -2.8296e-07, -6.5862e-08, -6.4724e-08,
        -4.1995e-08, -3.9667e-08, -5.4722e-08, -4.4567e-08, -9.6149e-08,
        -1.1145e-07, -8.2424e-08, -1.8783e-07, -6.8162e-08, -8.0158e-08,
        -2.8758e-08, -2.4679e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3450: [tensor([ 2.0600e-07, -7.2903e-07,  2.7999e-09, -1.1916e-07, -2.6251e-08,
        -1.8298e-08, -4.6288e-08, -5.4425e-07, -2.0727e-08, -1.9194e-08,
        -5.2371e-09, -3.0163e-08, -6.1914e-07, -6.6746e-08, -6.4701e-08,
        -3.0298e-08, -3.9293e-07, -6.9504e-08, -3.0764e-08, -3.6040e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9206e-07, -3.4993e-07, -1.0281e-09, -6.5708e-08, -1.5346e-08,
        -1.2619e-08, -3.1770e-08, -4.5102e-07, -1.6215e-08, -3.7552e-08,
        -3.6876e-09, -1.4444e-08, -1.9153e-07, -1.1634e-08, -3.1553e-08,
        -3.4634e-08, -2.2587e-08, -5.7226e-08, -5.1559e-08, -3.6143e-08,
        -3.5779e-08, -1.4660e-08, -2.9822e-09,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5157e-07, -6.0393e-08, -8.9838e-08, -7.0247e-08, -8.2138e-07,
        -4.6992e-08, -5.6746e-08, -1.0357e-08, -2.2413e-08, -1.3431e-07,
        -1.5193e-07, -2.0459e-07, -1.2760e-07, -1.1312e-05, -7.3943e-08,
        -1.3762e-07, -7.6272e-07, -1.2351e-07, -1.1243e-07, -4.3798e-08,
        -5.3223e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0226e-07, -7.0085e-08, -8.4678e-08, -6.8635e-08, -4.8048e-07,
        -3.1170e-08, -5.3686e-08, -3.8282e-09, -1.9085e-08, -1.7482e-07,
        -1.7203e-07, -1.3176e-07, -2.7912e-08, -3.5459e-08, -3.1712e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4128e-07, -7.4460e-08, -9.1879e-08, -1.1385e-07, -4.9856e-07,
        -3.8958e-08, -3.9427e-08, -8.5229e-09, -2.4094e-08, -1.6697e-07,
        -6.4108e-08, -1.3973e-06, -2.0649e-07, -2.2189e-07, -2.7874e-07,
        -6.2199e-08, -5.4301e-08, -5.8809e-08, -5.5151e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5937e-07, -3.6854e-07, -2.2381e-08, -3.8464e-08, -2.9488e-07,
        -2.5454e-08, -2.9297e-08, -8.1709e-08, -1.6646e-07, -2.5988e-09,
        -2.3493e-08, -5.4881e-07, -8.2025e-07, -3.3759e-08, -3.1567e-08,
        -1.4929e-08, -3.8010e-08, -9.4517e-08, -2.5803e-08, -1.3204e-06,
        -7.9411e-09, -2.4629e-07, -2.4877e-09, -3.7507e-08, -2.8111e-08,
         1.1532e-09,  1.5722e-09, -2.0441e-08, -9.1893e-07, -3.8201e-08,
        -1.6733e-08, -9.1270e-07, -2.5895e-06, -1.8090e-08, -3.8937e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8078e-08, -7.9337e-07, -2.6360e-08, -5.1146e-08, -3.1167e-07,
        -2.7238e-08, -3.4941e-08, -4.6411e-08, -1.3480e-07, -8.3900e-09,
        -2.6747e-08, -3.8018e-07, -1.1104e-06, -3.4499e-08, -3.4278e-08,
        -9.4297e-09, -3.4956e-08, -8.1986e-08, -3.0705e-08, -1.0281e-06,
        -1.3901e-08, -1.7315e-07, -4.6937e-09, -1.6909e-08, -3.2752e-08,
        -9.3149e-09, -2.2520e-08, -2.7519e-08, -1.1740e-06, -4.0710e-08,
        -7.5800e-08, -2.8646e-08, -3.2567e-08,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7665e-07, -6.9446e-07, -1.6899e-08, -2.7995e-08, -2.3720e-07,
        -1.5445e-08, -2.9173e-08, -3.9206e-08, -1.0704e-07, -6.6159e-09,
        -2.4871e-08, -4.7582e-07, -5.7366e-07, -3.5733e-08, -3.1742e-08,
        -1.6826e-08, -2.2939e-08, -9.3268e-08, -2.9923e-08, -5.1851e-07,
        -1.8103e-08, -2.3938e-07, -7.3702e-10, -1.8790e-08, -2.7900e-08,
        -6.1615e-09, -1.2010e-08, -2.6951e-08, -1.0763e-06, -3.2508e-08,
        -2.1880e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8829e-06, -1.7883e-07, -8.2427e-07, -2.7187e-06, -6.9524e-08,
        -2.2991e-07, -1.8381e-07, -1.3703e-06, -1.4934e-07, -8.1385e-07,
        -5.5986e-07, -3.5167e-07, -1.2759e-07, -1.4336e-07, -7.4962e-09,
        -1.1905e-07, -3.8246e-08, -3.9288e-07, -1.1805e-07, -5.0549e-08,
        -5.6169e-07, -1.8331e-06, -3.3055e-05, -1.5220e-07, -1.7467e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1657e-08, -4.2451e-08, -3.3575e-07, -1.1512e-06, -2.3589e-08,
        -3.6593e-08, -3.8766e-08, -4.4275e-07, -2.6666e-08, -6.8394e-07,
        -1.7651e-07, -9.3348e-08, -1.4252e-08, -5.8743e-08, -1.1527e-08,
        -2.1003e-08, -1.2978e-08, -8.8989e-08, -1.7325e-08, -1.1682e-07,
        -8.1898e-07, -2.3249e-06, -4.5036e-09, -2.9461e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3962e-06, -1.0731e-07, -4.8311e-07, -8.7529e-07, -3.5179e-08,
        -1.4530e-07, -8.2871e-08, -7.1238e-07, -1.3706e-07, -4.8522e-07,
        -4.2340e-07, -1.9136e-07, -8.1216e-08, -1.4703e-07, -3.4945e-08,
        -1.3735e-08, -3.2315e-08, -2.6049e-07, -3.6279e-08, -5.2109e-08,
        -3.5981e-07, -6.7172e-07, -2.2628e-05, -1.0424e-07, -8.4659e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3721e-06, -2.2823e-07, -6.3576e-07, -1.7716e-06, -5.8937e-08,
        -1.9450e-07, -1.5490e-07, -1.3214e-06, -1.3323e-07, -7.9846e-07,
        -6.4636e-07, -2.6454e-07, -1.3342e-07, -1.3743e-07, -4.2602e-08,
        -8.3426e-08, -2.3488e-08, -3.7827e-07, -1.1209e-07, -4.0455e-07,
        -1.3526e-06, -7.8847e-07, -3.2411e-05, -1.8769e-07, -1.5372e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3500: [tensor([-5.3320e-07, -5.0562e-08, -8.8121e-08, -1.4640e-07, -4.2102e-08,
        -3.3272e-08, -4.5988e-08,  3.2948e-09, -4.3999e-08, -5.7368e-08,
        -4.2400e-08, -6.7846e-08, -6.8212e-07, -7.7636e-08, -2.6991e-08,
        -3.3330e-08, -1.5656e-07, -1.9971e-07, -8.9311e-08, -5.4669e-07,
        -9.0880e-08, -7.8830e-08, -5.9675e-08, -4.0109e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0895e-06, -1.2924e-07, -1.1358e-07, -2.8392e-07, -9.1745e-08,
        -1.0116e-07, -9.8859e-08, -1.8128e-08, -8.3095e-08, -8.8861e-08,
        -1.0922e-07, -1.0233e-07, -1.8994e-06, -1.2304e-07, -4.8685e-08,
        -7.8649e-08, -2.7825e-07, -2.4048e-07, -1.8948e-07, -4.5160e-07,
        -9.1801e-08, -1.7969e-07, -2.2415e-05, -1.0598e-07, -5.1554e-08,
        -1.2419e-07,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6980e-07, -5.0114e-08, -7.9446e-08, -1.2317e-07, -4.2855e-08,
        -3.3428e-08, -4.7651e-08, -1.3003e-08, -3.4383e-08, -4.8129e-08,
        -4.6839e-08, -5.0112e-08, -1.0589e-06, -6.4698e-08, -2.0179e-08,
        -2.8231e-08, -1.1405e-07, -1.8012e-06, -1.7483e-07, -1.1245e-07,
        -6.5245e-08, -4.3783e-08, -6.0431e-08, -3.9234e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4444e-08, -2.5458e-08, -2.1908e-08,  5.6870e-09, -3.2830e-08,
         5.1419e-09, -3.5644e-08, -4.2704e-08, -2.2654e-07, -6.5715e-08,
        -2.7468e-08, -4.9643e-06, -4.0481e-08, -1.4933e-08, -1.7105e-08,
        -5.4570e-08, -6.6038e-08, -5.2151e-08, -9.0704e-08, -5.7451e-08,
        -1.8594e-08, -3.1746e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6247e-07, -9.1042e-09, -7.0681e-09, -1.1648e-08, -1.0369e-08,
        -4.7308e-09, -1.9344e-08, -2.3004e-08, -8.3650e-08, -2.7915e-08,
        -1.2665e-08, -2.2176e-06, -1.5177e-08, -1.0879e-08, -1.0538e-08,
        -3.1683e-08, -1.7959e-08, -5.2474e-08, -3.6740e-08, -3.0694e-08,
        -2.5763e-08, -1.1221e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1010e-07, -3.8819e-08, -8.2933e-09, -1.2162e-08, -3.0204e-08,
        -8.9231e-09, -2.4911e-08, -3.1426e-08, -1.1267e-07, -4.8372e-08,
        -2.4652e-08, -4.3084e-06, -5.3453e-08, -1.2030e-08, -9.1847e-09,
        -5.8722e-08, -2.9248e-08, -7.7169e-08, -3.5754e-07, -6.2453e-08,
        -1.0213e-07, -8.7877e-08, -1.3962e-07, -6.9977e-08, -4.3208e-07,
        -1.1311e-07, -3.0665e-08, -3.9609e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3677e-07, -6.9249e-07, -3.1211e-08, -9.6323e-08, -4.9191e-08,
        -1.0779e-07, -3.9365e-09, -5.0646e-07, -2.2788e-08, -3.9645e-08,
        -2.2159e-08, -6.3631e-08, -1.4189e-08, -2.3010e-08, -3.0421e-08,
        -2.0498e-07, -4.9714e-08, -2.1867e-06, -2.5989e-07, -5.9526e-08,
        -2.1376e-08, -2.4267e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3095e-08, -7.8987e-07, -1.2863e-08, -4.4551e-08, -2.6002e-08,
        -6.4506e-08, -2.2008e-09, -3.4049e-07, -1.9977e-08, -3.0527e-08,
        -2.3430e-08, -2.8648e-08, -1.2385e-08, -1.6625e-08, -6.5962e-07,
        -3.4257e-08, -5.7032e-08, -4.7693e-08, -2.4817e-07, -5.9931e-11,
        -3.3362e-07, -2.6042e-08, -5.6204e-10, -1.2854e-08, -2.3045e-08,
        -2.0611e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8281e-07, -5.4986e-07, -2.2452e-08, -5.4409e-08, -4.5611e-08,
        -5.4703e-08,  1.3543e-09, -3.1711e-07, -2.1445e-08, -2.7983e-08,
        -1.4883e-08, -4.0761e-08, -4.6436e-09, -2.0764e-08, -4.0170e-08,
        -2.6679e-07, -3.4496e-08, -9.5110e-07, -9.4753e-07, -3.7832e-08,
        -2.6378e-08, -1.4176e-08, -3.3896e-08, -1.9434e-08, -1.6265e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0945e-07, -7.3659e-07, -2.0479e-08, -4.1426e-08, -5.3142e-08,
        -5.2628e-07, -3.4903e-08, -4.2043e-08, -1.1160e-08, -8.0490e-09,
        -7.6579e-07, -7.3240e-08, -1.0614e-06, -9.2843e-08, -3.2179e-07,
        -1.4081e-07, -2.6480e-08, -6.2722e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0494e-06, -1.5501e-06, -4.1295e-08, -6.8547e-08, -7.3901e-08,
        -6.1672e-07, -5.2971e-08, -7.2231e-08, -1.6718e-08, -4.6604e-08,
        -1.2414e-06, -8.5097e-08, -1.0110e-06, -1.4733e-07, -1.4954e-06,
        -7.2442e-08, -3.9662e-08, -8.3835e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0020e-07, -6.8758e-07, -4.0543e-08, -5.8920e-08, -5.8241e-08,
        -4.9083e-07, -5.8435e-08, -7.5101e-08, -1.1339e-08, -1.3147e-08,
        -4.0017e-07, -7.9500e-08, -7.6428e-07, -1.3101e-07, -4.2834e-07,
        -5.9071e-08, -1.0417e-07, -8.5228e-06, -9.3436e-08, -2.8656e-08,
        -6.3940e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3550: [tensor([-2.0451e-08, -1.8307e-07, -3.1859e-08, -2.3813e-07, -1.2444e-07,
        -5.3079e-08, -2.8938e-08, -5.3071e-08, -6.2144e-08, -1.6491e-08,
        -1.4464e-08, -5.3791e-07, -1.1134e-08, -1.1026e-08, -1.5793e-08,
        -6.0490e-07, -6.1526e-08, -2.8589e-06, -3.3954e-08, -7.1066e-08,
        -2.6940e-06, -3.6196e-08, -4.8000e-08, -2.8033e-08, -3.5902e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3771e-07, -2.1242e-07, -1.2825e-08, -2.3115e-07, -1.3797e-07,
        -3.0572e-08, -2.0230e-08, -3.6129e-08, -4.1942e-08, -2.5475e-08,
        -1.3261e-08, -3.9417e-07, -1.7745e-08, -6.0041e-09, -1.1720e-08,
        -5.5261e-07, -3.6348e-08, -2.0858e-06, -3.8577e-08, -8.5512e-08,
        -5.2679e-08, -3.9050e-08, -2.0814e-07, -5.6699e-08, -1.1324e-08,
        -3.9460e-08, -2.4985e-08, -1.6583e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8116e-07, -3.0493e-08, -8.6898e-09, -8.4409e-08, -1.2595e-07,
        -9.8476e-07, -3.3569e-08, -2.0817e-08, -1.1268e-08, -2.0791e-08,
        -2.5454e-08, -4.7108e-08, -8.2223e-09, -8.1521e-07, -6.2167e-08,
        -4.9915e-07, -6.9128e-08, -1.4856e-07, -4.9378e-08, -6.0313e-07,
        -2.6351e-08, -2.1052e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9694e-07, -5.0217e-08,  2.8100e-09, -6.4886e-08, -2.1218e-07,
        -5.6073e-07, -3.0479e-08, -1.1551e-08, -4.1952e-09, -2.6858e-08,
        -3.6513e-08, -9.7245e-08, -1.8241e-08, -1.0314e-06, -5.5217e-08,
        -5.7437e-07, -7.5989e-08, -3.0464e-08, -8.9125e-08, -1.3869e-08,
        -5.5979e-08, -1.6196e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4451e-07, -1.1264e-08, -2.9292e-09, -3.1456e-08, -1.3166e-07,
        -2.5088e-07, -1.9540e-08, -1.5544e-08, -1.0599e-08, -1.0241e-08,
        -1.3299e-08, -2.7765e-08, -1.3473e-08, -1.3494e-06, -4.1280e-08,
        -4.7096e-07, -6.0068e-08, -2.7588e-08, -3.6733e-08, -2.9177e-08,
        -2.3656e-08, -3.1092e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2059e-07, -4.7831e-07, -3.1833e-08, -2.2459e-07, -2.7495e-08,
        -1.3083e-08, -3.1454e-08, -3.0518e-07, -5.0556e-08, -1.9674e-08,
        -1.8725e-08, -2.3655e-08, -1.1831e-08, -1.7755e-07, -8.7971e-09,
         6.1518e-10, -1.7002e-08, -3.2170e-06, -4.8302e-08, -4.4618e-08,
        -3.7492e-08, -4.6303e-08, -2.3614e-08, -3.3805e-07, -2.6446e-08,
        -2.5917e-08, -6.7638e-08, -5.5710e-08, -2.5169e-07, -4.5622e-08,
        -2.3586e-08, -3.0678e-08, -1.8446e-08, -2.1280e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8013e-08, -8.6098e-07, -4.0400e-08, -3.2877e-07, -2.0134e-08,
        -1.6181e-08, -3.0305e-08, -5.2879e-07, -4.8717e-08, -2.4039e-08,
        -1.4586e-08, -3.1777e-08, -1.1928e-08, -2.1607e-07, -6.2152e-09,
        -3.6492e-09, -1.1315e-08, -1.4603e-06, -4.5281e-08, -4.1387e-08,
        -4.1085e-08, -3.7060e-07, -1.7117e-08, -1.5904e-08, -2.6718e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4765e-07, -8.7183e-07, -1.3625e-07, -7.1656e-07, -9.3672e-08,
        -9.2313e-08, -1.2092e-07, -8.7601e-07, -2.0493e-07, -8.6082e-08,
        -1.4188e-07, -7.0776e-08, -7.6735e-08, -6.7383e-07, -6.0945e-09,
         3.5620e-09, -9.9302e-08, -5.9040e-06, -2.8397e-07, -3.4831e-07,
        -1.7054e-07, -1.6630e-07, -1.1299e-05, -1.4915e-07, -8.4844e-08,
        -1.1813e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1028e-07, -6.6371e-07, -9.4666e-08, -6.6770e-07, -1.1888e-07,
        -8.3330e-08, -5.1816e-07, -1.2913e-07, -9.5126e-08,  3.2782e-08,
        -6.7223e-08, -3.0852e-05, -3.0789e-07, -3.2431e-07, -2.6837e-07,
        -1.2336e-07, -2.9310e-07, -1.3553e-07, -1.8010e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2942e-06, -9.3891e-07, -2.3880e-07, -9.2381e-07, -1.4002e-07,
        -6.1709e-08, -8.0366e-07, -1.9619e-07, -1.1584e-07,  3.9301e-08,
        -8.0886e-08, -3.2043e-06, -4.8916e-07, -1.3791e-06, -2.6123e-07,
        -6.3430e-07, -4.5805e-07, -2.3156e-05, -4.3677e-07, -7.3280e-07,
        -2.6955e-07, -1.4672e-07, -3.3723e-05, -2.3654e-07, -4.1787e-08,
        -1.7068e-07, -3.8976e-08, -2.1286e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0463e-07, -4.3605e-07, -7.7377e-08, -4.3189e-07, -4.7391e-08,
        -3.2679e-08, -2.8733e-07, -1.1220e-07, -3.3371e-08,  7.0702e-09,
        -1.3847e-08, -1.9594e-05, -1.3681e-07, -1.4702e-06, -1.5084e-07,
        -1.1599e-07, -1.2853e-07, -1.2189e-07, -1.1693e-07, -5.0232e-08,
        -8.4121e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7978e-07, -8.4783e-07, -1.1325e-07, -5.4287e-09, -2.5955e-08,
        -3.6086e-07, -4.0044e-08, -2.1450e-08, -4.2073e-08, -1.7961e-08,
        -6.1113e-08, -5.3751e-06, -6.8905e-08, -1.6376e-06, -1.1072e-07,
        -6.5299e-08, -8.5098e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3600: [tensor([-1.3159e-06, -6.7717e-07, -1.1827e-07, -6.2344e-08, -3.4590e-07,
        -7.4443e-08, -3.2070e-07, -1.1230e-07, -8.7235e-08, -8.6386e-08,
        -1.0936e-07, -1.3903e-07, -6.7380e-07, -4.3011e-08,  3.3034e-08,
        -6.2128e-08, -1.9677e-07, -1.3295e-07, -1.8622e-07, -1.8269e-07,
        -1.1113e-07, -1.5503e-07, -1.9984e-05, -1.2853e-07, -1.1782e-07,
        -1.5924e-07, -1.0323e-07, -1.0337e-07,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9542e-06, -9.9568e-06, -1.6234e-07, -7.8117e-08, -4.5647e-07,
        -1.3250e-07, -4.4057e-07, -1.4091e-07, -4.6378e-08, -9.6701e-08,
        -1.2304e-07, -2.1660e-07, -2.9959e-06, -6.8150e-08, -9.2950e-09,
        -7.8189e-08, -1.7805e-07, -2.5803e-07, -1.8845e-07, -4.8453e-07,
        -1.1122e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6264e-07, -5.9735e-07, -2.9698e-08, -2.6087e-07, -2.5178e-07,
        -2.4937e-08, -6.2413e-08, -4.1115e-08, -5.7261e-08, -2.3171e-08,
        -2.5763e-08, -2.6353e-08, -4.0745e-08, -3.7636e-06, -8.6051e-08,
        -6.2809e-07, -1.0206e-07, -1.0150e-05, -1.0990e-07, -1.0477e-07,
        -1.0038e-07, -8.7831e-08, -3.5308e-08, -5.2668e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0548e-07, -1.2811e-06, -2.7797e-08, -4.8838e-07, -4.3725e-07,
        -3.3594e-08, -6.3241e-08, -5.9860e-08, -7.0666e-08, -2.3283e-08,
        -3.5609e-08, -3.8183e-08, -4.9512e-08, -8.7060e-06, -8.4151e-08,
        -1.1543e-06, -1.6961e-07, -7.6978e-08, -1.6948e-07, -6.2707e-06,
        -7.0668e-08, -1.4412e-07, -6.7437e-08, -6.1312e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1760e-06, -7.3553e-07, -1.8813e-08, -4.0989e-07, -4.4133e-07,
        -3.4929e-08, -6.4907e-08, -9.4099e-08, -6.2482e-08, -6.7648e-08,
        -3.9050e-08,  2.0020e-10, -4.7904e-08, -7.0339e-06, -1.2138e-07,
        -1.0764e-06, -1.2542e-07, -1.5656e-05, -1.3418e-07, -1.9528e-07,
        -1.0326e-07, -1.6140e-07, -2.0299e-08, -8.2641e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2221e-07, -3.2595e-07, -9.6514e-09, -2.0487e-08, -7.6419e-08,
        -3.8221e-07, -1.1120e-08, -1.3506e-08, -2.2466e-08, -9.0853e-09,
        -1.2153e-08, -2.5999e-08, -1.4673e-08, -1.1503e-08, -1.6176e-08,
        -2.2640e-08, -2.4982e-08, -2.8354e-08, -1.3250e-08, -1.9313e-08,
        -3.5552e-08, -1.4503e-08, -1.4156e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0413e-06, -5.6794e-07, -4.8246e-08, -4.8183e-08, -1.6304e-07,
        -6.0295e-07, -2.2449e-08, -7.1684e-08, -8.4152e-08, -2.1537e-08,
        -3.0144e-08, -3.6723e-08, -2.8069e-09, -3.7290e-08, -5.2232e-07,
        -7.8701e-08, -1.2028e-07, -1.1735e-07, -1.0079e-07, -5.3727e-08,
        -7.3796e-08, -5.1463e-08, -2.6528e-07, -1.0596e-07, -1.2990e-05,
        -1.8709e-07, -5.1692e-08, -5.5626e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0121e-07, -1.7702e-06, -4.9130e-08, -5.0994e-08, -2.9201e-07,
        -9.4454e-07, -5.7703e-08, -8.3084e-08, -1.0297e-07, -5.2692e-08,
        -4.0287e-08, -6.9520e-08, -1.1000e-08, -2.1694e-08, -2.2012e-06,
        -9.1793e-08, -1.4813e-07, -1.6332e-07, -7.2255e-06, -1.2876e-07,
        -4.7311e-08, -6.6651e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4436e-07, -5.7437e-07, -1.4566e-08, -4.2864e-08, -5.9921e-09,
        -1.7683e-08, -1.4776e-07, -5.8112e-07, -4.0262e-08, -2.6476e-08,
        -1.4968e-07, -1.1965e-07, -3.2441e-08, -1.8599e-08, -3.5293e-08,
        -7.8284e-08, -5.1823e-08, -1.7531e-08, -1.8267e-08, -4.3920e-06,
        -5.1894e-08, -5.3328e-07, -8.2104e-08, -8.1398e-08, -7.8640e-08,
        -4.4899e-06, -4.2725e-07,  3.5203e-10, -5.0471e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6010e-08, -3.6595e-07, -2.6665e-09, -1.9037e-08, -1.1054e-08,
        -1.3835e-08, -1.4152e-07, -3.7696e-07, -3.9733e-08, -2.5481e-08,
        -1.0079e-07, -9.5469e-08, -1.6678e-08, -6.7483e-09, -2.7616e-08,
        -6.5432e-08, -3.7760e-08, -2.9660e-09, -2.2311e-08, -2.9937e-06,
        -3.4954e-08, -5.0410e-07, -2.2867e-08, -5.1033e-08, -1.5636e-06,
        -3.1076e-08, -2.2984e-08, -3.1790e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0598e-07, -8.3949e-07,  2.9403e-09, -2.6248e-08, -4.6757e-10,
        -3.0272e-08, -1.4536e-07, -4.9035e-07, -5.4556e-08, -2.4222e-08,
        -1.6303e-07, -1.5626e-07, -1.4836e-08, -2.3108e-08, -3.7531e-08,
        -1.1819e-07, -6.0911e-08, -1.9647e-08, -2.2439e-08, -7.6938e-06,
        -7.0854e-08, -6.7793e-07, -5.3534e-08, -5.1397e-08, -2.2242e-08,
        -4.3000e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4689e-07, -4.1017e-08, -2.8619e-07, -5.2907e-08, -1.8497e-08,
        -1.1953e-08, -3.1352e-08, -5.6828e-08, -2.8644e-08, -6.7936e-08,
        -3.8294e-08, -5.1272e-08, -7.6998e-08, -8.7796e-09, -1.2477e-08,
        -5.6798e-06, -6.4131e-08, -1.7024e-07, -2.0340e-06, -8.5588e-08,
        -1.0647e-07, -1.1210e-07, -3.0327e-08, -4.4451e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3650: [tensor([-9.2947e-07, -1.5200e-06, -3.0006e-08, -3.4697e-08, -1.6659e-08,
        -4.5308e-08, -5.3248e-08, -6.4066e-09, -2.1450e-08, -6.4948e-08,
        -4.3661e-08, -1.3405e-07, -2.5443e-08,  4.2725e-10, -1.6175e-08,
        -2.1419e-06, -6.4991e-08, -1.3499e-06, -6.8014e-08, -1.2075e-07,
        -2.1036e-08, -3.7756e-08, -5.2455e-08, -4.0256e-08,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4862e-07, -5.3830e-07, -2.7738e-08, -3.1394e-07, -7.8416e-09,
        -3.5274e-07, -1.8492e-08, -6.4911e-08, -2.3248e-08, -2.1996e-08,
        -3.3527e-08, -2.3927e-08, -2.0337e-08, -1.2820e-08, -3.0652e-09,
        -2.9326e-06, -3.7962e-08, -1.7711e-07, -4.7070e-08, -3.2168e-08,
        -2.1700e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0208e-07, -8.2266e-07, -4.3836e-08, -3.4389e-07, -2.7185e-08,
        -5.1824e-07, -3.0962e-08, -8.5555e-08, -3.0159e-08, -4.7370e-08,
        -4.5325e-08, -3.4276e-08, -5.5323e-08, -6.5816e-09, -1.0166e-08,
        -3.0768e-06, -6.8936e-08, -1.7511e-07, -2.3002e-06, -1.0021e-07,
        -3.6804e-08, -3.5552e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6421e-08, -6.9287e-07, -2.1552e-08, -1.3494e-07, -1.6259e-08,
        -3.6140e-07, -2.2526e-08, -4.9802e-08, -1.9650e-08, -1.0275e-08,
        -2.2924e-08, -3.5098e-08, -1.1444e-08, -5.3178e-09, -1.1450e-08,
        -1.0505e-06, -2.9908e-08, -1.0252e-06, -5.9063e-08, -8.7951e-07,
        -2.0405e-08, -2.0344e-08, -1.5061e-08,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7382e-07, -8.8886e-09, -6.9106e-09, -1.0264e-06, -2.9731e-08,
        -3.4362e-08, -4.9108e-08, -5.8039e-08, -2.2013e-08, -2.7392e-08,
        -2.7646e-07, -1.6293e-07, -3.8604e-08, -4.3684e-08, -4.4740e-08,
        -3.2898e-08, -3.4755e-08, -4.3534e-08, -4.3259e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3147e-07, -6.4940e-09,  2.2221e-09, -9.0028e-07, -2.3381e-08,
        -3.8818e-08, -2.9249e-08, -3.5442e-08, -9.1769e-09, -1.6993e-08,
        -1.7938e-07, -1.6488e-07, -3.3596e-08, -3.7224e-07, -1.2307e-08,
        -1.8620e-08, -4.5146e-08, -8.6820e-08, -3.8111e-08, -2.9183e-08,
        -3.2607e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2896e-06,  2.2492e-10, -1.0206e-08, -2.5755e-06, -2.8002e-08,
        -4.6438e-08, -4.5546e-08, -6.5205e-08, -1.5760e-08, -1.4105e-08,
        -3.4700e-07, -2.6514e-07, -4.0379e-08, -7.4961e-08, -1.7084e-06,
        -1.4940e-07, -7.8051e-08, -1.5809e-07, -6.8315e-08, -3.8783e-08,
        -3.9788e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7956e-07, -3.3368e-08, -1.6464e-08, -2.1206e-07, -3.6288e-08,
        -1.2846e-08, -1.8114e-08, -6.3446e-09, -2.3774e-07, -4.3235e-08,
        -5.8218e-09, -1.3126e-08, -1.0155e-08, -6.7651e-08, -3.3254e-08,
        -4.0901e-08, -3.0838e-08, -7.7447e-08, -1.5703e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4572e-07, -9.5386e-08, -5.3713e-08, -3.2113e-07, -3.5492e-08,
        -2.7900e-08, -3.4729e-08, -2.5499e-08, -4.4216e-07, -7.4975e-08,
        -2.7457e-08, -1.5469e-08, -3.0281e-08, -2.1207e-07, -6.7420e-08,
        -5.4593e-07, -8.9302e-08, -6.0043e-08, -1.1989e-07, -7.2094e-08,
        -5.0053e-06, -1.1063e-07, -2.6924e-07, -7.7437e-08, -3.8786e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1264e-06, -1.9380e-07, -1.1315e-07, -4.9117e-07, -1.2315e-07,
        -7.2186e-08, -6.2216e-08, -5.0819e-08, -6.5681e-07, -2.1282e-07,
        -6.2806e-08, -1.0607e-08, -4.9301e-08, -2.9722e-07, -1.9265e-07,
        -4.5636e-07, -1.5196e-07, -2.1803e-05, -1.7273e-07, -2.6354e-07,
        -8.5664e-08, -1.3077e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1306e-07, -8.6163e-08, -1.3548e-08, -3.2482e-07, -6.1916e-08,
        -3.5994e-08, -4.9296e-08, -5.4918e-07, -4.2452e-08, -3.0811e-08,
        -7.4088e-08, -6.6264e-08, -2.8814e-07, -6.3980e-08, -6.1485e-08,
        -5.7354e-06, -5.8075e-08, -1.1825e-07, -3.5154e-08, -4.8028e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4265e-06, -1.5554e-07, -5.2910e-08, -4.1991e-07, -1.2320e-07,
        -6.0952e-08, -1.2438e-07, -7.1987e-07, -1.4211e-07, -7.7889e-08,
        -1.7612e-07, -7.8388e-08, -1.2998e-07, -1.4383e-06, -1.8978e-07,
        -1.2721e-07, -1.8813e-07, -1.8921e-05, -1.8106e-07, -1.8971e-07,
        -1.2218e-07, -4.2677e-08, -9.5224e-08,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3700: [tensor([-2.0995e-07, -2.4388e-07, -1.9956e-08, -3.9598e-08, -3.3748e-09,
        -2.1094e-08, -3.5812e-07, -4.4710e-08, -5.9485e-08, -2.1272e-08,
        -6.6421e-08, -2.5359e-08, -5.1659e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2972e-07, -1.5972e-07, -2.6711e-08, -2.7753e-08, -2.0706e-08,
        -7.0359e-09, -1.6982e-07, -2.6089e-08, -6.2358e-08, -8.3700e-08,
        -4.1638e-08, -3.8398e-08, -6.1736e-08, -4.3205e-08, -3.8472e-08,
        -7.9927e-08, -2.6163e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8822e-07, -5.5502e-07, -4.4491e-08, -9.8826e-08, -4.9018e-08,
        -5.2786e-08, -1.5445e-07, -8.5203e-07, -7.9161e-08, -3.7600e-07,
        -6.7688e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2945e-08, -2.2048e-07, -2.9210e-08, -4.0126e-08, -3.1370e-08,
        -1.5801e-08, -2.5878e-07, -5.2971e-08, -4.3058e-08, -8.7243e-08,
        -2.5788e-08, -7.2756e-08, -7.9651e-08, -1.0420e-06, -3.3857e-08,
        -3.0245e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1487e-07, -2.2442e-07, -1.6821e-08, -5.5715e-08, -2.0810e-08,
        -2.3431e-08, -2.9517e-07, -5.0864e-08, -5.1294e-08, -4.6179e-08,
        -2.8965e-08, -3.0512e-07, -1.6267e-07, -6.6506e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3097e-07, -6.0957e-07, -4.7146e-08, -8.4794e-08, -5.0268e-08,
        -5.3692e-08, -5.6993e-07, -8.0755e-08, -3.1880e-07, -9.1169e-08,
        -9.9721e-08, -7.5510e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7831e-07, -4.6221e-07, -4.3705e-08, -4.7480e-08, -2.4184e-08,
        -4.2633e-08, -3.3933e-07, -4.8858e-08, -5.2478e-07, -4.4570e-08,
        -5.5121e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3678e-07, -4.6088e-07, -2.9739e-08, -7.2944e-08, -6.0323e-08,
        -3.1428e-08, -5.4597e-07, -1.0382e-07, -1.6000e-07, -5.5337e-08,
        -5.2582e-08, -6.2967e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8360e-07, -4.8016e-07, -6.3651e-08, -7.6249e-08, -4.6570e-08,
        -3.1766e-08, -7.5479e-07, -1.2975e-07, -6.9731e-08, -2.1885e-07,
        -6.4417e-08, -1.6442e-07, -2.5630e-07, -5.7975e-07, -6.1011e-08,
        -6.0875e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2204e-07, -7.4936e-07, -1.1415e-08, -1.5755e-08, -4.2701e-08,
        -1.8068e-08, -9.1208e-08, -1.2163e-07, -2.0821e-08, -6.0092e-08,
        -1.7265e-07, -2.0988e-08, -3.1012e-08, -1.6935e-08, -1.7647e-08,
        -2.0457e-08,  2.8848e-09, -1.6630e-08, -3.8642e-07, -7.2615e-09,
        -1.5505e-08, -1.1057e-08,  1.8187e-09, -1.5819e-08, -1.1299e-08,
        -1.5100e-08, -1.8473e-08, -1.6928e-08,  3.8916e-11, -2.9692e-08,
        -2.7523e-08, -3.4789e-08, -7.6473e-09, -6.9612e-09, -4.1505e-06,
        -4.0569e-08, -7.5331e-07, -9.7323e-08, -8.2014e-08, -2.1986e-08,
        -5.4844e-08, -5.3979e-08, -3.6375e-08, -3.0512e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6833e-07, -1.0069e-06, -1.1026e-08, -1.5631e-08, -2.8972e-08,
        -1.1674e-08, -1.0569e-07, -1.2716e-07, -1.6084e-08, -6.0289e-08,
        -1.5417e-07, -8.7967e-09, -2.3369e-08, -1.0602e-08, -1.2589e-08,
        -4.0678e-08,  3.0275e-09, -1.4065e-08, -3.7128e-07,  2.2245e-09,
        -1.3587e-08, -2.7595e-09, -6.7077e-09, -1.3188e-08, -8.5241e-09,
        -1.2743e-08, -3.6040e-09, -2.3100e-08,  4.3272e-09, -2.6171e-08,
        -3.7053e-08, -3.1938e-08, -8.8906e-09, -6.4589e-09, -1.5963e-07,
        -5.8721e-08, -9.1903e-07, -3.9023e-08, -5.3815e-08, -7.3373e-08,
        -9.6769e-07, -6.0723e-08, -3.7763e-08, -3.7308e-08, -2.0712e-08,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7615e-07, -3.2257e-07, -1.9274e-09, -1.1922e-08, -2.1241e-08,
        -1.7034e-08, -8.5695e-08, -9.5056e-08, -1.8371e-08, -7.9669e-08,
        -1.2235e-07, -1.2211e-08, -4.2277e-08, -2.0678e-08, -1.4950e-08,
        -3.1961e-08,  9.2695e-10, -1.3985e-08, -2.7791e-07, -2.6307e-09,
        -8.9348e-09, -5.8523e-09,  3.0260e-09, -1.0075e-08, -6.6957e-09,
        -1.0009e-08,  2.0711e-09, -2.0465e-08,  9.3310e-10, -1.7838e-08,
        -1.1880e-08, -2.2049e-08, -5.1646e-09, -7.7120e-09, -5.9216e-07,
        -6.7549e-08, -3.4291e-08, -9.0626e-08, -4.2206e-06, -4.2040e-08,
        -9.0149e-08, -2.7801e-08, -3.7221e-08, -3.9263e-08, -4.2340e-08,
        -1.8558e-08, -2.5212e-08], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3750: [tensor([-8.5422e-07, -4.6225e-08, -5.9454e-08, -8.4971e-08, -3.0731e-08,
        -9.9097e-08, -3.7675e-07, -7.3106e-08, -2.9192e-09, -2.5764e-08,
        -1.2491e-07, -2.7024e-06, -5.7960e-06, -9.3877e-08, -1.3704e-07,
        -8.4330e-08, -8.8222e-08, -6.4763e-08, -8.4065e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1266e-09, -8.2085e-07, -3.7692e-08, -1.6235e-07, -3.4202e-07,
        -2.2897e-08, -2.2192e-08,  7.8523e-09, -3.0611e-08, -3.9974e-08,
        -7.6591e-08, -5.6056e-06, -1.5881e-08, -1.0189e-07, -3.2645e-08,
        -2.3957e-08, -1.3057e-08, -2.6742e-08, -7.5598e-07, -6.6144e-08,
        -8.4438e-07, -1.5862e-07, -5.6446e-08, -5.3374e-08, -2.3342e-06,
        -3.2603e-08, -4.1914e-08, -4.6304e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9309e-07, -9.7103e-07, -1.7518e-08, -2.8294e-07, -5.4120e-07,
        -2.4598e-08, -2.2930e-08,  3.4414e-09, -4.1471e-08, -2.9562e-08,
        -7.4678e-08, -3.6183e-06, -2.1299e-08, -9.3392e-08, -4.1590e-08,
        -4.6000e-08, -8.5640e-09, -4.1905e-08, -1.2229e-06, -7.0333e-08,
        -9.7942e-07, -8.4834e-08, -2.4912e-08, -3.5668e-08, -5.8795e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0416e-07, -6.7521e-07, -9.4518e-09, -1.9844e-07, -2.9777e-07,
        -2.0612e-08, -2.0859e-08, -1.3033e-09, -2.4316e-08, -1.7461e-08,
        -4.8762e-08, -4.4769e-06, -1.9716e-08, -9.9557e-08, -3.7678e-08,
        -1.7402e-08, -4.3573e-09, -2.1164e-08, -1.5418e-06, -4.7122e-08,
        -6.7536e-07, -5.5754e-08, -1.9344e-07, -3.3099e-08, -6.6957e-09,
        -3.6570e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6784e-06, -9.6540e-07, -7.3640e-08, -5.7416e-07, -6.3938e-07,
        -1.0803e-07, -8.5134e-08, -8.8562e-08, -5.0766e-07, -4.7306e-08,
         1.8142e-08, -1.0484e-07, -1.2689e-07, -1.6470e-08, -2.0690e-07,
        -3.8784e-07, -9.4194e-08, -2.0651e-08, -2.6096e-08, -4.8147e-06,
        -1.4210e-07, -2.8290e-07, -1.4410e-07, -2.2022e-07, -5.0221e-08,
        -7.5458e-07, -1.9056e-05, -1.8633e-07, -1.2233e-07, -4.4844e-08,
        -1.0240e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3138e-07, -1.0564e-06, -2.5131e-08, -2.7505e-07, -4.4729e-07,
        -7.2056e-08, -3.1468e-08, -3.5950e-08, -5.4725e-07, -2.3400e-08,
         1.7179e-09, -4.8481e-08, -5.4662e-08, -1.2382e-08, -1.4131e-07,
        -2.3033e-07, -2.9378e-08, -1.9079e-08, -2.9500e-08, -6.8417e-07,
        -4.0991e-08, -6.9291e-07, -1.1349e-07, -3.9177e-06, -1.3267e-08,
        -4.9099e-08, -4.3189e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6553e-09, -2.5983e-07, -1.8974e-08, -1.9166e-07, -2.5858e-07,
        -3.9755e-08, -1.9240e-08, -4.1370e-08, -1.8750e-07, -1.8001e-08,
         6.1793e-09, -2.3624e-08, -3.3825e-08, -8.9016e-09, -1.0383e-07,
        -2.0610e-07, -2.1395e-08, -1.3020e-08, -1.9787e-08, -2.1949e-06,
        -2.7737e-08, -4.1413e-08, -9.0670e-08, -3.5990e-06, -9.5681e-08,
        -3.2893e-08, -3.7081e-08, -2.1427e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3978e-07, -3.3310e-07, -4.0076e-09, -3.0911e-08, -2.7781e-08,
        -2.8013e-07, -2.6221e-08, -1.4587e-08, -1.7587e-08, -3.6279e-08,
        -2.8576e-08, -1.1781e-08, -1.6760e-08, -3.0550e-08, -9.5839e-09,
        -4.7057e-09, -1.1983e-07, -2.9185e-07, -1.5352e-08,  1.2771e-09,
        -5.1303e-09, -1.8230e-06, -1.5865e-08, -1.6781e-08, -8.3295e-08,
        -2.3527e-06, -9.6746e-09, -3.3535e-08, -4.6885e-08, -4.5809e-08,
        -4.4169e-08, -3.0135e-08, -3.6843e-08, -6.6195e-08, -1.2655e-06,
        -7.6006e-07, -1.6629e-08, -1.6087e-08, -3.1947e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6632e-07, -4.6185e-07, -7.9675e-09, -1.8563e-08, -2.4788e-08,
        -3.5923e-07, -1.8848e-08, -1.3230e-08, -3.2744e-08, -3.5823e-08,
        -2.8683e-08, -1.4894e-08, -2.0902e-08, -3.8100e-08, -9.9871e-09,
        -1.4395e-08, -1.3808e-07, -2.8940e-07, -2.6148e-08, -1.1347e-09,
        -1.8514e-08, -3.3218e-06, -2.1463e-08, -2.4286e-08, -3.1658e-08,
        -1.3402e-06, -8.3314e-08, -3.9498e-08, -5.5959e-08, -4.7860e-08,
        -5.7699e-08, -2.6905e-08, -3.8949e-08, -8.8816e-08, -2.8821e-08,
        -3.4719e-08, -8.4279e-07, -2.8015e-07, -3.9409e-08, -3.5777e-08,
        -7.9935e-07, -6.0143e-09, -3.7739e-08, -3.8917e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7155e-07, -4.8929e-07, -6.4341e-09, -1.4107e-08, -1.7113e-08,
        -4.7187e-07, -2.0361e-08, -9.6860e-09, -2.1259e-08, -3.1260e-08,
        -2.5286e-08, -1.4775e-08, -1.0566e-08, -2.1264e-08, -4.5320e-09,
        -1.1007e-08, -1.3359e-07, -1.9480e-07, -1.2003e-08, -1.5486e-09,
        -9.1955e-09, -1.9938e-06, -3.1962e-08, -3.7409e-07, -1.5887e-07,
        -4.3924e-08, -1.0676e-08, -4.2509e-08, -3.7434e-08, -5.3642e-08,
        -4.5814e-08, -2.1165e-08, -8.1131e-08, -8.0100e-08, -2.0425e-08,
        -2.2291e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7337e-06, -1.7192e-06, -1.8052e-07, -1.1060e-06, -1.3194e-07,
        -3.5348e-08, -3.6973e-07, -1.6136e-08, -2.3328e-07, -1.5745e-07,
        -2.6460e-07, -8.9581e-07, -1.7725e-06, -5.2201e-07, -1.8241e-07,
         5.8049e-08, -2.5638e-07, -3.3769e-05, -4.9206e-07, -1.9308e-06,
        -3.4416e-07, -1.4539e-06, -2.9401e-05, -5.0704e-07, -2.6938e-07,
        -1.5200e-07, -1.8359e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8104e-06, -1.0878e-06, -9.7490e-08, -4.9202e-07, -8.4337e-08,
        -3.4535e-08, -1.3870e-07, -2.0208e-08, -8.5021e-08, -6.8708e-08,
        -7.9731e-08, -5.1816e-07, -1.0804e-06, -1.7420e-07, -6.0405e-08,
        -3.8161e-11, -1.9272e-08, -1.8514e-05, -1.8513e-07, -6.6457e-07,
        -1.5231e-07, -3.1788e-06, -6.7380e-07, -1.3300e-07, -1.0786e-07,
        -8.8811e-08, -2.4979e-08, -7.7083e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3800: [tensor([-3.7421e-07, -1.3408e-06, -1.1941e-08, -3.3975e-08, -1.9969e-08,
        -1.5988e-07, -3.2917e-08, -4.0767e-08, -3.4993e-08, -1.5280e-06,
        -3.7986e-08, -1.8533e-08, -4.5362e-08, -1.0624e-06, -5.4696e-08,
        -3.2790e-08, -1.1728e-08, -7.8078e-08, -3.9402e-08, -2.4221e-08,
        -1.8892e-08,  1.9034e-09, -1.8465e-08, -2.5058e-06, -9.7911e-08,
        -1.0905e-06, -8.1131e-08, -3.8798e-08, -3.3924e-08, -4.1158e-08,
        -5.2038e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9285e-07, -6.9299e-07, -3.5779e-08, -1.4435e-07, -6.6009e-08,
        -4.0940e-07, -7.6457e-08, -7.4733e-08, -1.2775e-07, -1.5114e-06,
        -9.0021e-08, -2.0505e-08, -1.0860e-07, -1.2949e-06, -2.1884e-07,
        -1.1106e-07, -6.8041e-08, -2.7916e-07, -6.2530e-08, -6.5890e-08,
        -5.4990e-08, -2.0639e-09, -4.6536e-08, -8.3259e-06, -1.1970e-07,
        -1.9202e-06, -7.6427e-08, -2.0810e-07, -2.5854e-05, -1.8146e-07,
        -1.0331e-07, -1.1528e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2434e-07, -3.8066e-07, -5.4187e-09, -1.7588e-08, -8.0604e-09,
        -1.2186e-07, -1.3952e-08, -1.4220e-08, -2.9082e-08, -2.1154e-06,
        -2.4328e-08, -1.0175e-08, -2.3251e-08, -8.0479e-07, -2.9258e-08,
        -2.9591e-08, -1.0808e-08, -4.1930e-08, -2.8429e-08, -1.6347e-08,
        -3.5821e-08, -3.4641e-09, -5.3092e-09, -2.6181e-07, -3.6036e-08,
        -8.3946e-07, -5.3461e-08, -1.5371e-08, -3.1462e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8214e-08, -7.2429e-07, -1.3064e-08, -3.8678e-08, -1.0617e-08,
        -2.0239e-07, -1.1271e-08, -2.3281e-08, -3.1815e-08, -1.4994e-06,
        -3.4187e-08, -2.4848e-08, -2.7708e-08, -9.5243e-07, -3.8089e-08,
        -2.7311e-08, -1.6965e-08, -7.5370e-08, -1.3902e-08, -2.7255e-08,
        -5.5008e-08, -7.9377e-09, -1.2947e-08, -1.3452e-06, -5.5405e-08,
        -4.2002e-08, -1.8411e-08, -1.7078e-08, -3.6005e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3660e-07, -3.4784e-07, -1.5118e-08, -2.8503e-08, -1.5982e-08,
        -1.6938e-07, -2.8537e-08, -2.8897e-08, -4.0503e-08, -2.0924e-06,
        -2.7493e-08, -1.6853e-08, -2.6492e-08, -1.1471e-06, -4.4700e-08,
        -3.1393e-08, -1.2000e-08, -7.2986e-08, -2.1747e-08, -1.9700e-08,
        -2.4620e-08,  2.2113e-09,  4.6014e-09, -2.2562e-06, -5.1326e-08,
        -3.0702e-08, -1.3696e-07, -5.3993e-08, -3.7023e-08, -1.4636e-06,
        -5.0219e-08, -6.2459e-08, -6.4052e-08, -2.9751e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3708e-07, -3.3649e-07, -9.7784e-09, -2.5971e-08, -2.4232e-08,
        -1.9799e-07, -2.9050e-08, -2.6404e-08, -3.4720e-08, -1.5765e-06,
        -4.4098e-08, -1.8844e-08, -6.1772e-08, -7.8178e-07, -4.9299e-08,
        -3.9107e-08, -2.4946e-08, -9.7092e-08, -2.5735e-08, -3.0599e-08,
        -4.1005e-08, -1.5620e-09, -2.0131e-08, -6.6094e-06, -4.0158e-08,
        -7.1574e-08, -7.0240e-08, -3.1264e-06, -5.3471e-08, -1.0006e-07,
        -3.8994e-08, -2.4630e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6853e-07, -3.2720e-07, -8.9839e-09, -2.0090e-08, -1.3322e-08,
        -1.2211e-07, -9.0656e-09, -2.2363e-08, -2.8497e-08, -3.7383e-06,
        -2.3121e-08, -1.6616e-08, -2.6694e-08, -5.4951e-07, -2.3066e-08,
        -1.4273e-08, -1.3911e-08, -4.1191e-08, -1.9935e-08, -1.1646e-08,
        -3.6684e-08,  1.8137e-09, -4.2653e-09, -7.6744e-07, -5.2021e-08,
        -4.1045e-08, -2.6899e-08, -3.6497e-07, -4.7318e-08, -8.8035e-08,
        -2.4419e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1835e-08, -7.2214e-07, -1.5015e-08, -4.9894e-08, -1.9797e-08,
        -1.5864e-07, -3.2008e-08, -4.3712e-08, -4.7190e-08, -1.3115e-06,
        -5.7490e-08, -2.8041e-08, -3.4313e-08, -1.0190e-06, -6.1635e-08,
        -4.0695e-08, -2.9802e-08, -9.3829e-08, -4.1138e-08, -3.4321e-08,
        -2.9840e-08, -2.1800e-08,  1.6506e-08, -2.2332e-06, -5.9637e-08,
        -6.8461e-08, -5.5609e-06, -7.1847e-08, -9.9525e-07, -6.2122e-08,
        -1.0301e-07, -2.5957e-06, -9.3631e-08, -6.5126e-08, -9.1882e-08,
        -1.1865e-07, -4.9027e-07, -6.8668e-08, -4.6059e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8416e-07, -2.7915e-07, -1.1778e-08, -1.6714e-08, -9.5560e-09,
        -8.5590e-08, -1.0723e-08, -1.3136e-08, -2.1415e-08, -1.3738e-06,
        -2.2191e-08,  7.8860e-10, -1.8901e-08, -4.8911e-07, -2.1585e-08,
        -1.9730e-08, -7.2270e-09, -4.5490e-08, -1.4260e-08, -1.5080e-08,
        -2.0084e-08,  2.8017e-09, -6.3983e-10, -1.2553e-06, -2.4445e-08,
        -3.8742e-07, -4.2843e-08, -6.0746e-07, -4.9782e-08, -2.6764e-08,
        -1.1797e-08, -1.0964e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0990e-06, -4.9208e-07, -9.7680e-09, -3.5233e-08, -1.8019e-08,
        -1.7802e-07, -2.7383e-08, -3.4861e-08, -4.9682e-08, -1.0254e-05,
        -4.8164e-08, -2.3910e-08, -3.6734e-08, -1.0295e-06, -5.2072e-08,
        -3.8969e-08, -1.8638e-08, -5.6478e-08, -2.4338e-08, -2.4966e-08,
        -3.8898e-08, -3.2346e-09, -5.7299e-09, -2.8116e-06, -5.9922e-08,
        -5.7773e-08, -2.2078e-07, -5.8220e-08, -1.0592e-07, -5.9022e-08,
        -5.7935e-08, -5.3618e-08, -4.5678e-07, -1.1467e-07, -5.2259e-08,
        -1.0526e-07, -1.2461e-07, -4.7894e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7055e-08, -7.9072e-07, -5.1454e-09, -2.6933e-08, -1.2397e-08,
        -1.6997e-07, -2.6586e-08, -3.4477e-08, -2.9432e-08, -1.0268e-06,
        -3.2662e-08, -1.9266e-08, -3.2288e-08, -6.3548e-07, -3.5467e-08,
        -2.3697e-08, -1.2150e-08, -6.3846e-08, -3.4432e-08, -1.7003e-08,
        -2.9348e-08, -6.2258e-09, -1.5723e-08, -4.8855e-07, -7.4925e-08,
        -1.5074e-06, -1.5928e-08, -1.8480e-08, -3.1419e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2136e-06, -6.5087e-07, -1.9971e-08, -4.1950e-08, -4.0968e-08,
        -2.4058e-07, -3.1325e-08, -5.0450e-08, -4.8633e-08, -5.9460e-06,
        -6.6861e-08, -2.4310e-08, -3.7573e-08, -6.8660e-07, -1.1420e-07,
        -5.1950e-08, -3.6046e-08, -1.4028e-07, -3.7045e-08, -2.5659e-08,
        -2.1661e-08,  1.7154e-08, -1.8021e-09, -1.6911e-06, -4.6629e-08,
        -1.5811e-06, -5.8080e-08, -9.3024e-06, -1.0778e-07, -1.0760e-07,
        -1.3331e-07, -4.1159e-06, -1.0813e-07, -3.9912e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3850: [tensor([-1.5166e-06, -1.5603e-07, -8.2508e-08, -1.0432e-07, -8.4482e-08,
        -2.0877e-07, -5.2899e-08,  9.5114e-10, -8.6309e-08, -6.0021e-08,
        -3.5410e-08, -1.3624e-07, -1.1106e-07, -2.9610e-06, -1.3671e-07,
        -2.7894e-07, -7.1058e-08, -1.1610e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0758e-07, -9.9320e-08, -5.1264e-08, -5.2783e-08, -7.1267e-08,
        -1.0086e-07, -2.9534e-08,  4.0523e-09, -6.4273e-08, -1.6445e-08,
        -2.5671e-08, -1.9288e-07, -3.3996e-07, -8.5688e-08, -2.0361e-07,
        -3.0210e-08, -5.8581e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2634e-08, -7.4812e-08, -3.7372e-08, -5.8724e-08, -5.2544e-08,
        -1.0289e-07, -2.0339e-08,  9.8581e-09, -2.8525e-08, -1.8547e-08,
        -1.5629e-08, -1.1438e-07, -3.0483e-07, -2.1128e-07, -2.8784e-08,
        -6.8828e-08, -1.3963e-06, -5.6650e-08, -4.0441e-08, -3.1598e-08,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7127e-07, -1.4890e-07, -9.6535e-08, -1.1007e-07, -1.0931e-07,
        -2.6595e-07, -9.0374e-08,  3.7631e-08, -1.1910e-07,  7.5053e-09,
        -3.6738e-08, -2.5754e-07, -3.0438e-07, -1.2584e-05, -1.0239e-07,
        -5.7835e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8306e-07, -1.1171e-07, -7.0714e-08, -1.0039e-07, -8.1978e-08,
        -1.3075e-07, -3.1327e-08, -3.6035e-09, -6.3806e-08, -3.8727e-08,
        -1.7789e-08, -4.0627e-08, -5.4963e-07, -3.5926e-07, -7.2178e-08,
        -1.9763e-07, -6.6424e-08, -6.8776e-08, -6.3658e-08,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3226e-07, -1.9485e-08, -3.6969e-08, -2.8175e-08, -7.4048e-09,
        -3.0842e-08, -3.2157e-08, -4.7479e-08, -1.9108e-08, -4.0044e-08,
        -3.7383e-07, -4.3513e-08, -2.1314e-07, -2.8522e-08, -9.1636e-08,
        -3.9595e-08, -3.7996e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9203e-07, -3.7733e-08, -5.3817e-08, -3.6357e-08,  3.0425e-09,
        -3.6655e-08, -3.6223e-08, -5.0190e-08, -3.3092e-08, -1.5358e-08,
        -2.8743e-07, -7.8506e-08, -1.1096e-06, -8.5501e-08, -5.5596e-08,
        -3.9575e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2094e-07, -4.9084e-08, -7.2476e-08, -2.3236e-08, -1.1880e-08,
        -4.2387e-08, -5.4472e-08, -3.8201e-08, -1.5414e-08, -4.3858e-08,
        -5.7082e-07, -7.8818e-08, -6.1851e-07, -5.3532e-08, -6.6250e-08,
        -8.0427e-08, -6.3070e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3793e-07, -1.8745e-06, -1.4122e-07, -1.6617e-07, -6.6763e-08,
        -1.8847e-07, -7.5430e-08, -1.9123e-07, -9.8390e-08,  2.0514e-08,
        -7.0940e-08, -3.9207e-05, -1.6292e-07, -3.5005e-07, -2.2876e-07,
        -1.5037e-07, -2.7436e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1085e-06, -1.3102e-06, -1.1251e-07, -1.4442e-07, -2.2573e-08,
        -9.8799e-08, -9.4744e-08, -1.5629e-07, -7.8291e-08, -1.0913e-08,
        -5.9835e-08, -3.3885e-05, -2.4694e-07, -2.8540e-07, -1.7844e-07,
        -2.2350e-07, -1.4850e-06, -3.9516e-07, -2.7697e-07, -1.4056e-07,
        -1.5692e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7314e-06, -1.3838e-06, -1.6023e-07, -1.3405e-07, -1.1616e-07,
        -4.6788e-08, -1.5672e-07, -1.5122e-07, -1.0271e-07, -2.5240e-08,
        -1.1195e-07, -1.9710e-05, -6.2469e-07, -1.7361e-07, -3.4027e-07,
        -3.0203e-07, -3.9346e-07, -2.6949e-05, -4.0349e-07, -6.6328e-08,
        -1.6113e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2772e-07, -1.0888e-06, -4.7319e-08, -2.8143e-08, -2.8047e-08,
        -5.9256e-08, -4.4579e-08, -8.6150e-09, -1.4929e-08, -3.3815e-08,
        -2.9160e-07, -6.9745e-08, -7.9231e-08, -9.1037e-08, -2.0808e-08,
        -3.1765e-08, -5.8322e-08, -2.8392e-07, -2.7860e-08, -2.2572e-08,
        -4.1836e-08], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3900: [tensor([-6.5965e-07, -4.7974e-08, -1.7251e-07, -2.7717e-08, -1.7247e-08,
        -1.3012e-08, -3.1205e-08, -3.1320e-07, -1.8697e-07, -1.9797e-08,
         4.6219e-10, -3.3590e-08, -7.4535e-08, -7.0391e-07, -5.7302e-08,
        -2.0123e-06, -4.1432e-08, -5.3466e-08, -4.3342e-08, -3.9673e-08,
        -3.3682e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5214e-08, -1.3613e-06, -1.8471e-08, -5.9954e-08, -1.4055e-08,
        -1.8616e-08, -9.7693e-09, -8.0007e-08, -1.9955e-08, -4.9423e-08,
        -2.9086e-08, -1.5749e-08, -1.7869e-08, -5.3051e-08, -2.0970e-08,
        -5.2062e-08, -2.6792e-08, -2.5221e-08, -4.3865e-07, -3.5173e-08,
        -1.8499e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0876e-08, -1.3217e-06, -3.5485e-08, -8.3948e-08, -7.4164e-09,
        -1.8101e-08, -2.8197e-08, -5.1708e-08, -1.7676e-08, -6.8897e-08,
        -3.1604e-08, -1.6650e-08, -2.1975e-08, -3.4694e-08, -3.8710e-08,
        -2.9523e-08, -4.0393e-08, -4.9246e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2470e-07, -1.3462e-06, -3.3100e-08, -8.8300e-08, -1.1216e-08,
        -1.0506e-08, -2.8163e-08, -6.2943e-08, -2.4373e-08, -4.8724e-08,
        -3.5006e-08, -9.9987e-09, -2.7765e-08, -3.6534e-08, -3.1398e-08,
        -2.5779e-08, -7.2789e-08, -4.3474e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3913e-07, -9.4011e-07,  1.8842e-09, -3.7053e-08, -1.8440e-08,
        -3.0859e-08, -2.9305e-08, -4.2668e-08, -1.1331e-08, -1.9490e-08,
        -2.2512e-08, -1.0740e-07, -4.6912e-08, -8.7559e-08, -4.0740e-07,
        -2.2995e-08, -9.3255e-08, -3.0287e-07, -5.2410e-08, -3.9870e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3483e-07, -1.2323e-06, -4.6429e-09, -3.3918e-08, -2.4481e-08,
        -2.8808e-08, -3.8437e-08, -3.5844e-08, -1.9970e-09, -2.6962e-08,
        -1.0592e-07, -1.3709e-07, -8.0554e-08, -4.3819e-06, -5.9483e-08,
        -1.4208e-07, -6.4384e-08, -7.2134e-08, -4.8534e-08, -3.3593e-08,
        -8.4575e-08, -5.1884e-08, -1.0028e-07, -5.7739e-08, -2.1843e-08,
        -4.1099e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0033e-07, -5.2582e-07,  5.3199e-09, -4.2591e-08, -3.0959e-08,
        -2.5616e-08, -4.7988e-08, -2.7388e-08, -1.4343e-08, -2.9479e-08,
        -2.4061e-07, -1.0681e-07, -1.2912e-07, -9.9879e-07, -3.8466e-07,
        -8.7290e-08, -7.7470e-08, -3.6874e-08, -4.0841e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0198e-07, -9.2532e-07,  5.7348e-09, -6.0326e-08, -2.0787e-08,
        -5.3685e-08, -6.2318e-08, -4.2160e-08, -9.9521e-09, -4.1326e-08,
        -3.2948e-07, -1.8485e-07, -3.8211e-07, -3.0095e-07, -1.0104e-07,
        -5.2474e-08, -3.8903e-08, -8.5499e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1902e-07, -3.1775e-07, -4.0256e-10, -2.0607e-08, -1.4901e-08,
        -2.1036e-08, -8.8628e-09, -2.7205e-08, -1.7594e-08, -1.6204e-08,
        -3.4747e-07, -1.0975e-07, -1.9175e-07, -3.2822e-08, -3.2070e-08,
        -2.6302e-08, -1.3657e-06, -5.2003e-08, -9.3497e-08, -3.7762e-08,
        -4.4269e-08, -1.8390e-08, -1.6314e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2416e-07, -1.7135e-06,  9.7405e-09, -3.4203e-08, -3.0974e-08,
        -4.1318e-08, -6.3904e-08, -6.1524e-08, -1.9527e-08, -2.6264e-08,
        -8.0789e-08, -1.0468e-06, -7.6975e-08, -7.2108e-08, -6.3242e-08,
        -1.5903e-07, -6.3705e-08, -9.4723e-08, -3.1161e-06, -3.2547e-08,
        -4.2410e-07, -3.4630e-08, -4.7575e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1257e-06, -7.9688e-07,  2.4069e-09, -9.7787e-08, -3.0868e-08,
        -6.2748e-08, -9.0588e-08, -7.9511e-08, -1.7114e-08, -6.8385e-08,
        -2.4493e-07, -2.6645e-07, -2.0501e-07, -1.5140e-05, -1.0855e-07,
        -2.2194e-07, -1.2088e-07, -1.2309e-07, -2.2335e-06, -8.8555e-08,
        -7.7519e-08, -4.9087e-08, -2.8671e-07, -5.2987e-08, -8.3790e-08,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9432e-07, -1.1418e-06, -1.9018e-09, -4.7521e-08, -2.3728e-08,
        -3.7400e-08, -6.1877e-08, -5.1506e-08, -1.6026e-08, -1.4817e-08,
        -5.7385e-08, -1.2697e-07, -3.5848e-08, -6.0236e-08, -1.3136e-06,
        -2.2647e-08, -1.2157e-07, -1.0006e-07, -3.8648e-08, -3.9100e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3950: [tensor([-8.7650e-07, -1.3321e-06, -8.3649e-08, -5.3098e-07, -6.1981e-07,
        -4.3919e-08, -9.2780e-08, -2.1528e-08, -6.1990e-08, -5.2309e-06,
        -4.6809e-08,  1.2012e-09, -7.2145e-08, -4.6272e-07, -5.2631e-07,
        -1.5735e-05, -1.2799e-07, -2.2528e-07, -1.2108e-07, -7.2826e-06,
        -7.6286e-08, -5.4543e-08, -7.2307e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9976e-06, -7.5404e-07, -6.6222e-08, -1.0809e-07, -1.9339e-07,
        -4.2551e-08, -6.1810e-08, -8.3859e-08, -2.8516e-07, -3.7345e-08,
        -4.4763e-08, -5.6856e-06, -3.0530e-08, -1.1891e-08, -2.7565e-08,
        -9.7947e-07, -1.5623e-07, -1.7330e-07, -4.4094e-07, -2.3989e-07,
        -7.4435e-08, -8.2893e-08, -2.1959e-07, -1.0716e-07, -3.8022e-08,
        -3.8804e-08, -1.6113e-05, -7.1184e-08, -4.6006e-08, -6.0792e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7156e-07, -1.4565e-06, -2.0125e-07, -2.2465e-07, -4.5618e-07,
        -6.9734e-08, -1.3131e-07, -2.2681e-07, -5.8463e-07, -9.5398e-08,
        -7.0458e-08, -2.3514e-05, -4.8452e-08,  1.8432e-09, -4.9716e-08,
        -1.0344e-05, -2.6875e-07, -5.4634e-06, -1.6819e-07, -3.6694e-07,
        -2.1025e-06, -5.7930e-08, -1.1258e-08, -1.0362e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0873e-07, -7.5599e-07, -8.2817e-08, -6.4705e-08, -1.5879e-07,
        -1.5503e-08, -2.5828e-08, -6.3828e-08, -2.0801e-07, -6.2777e-08,
        -2.7787e-08, -1.3535e-05, -3.7075e-08, -2.9265e-09, -2.2149e-08,
        -2.7235e-06, -1.2059e-07, -1.4934e-06, -1.7899e-07, -8.7437e-08,
        -1.3163e-07, -8.8871e-07, -3.5281e-08, -1.8643e-08, -6.3710e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1169e-06, -1.7311e-06, -1.2466e-08, -1.9249e-07, -3.1232e-09,
        -3.8007e-08, -1.1285e-06, -4.2322e-09, -1.0057e-07,  2.4885e-09,
        -1.2119e-07, -4.1685e-05, -3.2636e-07, -1.5617e-06, -5.7016e-07,
        -5.1930e-07, -4.0996e-07, -2.4415e-07, -3.0841e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3308e-06, -2.2552e-06, -7.6498e-08, -2.2007e-07, -4.3418e-08,
        -1.5807e-07, -1.3515e-06,  4.7935e-08, -1.1797e-07, -3.4896e-09,
        -1.2237e-07, -5.7904e-05, -5.3835e-07, -2.0552e-06, -5.7353e-07,
        -1.9696e-07, -3.3014e-07, -3.4563e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4379e-06, -1.2691e-06,  7.1182e-09, -1.1089e-07, -4.2432e-08,
        -4.0470e-08, -6.7956e-07, -2.9501e-08, -5.4325e-08, -4.5797e-09,
        -6.2058e-08, -2.4575e-05, -2.9742e-07, -1.2112e-07, -1.5141e-07,
        -2.7316e-07, -1.5219e-07, -2.3797e-07, -3.2433e-07, -1.0156e-07,
        -7.7345e-08, -2.5752e-07, -6.3376e-06, -1.0740e-07, -2.8897e-08,
        -8.4885e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2021e-06, -1.0094e-06, -1.1789e-07, -5.1410e-07, -6.8938e-08,
        -3.8392e-09, -5.1780e-07, -1.0505e-07, -5.0879e-08, -4.9379e-06,
        -1.3507e-08, -3.8923e-08, -1.1197e-07, -1.3258e-06, -3.4017e-08,
        -1.5597e-07, -1.8341e-07, -2.3821e-07,  7.6919e-09,  6.8263e-09,
        -4.8279e-06, -1.6188e-07, -1.0340e-05, -4.2721e-06, -1.0811e-07,
        -2.4458e-07, -1.1122e-05, -8.2534e-08, -4.8079e-08, -1.4757e-06,
        -1.8652e-07, -1.1343e-07, -1.5338e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.8707e-07, -6.6472e-07, -6.4890e-08, -6.0352e-07, -6.6571e-08,
        -3.1653e-08, -3.3503e-07, -6.9178e-08, -3.8713e-08, -1.6657e-06,
        -9.0558e-09, -5.2192e-08, -9.2117e-08, -2.9623e-06, -5.1842e-08,
        -1.1095e-07, -1.4662e-07, -1.2978e-07, -6.8745e-09, -1.7771e-08,
        -4.3945e-06, -1.4298e-07, -1.4002e-07, -9.1202e-08, -1.3627e-05,
        -7.2557e-08, -7.0660e-08, -1.5683e-07, -1.6287e-05, -1.3549e-07,
        -1.8004e-07, -3.6893e-08, -1.2356e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2913e-06, -1.9234e-06, -6.0286e-08, -3.7226e-07, -7.2600e-08,
        -2.6307e-08, -3.2823e-07, -1.1110e-07, -5.1993e-08, -2.2873e-06,
         9.4863e-09, -7.8297e-08, -1.0327e-07, -1.5969e-06, -6.8613e-08,
        -7.9210e-08, -9.9044e-08, -1.6557e-07,  3.0507e-08, -6.9516e-09,
        -2.9705e-06, -1.5257e-07, -1.8804e-07, -1.2810e-07, -1.8943e-05,
        -1.1424e-07, -5.3971e-08, -1.1558e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8028e-07, -6.3282e-07, -1.0088e-08, -2.6381e-08, -1.1399e-07,
        -1.0203e-08, -2.3442e-08, -9.0023e-09, -1.2361e-08, -2.4011e-06,
         4.3066e-09,  3.5351e-09, -7.8501e-09, -1.7705e-08, -7.6375e-07,
        -9.2707e-08, -3.8280e-08, -1.2359e-08, -3.8448e-08, -2.9180e-06,
        -1.3684e-08, -2.5691e-08, -2.0204e-08, -3.1536e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.2544e-07, -6.9150e-07, -2.2906e-08, -4.1484e-08, -1.5257e-07,
        -4.1860e-08, -2.0615e-08, -4.8260e-09, -2.1832e-08, -2.9237e-06,
        -1.1687e-08, -2.2450e-08, -1.1836e-08, -2.2425e-08, -1.3038e-06,
        -1.1731e-07, -5.1947e-08, -3.3679e-08, -5.5120e-08, -3.8336e-08,
        -7.8073e-08, -3.2266e-08, -6.1924e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4000: [tensor([-1.4581e-07, -9.6641e-07, -1.7992e-08, -7.3624e-08, -2.5888e-08,
        -4.4925e-08, -2.6103e-07, -2.4193e-08, -2.6993e-08, -2.2194e-09,
        -3.4676e-08, -5.9066e-08, -7.4981e-08,  3.0207e-09, -2.0589e-08,
        -2.7965e-08, -1.7926e-08, -3.1139e-08, -6.4063e-07, -1.1369e-08,
        -5.5629e-08, -2.2599e-07, -6.7395e-07, -3.2729e-08, -8.6190e-10,
        -2.5820e-09, -2.6283e-07, -5.7741e-08, -3.0021e-07, -1.4554e-06,
        -7.5316e-08, -4.3498e-07, -1.0404e-07, -8.2827e-08, -3.3901e-08,
        -5.3033e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6257e-06, -7.3352e-07,  9.8666e-09, -8.4708e-08, -8.0974e-08,
        -2.9180e-08, -3.8892e-07, -2.6684e-08, -6.5346e-08, -7.2287e-09,
        -6.8017e-08, -7.1450e-08, -1.3034e-07,  8.1866e-10, -3.2413e-08,
        -5.3912e-08, -3.7929e-08, -4.9459e-08, -4.5651e-07, -7.0023e-09,
        -5.4859e-08, -2.6541e-07, -8.3744e-07, -3.4772e-08,  1.6595e-08,
        -1.2246e-08, -1.5109e-05, -9.9741e-08, -4.6158e-08, -6.9446e-07,
        -1.1747e-06, -8.1206e-08, -1.1594e-07, -4.3137e-08, -6.2066e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1010e-06, -1.0570e-06, -7.4359e-08, -2.2181e-07, -1.9365e-07,
        -1.4158e-07, -7.3978e-07, -1.2568e-07, -1.7178e-07, -1.8676e-08,
        -1.6318e-07, -2.0932e-07, -2.6814e-07,  7.4799e-09, -7.5166e-08,
        -2.2444e-07, -1.4599e-07, -9.8735e-08, -9.0647e-07, -1.8508e-08,
        -1.9121e-07, -6.0802e-07, -1.0631e-06, -8.1669e-08,  3.9203e-08,
        -3.2736e-08, -2.2849e-05, -2.2964e-07, -3.4676e-07, -2.5126e-07,
        -2.2473e-07, -5.4600e-07, -1.3473e-06, -2.5354e-07, -4.8404e-07,
        -3.0106e-05, -2.5831e-07, -1.9599e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5562e-06, -4.8126e-07, -2.1040e-08, -9.9287e-08, -8.4727e-08,
        -5.0584e-08, -3.3287e-07, -4.3849e-08, -6.5127e-08, -7.3913e-09,
        -8.7886e-08, -7.2784e-08, -1.4922e-07, -1.1147e-08, -3.6959e-08,
        -9.2079e-08, -5.3463e-08, -6.2208e-08, -4.3717e-07, -1.4054e-08,
        -6.5384e-08, -2.5242e-07, -3.7079e-07, -3.0145e-08,  9.1871e-09,
        -1.5881e-08, -1.3714e-05, -6.3590e-08, -9.1294e-08, -2.6512e-07,
        -9.7044e-08, -5.2338e-06, -1.1883e-07, -6.2545e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0893e-06, -6.1648e-07, -2.4073e-08, -1.0227e-07, -6.7172e-08,
        -3.3338e-08, -3.2866e-07, -3.9982e-08, -7.1231e-08,  9.1348e-09,
        -6.2057e-08, -8.4612e-08, -1.1621e-07, -9.1778e-09, -3.4288e-08,
        -6.2041e-08, -3.9172e-08, -4.9900e-08, -5.3646e-07, -8.0631e-09,
        -4.2100e-08, -2.3843e-07, -4.5728e-07, -3.5347e-08,  5.5202e-08,
        -3.1354e-08, -1.6341e-05, -5.2211e-08, -1.5247e-07, -1.9457e-07,
        -8.2733e-08, -1.1401e-06, -1.3754e-07, -1.3071e-07, -3.9289e-08,
        -7.4743e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9770e-06, -8.7858e-07, -3.9902e-08, -1.3835e-07, -7.5013e-08,
        -9.3447e-08, -7.9173e-07, -5.8209e-08, -1.0127e-07, -7.2710e-09,
        -1.2397e-07, -1.0165e-07, -1.8043e-07, -5.2655e-09, -7.2250e-08,
        -8.0721e-08, -7.7785e-08, -5.9712e-08, -7.4183e-07, -2.6268e-08,
        -7.2968e-08, -4.3068e-07, -8.2696e-07, -5.8058e-08,  1.0086e-08,
        -4.0971e-08, -1.6252e-05, -1.8371e-07, -1.3588e-07, -8.0463e-07,
        -4.9430e-06, -1.0650e-07, -1.8688e-07, -8.1192e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7149e-06, -7.1480e-07, -2.5309e-08, -1.2390e-07, -6.6063e-08,
        -2.7640e-08, -3.6099e-07, -4.8643e-08, -7.6401e-08,  6.1332e-09,
        -8.5956e-08, -6.7952e-08, -1.2969e-07, -6.4029e-09, -3.2315e-08,
        -6.3692e-08, -4.4263e-08, -5.0696e-08, -4.8105e-07, -5.4360e-09,
        -3.3202e-08, -2.5691e-07, -1.3018e-06, -4.1810e-08,  1.4174e-08,
         8.1314e-10, -1.5326e-05, -1.0590e-07, -1.0587e-07, -1.1428e-06,
        -1.4773e-07, -9.1283e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3454e-06, -1.3053e-06, -3.6749e-08, -1.1892e-07, -8.3657e-08,
        -4.7586e-08, -5.5647e-07, -4.4985e-08, -5.9158e-08,  7.0658e-09,
        -5.7365e-08, -1.3543e-07, -2.1497e-07,  4.7902e-09, -3.5674e-08,
        -1.2691e-07, -5.1732e-08, -4.8795e-08, -4.8331e-07, -3.1749e-08,
        -6.5787e-08, -3.4140e-07, -1.4802e-06, -6.0230e-08,  4.7509e-08,
        -4.1851e-08, -1.7260e-05, -7.5819e-08, -1.6854e-07, -1.8833e-07,
        -1.6084e-07, -5.4382e-06, -1.0917e-07, -6.0372e-08, -7.1378e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5405e-07, -2.1770e-06, -1.0648e-07, -1.8150e-07, -2.3253e-07,
        -4.8288e-08, -6.7609e-08, -7.7379e-08, -5.6453e-08, -3.9947e-08,
        -2.3504e-08, -1.7455e-08, -3.6174e-08, -2.0460e-06, -1.9174e-07,
        -1.1339e-06, -9.3695e-08, -9.7346e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4995e-07, -4.2328e-07, -3.9880e-08, -8.9104e-08, -1.5893e-07,
        -3.0830e-08, -2.8584e-08, -2.9160e-08, -2.5833e-08, -1.4112e-08,
        -1.6800e-08, -2.0450e-08, -1.7610e-08, -7.5768e-07, -5.9462e-08,
        -3.2782e-07, -5.7127e-08, -3.0024e-08, -3.0490e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4292e-07, -9.4752e-07, -5.1456e-08, -9.8383e-08, -1.1905e-07,
        -2.4735e-08, -2.9039e-08, -4.0484e-08, -2.6392e-08, -1.6202e-08,
        -2.3990e-08, -3.9112e-08, -1.3838e-08, -6.7157e-07, -1.0288e-07,
        -6.6796e-07, -3.0145e-08, -5.9231e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4125e-07, -9.7008e-07, -5.3219e-08, -4.0341e-08, -3.3200e-08,
        -7.2246e-07, -4.5198e-08, -4.3088e-08, -3.0273e-08, -3.1161e-08,
        -1.2475e-08, -1.1904e-07, -3.9283e-08, -2.2166e-08, -3.5352e-07,
        -6.9297e-08, -1.5926e-07, -6.5454e-08, -5.1029e-08, -9.9618e-08,
        -7.0921e-08, -4.3846e-08, -7.6353e-08, -1.2921e-07, -1.1815e-07,
        -3.5708e-08, -3.4312e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4050: [tensor([-7.0215e-07, -7.6036e-07, -3.2932e-08, -4.4212e-08, -5.2716e-08,
        -2.7172e-08, -3.7702e-08, -3.7221e-08, -2.8679e-07, -3.2704e-08,
        -4.6644e-08, -7.1187e-08, -1.9281e-08, -4.9516e-08, -4.6350e-08,
        -2.5990e-08, -2.2850e-07, -1.0077e-07, -3.1661e-08, -2.2738e-08,
        -3.8096e-08, -9.5635e-09,  2.7043e-09, -1.5573e-08, -1.1397e-06,
        -4.5178e-08, -6.7160e-06, -1.0802e-07, -7.3259e-08, -1.1127e-06,
        -6.9370e-08, -5.8035e-08, -7.5819e-08, -5.3628e-08, -2.5737e-08,
        -6.3061e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3889e-08, -8.6807e-07, -1.8594e-08, -8.3547e-08, -6.7272e-08,
        -3.2009e-08, -2.5651e-08, -2.3634e-08, -5.6478e-07, -3.3906e-08,
        -2.6202e-08, -6.5965e-08, -1.1494e-08, -6.0223e-08, -4.5489e-08,
        -5.3278e-08, -6.9162e-07, -6.1599e-08, -2.7032e-08, -4.2379e-08,
        -3.8320e-08, -3.1438e-08, -1.3273e-08, -1.7289e-08, -2.4678e-06,
        -5.6729e-08, -4.3382e-06, -9.3989e-08, -8.9112e-08, -5.3502e-08,
        -6.0176e-07, -5.1896e-08, -5.6188e-08, -1.8998e-08, -4.8216e-08,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4288e-06, -1.2468e-06, -9.8279e-08, -1.5043e-07, -1.1677e-07,
        -1.0511e-07, -1.3618e-07, -8.5590e-08, -8.4529e-07, -8.5982e-08,
        -9.9559e-08, -1.8115e-07,  6.2349e-09, -1.2367e-07, -1.1045e-07,
        -8.8039e-08, -1.1213e-06, -2.2527e-07, -1.0777e-07, -1.2638e-08,
        -6.0982e-08, -3.5734e-08,  1.7016e-08, -9.0525e-09, -2.5853e-05,
        -1.3862e-07, -6.6855e-06, -4.2447e-07, -7.4153e-08, -9.8251e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0113e-07, -1.1216e-06, -1.7815e-07, -9.1105e-08, -1.4363e-07,
        -8.8418e-07, -1.5166e-07, -6.1985e-08, -8.1802e-08, -6.1474e-07,
        -1.3691e-07, -1.9390e-07,  8.9787e-09, -9.2185e-08, -1.1339e-06,
        -1.5761e-07, -2.5202e-07, -1.7255e-05, -1.3190e-07, -1.3833e-07,
        -8.4243e-07, -1.8538e-07, -4.9960e-08, -9.8295e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1575e-07, -2.5801e-06, -1.0583e-07, -3.0027e-08, -5.3361e-08,
        -6.4998e-07, -4.9126e-08, -8.2070e-09, -3.7180e-08, -4.0311e-07,
        -5.2176e-08, -2.0385e-07, -1.6515e-09, -2.8130e-08, -7.1015e-07,
        -4.9383e-08, -2.8522e-07, -6.0587e-08, -5.4545e-08, -1.8711e-07,
        -1.2877e-07, -1.1199e-07, -2.9733e-08, -3.6007e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3534e-07, -1.4159e-06, -7.1296e-08, -4.6978e-08, -1.0577e-07,
        -8.2879e-07, -6.7155e-08, -3.0584e-08, -4.3871e-08, -4.4244e-07,
        -4.3613e-08, -1.4942e-07, -2.2805e-09, -5.1042e-08, -1.4136e-06,
        -9.0852e-08, -3.7491e-07, -2.2275e-08, -7.7963e-08, -1.8256e-06,
        -9.7407e-08, -2.1567e-08, -7.1613e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3896e-07, -3.9424e-08, -7.4336e-08, -4.2008e-08, -4.9169e-08,
        -6.1737e-07, -2.7781e-08, -1.0668e-08, -3.9830e-08, -3.9617e-08,
        -1.0952e-07, -6.4317e-06, -2.9355e-08, -2.0578e-08, -6.9441e-08,
        -1.1921e-07,  1.4567e-09, -7.0841e-08, -1.5508e-08,  1.1866e-08,
        -6.4503e-09, -1.0104e-08, -9.2359e-08, -3.6034e-07, -3.2068e-08,
        -3.4637e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4844e-06, -9.5997e-08, -1.4310e-07, -1.3183e-07, -5.8909e-08,
        -5.8460e-07, -8.9817e-08, -4.3668e-08, -7.5148e-08, -5.8570e-08,
        -2.1684e-07, -6.9462e-06, -8.6274e-08, -7.4951e-08, -1.6071e-07,
        -1.8516e-07, -3.6105e-08, -1.0176e-07, -2.7669e-08,  2.0789e-08,
        -1.5397e-08, -1.5040e-07, -4.0282e-07, -3.1658e-07, -1.0030e-07,
        -1.0520e-07, -1.6534e-05, -1.2547e-07, -1.1984e-07, -8.3476e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1843e-06, -3.7135e-08, -9.7867e-08, -3.2022e-08, -5.9406e-08,
        -6.0004e-07, -3.1854e-08, -1.9007e-08, -3.3055e-08, -3.1502e-08,
        -1.1959e-07, -3.5824e-06, -2.0321e-08, -2.0980e-08, -9.1238e-08,
        -9.0963e-08, -1.5997e-08, -5.7876e-08, -1.6281e-08, -3.8275e-09,
        -1.3001e-08, -5.5544e-08, -4.9454e-07, -2.0526e-07, -1.2725e-11,
        -3.6701e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0184e-07, -5.6424e-07, -3.3172e-08, -3.4708e-07, -4.2196e-08,
        -4.8680e-07, -3.5492e-08, -4.5623e-08, -1.5267e-07, -4.1598e-08,
        -8.6851e-08, -3.1687e-06, -3.9634e-08, -3.7616e-08, -3.1063e-08,
        -1.2803e-06, -3.0836e-08, -2.0871e-08, -5.2353e-09, -8.4762e-06,
        -6.7892e-08, -5.4270e-07, -1.0803e-07, -6.7982e-08, -7.9324e-08,
        -8.4381e-08, -2.8661e-08, -6.7519e-08, -5.9236e-08, -2.4900e-08,
        -5.5403e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7736e-06, -1.0837e-06, -8.2076e-08, -8.4592e-07, -9.7576e-08,
        -8.2693e-07, -5.9266e-08, -8.8070e-08, -3.0834e-07, -5.6587e-08,
        -1.7699e-07, -4.3767e-06, -4.7409e-08, -5.9894e-08, -3.7076e-08,
        -1.9349e-06, -5.6868e-08, -3.2493e-08, -3.2319e-08, -1.1889e-05,
        -1.4371e-07, -9.7798e-07, -2.9951e-07, -8.5631e-08, -5.2624e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1746e-07, -4.4960e-07, -1.9563e-08, -2.0979e-07, -3.4776e-08,
        -3.1989e-07, -3.2219e-08, -3.3477e-08, -1.2422e-07, -3.7042e-08,
        -6.5359e-08, -3.4641e-06, -2.6301e-08, -1.9336e-08, -1.7292e-08,
        -4.2242e-07, -1.7771e-08, -8.4504e-09, -1.0833e-08, -1.6531e-06,
        -3.8435e-08, -5.4111e-07, -1.1308e-07, -3.5554e-08, -5.2276e-08,
        -3.0121e-06, -4.1438e-08, -3.8401e-08, -4.8590e-08, -7.4735e-08,
        -1.9814e-08, -2.5612e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4100: [tensor([-1.0916e-06, -1.2340e-06, -8.7297e-08, -6.7136e-08, -5.2008e-07,
        -6.3436e-08, -3.6317e-08, -2.0961e-08, -3.5355e-08, -1.6502e-07,
        -4.8632e-08, -3.7330e-07, -8.7807e-08, -8.8142e-08, -4.4378e-08,
        -6.9932e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1846e-06, -1.5695e-06, -8.7086e-08, -1.7901e-07, -6.6565e-07,
        -8.2383e-08, -5.0421e-08, -2.1139e-08, -2.8621e-08, -1.3945e-07,
        -6.0147e-08, -3.1607e-07, -7.8645e-08, -5.3855e-08, -1.0787e-07,
        -5.8721e-08, -2.5841e-08, -6.0808e-08, -5.7573e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5794e-07, -1.1883e-06, -1.9856e-08, -1.2444e-06, -5.9932e-08,
        -1.0604e-06, -9.7264e-08, -6.0245e-08, -1.3794e-08, -3.7761e-08,
        -6.8538e-07, -2.8461e-07, -1.0197e-07, -1.1996e-06, -4.8960e-08,
        -7.9662e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5094e-07, -2.5508e-06, -1.3498e-08, -6.1198e-07, -9.7057e-08,
        -9.6283e-07, -1.5248e-07, -7.0656e-08, -1.1039e-09, -4.6076e-08,
        -7.9564e-08, -8.8823e-06, -2.4184e-07, -1.0980e-06, -1.0700e-07,
        -9.7337e-08, -4.0024e-08, -1.0192e-07, -8.1626e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9427e-07, -1.6262e-06, -3.8811e-08, -6.3107e-07, -5.7321e-08,
        -8.3462e-07, -8.8177e-08, -6.0885e-08, -1.0044e-08, -4.1253e-08,
        -4.8797e-07, -4.3580e-07, -8.5342e-08, -7.6350e-08, -2.7337e-08,
        -9.9064e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5488e-07, -6.6430e-07, -1.2592e-08, -3.4976e-08, -1.1226e-08,
        -6.0425e-08, -2.6409e-07, -3.0651e-08, -3.0512e-08, -3.3268e-08,
        -1.8659e-08, -1.8995e-08, -2.9378e-08, -1.0439e-08, -1.1720e-08,
        -2.4280e-08, -3.0109e-08, -7.6304e-09, -5.4368e-09, -7.7573e-09,
        -7.1205e-06, -4.7725e-08, -2.8521e-08, -5.4408e-08, -1.1050e-08,
        -2.6296e-08, -6.4253e-08, -7.3354e-08, -7.8742e-08, -5.5733e-08,
        -5.1096e-08, -2.6062e-08, -6.2265e-08, -3.6310e-08, -4.3722e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6195e-07, -1.4443e-06, -3.2392e-08, -4.9145e-08, -3.3392e-08,
        -1.1754e-07, -4.5590e-07, -5.8403e-08, -5.6404e-08, -8.3278e-08,
        -2.5400e-08, -3.5404e-08, -5.3078e-08, -2.5491e-08, -3.0304e-08,
        -4.3391e-08, -8.2147e-08, -1.2819e-09, -2.7469e-08, -2.6457e-08,
        -5.3438e-06, -5.5514e-08, -4.1259e-08, -8.0922e-08, -2.2718e-08,
        -5.4821e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0192e-07, -6.0180e-07, -2.2698e-08, -7.0410e-08, -1.2770e-08,
        -1.0778e-07, -3.6557e-07, -5.6652e-08, -4.8807e-08, -7.2335e-08,
        -2.3183e-08, -3.4558e-08, -5.1387e-08, -2.4312e-08, -3.0675e-08,
        -6.4204e-08, -6.6670e-08, -2.0628e-08, -9.1932e-09, -1.4967e-08,
        -1.0968e-05, -1.6032e-07, -9.8105e-08, -1.0588e-07, -1.0589e-07,
        -3.6595e-06, -2.0232e-07, -5.7559e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7988e-07, -4.3274e-07, -1.8136e-08, -1.7721e-08, -8.4040e-09,
        -9.8285e-09, -1.7017e-07, -3.8023e-08, -2.8761e-08, -1.1404e-08,
        -5.1055e-09, -2.1448e-08, -3.0223e-08, -1.9417e-08,  2.0184e-09,
        -1.0633e-08, -6.4553e-07, -4.0546e-08, -3.3859e-08, -1.7657e-08,
        -7.1317e-08, -1.6941e-06, -1.1353e-07, -2.8462e-08, -3.5830e-08,
        -3.0223e-08, -6.1673e-08, -4.6938e-08, -3.7504e-08, -2.0464e-08,
        -2.9683e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2345e-07, -8.3170e-07, -3.8702e-08, -1.5799e-08, -1.6087e-08,
        -2.4334e-08, -3.2467e-07, -5.7959e-08, -3.2190e-08, -2.0691e-08,
        -6.4078e-09, -4.1748e-08, -5.2116e-08, -2.3587e-08,  5.2061e-09,
        -1.9407e-08, -9.4666e-07, -5.9934e-08, -6.4944e-08, -5.7872e-08,
        -4.1170e-06, -8.5830e-08, -8.9199e-08, -4.2238e-08, -9.4872e-09,
        -7.4519e-08, -2.5219e-07, -3.8243e-08, -2.6049e-08, -3.3529e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8107e-07, -1.1659e-06, -6.6171e-08, -3.8528e-08, -2.0522e-08,
        -4.6448e-08, -3.6739e-07, -7.7880e-08, -7.1277e-08, -3.6413e-08,
        -1.6717e-08, -6.4475e-08, -5.6392e-08, -2.8753e-08, -4.0370e-09,
        -2.3496e-08, -1.4034e-06, -8.0826e-08, -7.0565e-08, -5.4822e-08,
        -8.9318e-08, -8.5783e-06, -2.0816e-07, -9.2273e-08, -6.0552e-08,
        -4.1535e-08, -1.4027e-07, -1.1539e-07, -7.2283e-08, -1.5825e-08,
        -7.0392e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6344e-07, -7.3402e-07, -4.3809e-08, -1.3930e-07, -5.3708e-09,
        -8.4292e-09, -1.2191e-07, -5.8112e-09, -2.5142e-08, -1.9708e-08,
        -9.6313e-08, -2.8889e-08, -1.9090e-08, -2.9822e-08, -1.7371e-08,
        -9.6115e-08, -1.3441e-07, -2.0060e-08, -1.8442e-08, -6.0350e-09,
        -1.1859e-08, -2.7198e-08, -1.6938e-09, -1.2875e-08, -1.6043e-06,
        -3.8729e-08, -9.9296e-08, -4.7225e-08, -4.8411e-08, -8.1717e-08,
        -5.5568e-07, -4.8842e-08, -3.2490e-08, -1.4764e-07, -6.7548e-08,
        -9.9622e-07, -5.6849e-08, -5.2690e-08, -2.4291e-08, -2.4013e-08],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4150: [tensor([-5.2550e-07, -9.8162e-07, -2.9872e-08, -3.2011e-08, -3.0711e-08,
        -6.6045e-08, -5.5347e-08, -3.2603e-08, -1.5241e-07, -4.9981e-08,
        -1.1929e-07, -8.4200e-08, -9.7397e-07, -2.0538e-08, -2.6945e-08,
        -2.4559e-07, -2.6458e-08, -7.8907e-09, -3.1133e-08, -1.9366e-08,
         8.5797e-10, -1.4900e-08, -4.1987e-08, -1.0970e-07, -1.5125e-06,
        -5.9831e-08, -6.9631e-08, -7.5019e-08, -2.0924e-07, -4.9532e-08,
        -2.9884e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1318e-06, -1.2227e-06, -1.3363e-07, -1.3155e-07, -1.0511e-07,
        -2.0111e-07, -1.6123e-07, -1.0378e-07, -5.8408e-07, -7.2515e-08,
        -5.6033e-07, -1.8229e-07, -1.0918e-06, -7.9895e-08, -1.0438e-07,
        -5.1489e-07, -1.3166e-07, -6.2065e-08, -1.2882e-07, -3.1579e-08,
         4.6900e-08, -1.5370e-08, -2.7566e-05, -2.4958e-07, -7.1995e-07,
        -4.0350e-06, -1.9328e-07, -2.7379e-07, -9.3170e-08, -1.3970e-07,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3876e-06, -9.6371e-07, -4.4039e-08, -1.5598e-07, -1.5537e-07,
        -1.5734e-07, -1.6592e-07, -8.2755e-08, -4.8534e-07, -1.3699e-07,
        -4.0822e-07, -1.2180e-07, -1.5953e-06, -7.9940e-08, -1.4126e-07,
        -4.6248e-07, -8.5147e-08, -2.2900e-08, -1.1830e-07, -6.1853e-08,
         3.3195e-08, -6.1781e-08, -2.5705e-05, -1.6973e-07, -9.1643e-07,
        -2.1048e-07, -2.3721e-07, -2.5894e-07, -1.1198e-07, -1.3594e-07,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0025e-07, -4.8226e-07, -1.9035e-08, -4.2701e-08, -2.5096e-08,
        -6.1772e-08, -4.3000e-08, -3.3338e-08, -1.3826e-07, -3.1329e-08,
        -1.1715e-07, -8.4099e-08, -9.2854e-07, -2.0513e-08, -2.0361e-08,
        -1.5570e-07, -1.5249e-08, -1.1685e-08, -4.0255e-08, -1.9969e-08,
        -1.2110e-08, -1.6366e-08, -5.2922e-07, -3.4494e-07, -3.2661e-08,
        -8.9705e-08, -4.0843e-08, -2.9454e-08, -3.2119e-08,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6263e-07, -1.5130e-06, -5.8204e-09, -7.1979e-08, -3.3820e-08,
        -3.7394e-08, -5.0344e-08, -7.5417e-07, -3.2438e-08, -6.5555e-09,
        -1.7777e-08, -1.7514e-07, -1.1526e-07, -1.2599e-07, -4.6326e-07,
        -7.1758e-08, -4.5217e-08, -5.1152e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6744e-07, -7.8198e-07, -2.9879e-08, -3.6459e-08, -2.5132e-08,
        -3.3644e-08, -6.3462e-08, -1.1435e-06, -2.8832e-08, -2.0883e-08,
        -1.6155e-08, -2.2152e-07, -8.1078e-08, -3.3436e-08, -3.4788e-08,
        -4.6346e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8856e-07, -1.0046e-06, -2.2973e-08, -8.2097e-08, -3.2054e-08,
        -4.3840e-08, -8.0788e-08, -5.4334e-07, -3.3883e-08, -3.8125e-08,
        -2.1199e-08, -2.0273e-07, -1.9007e-07, -1.5252e-07, -8.1524e-08,
        -6.0976e-06, -1.1645e-07, -7.9302e-08, -8.2304e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2463e-07, -5.2598e-07, -2.1441e-08, -2.7197e-08, -2.2341e-07,
        -1.7226e-08, -2.8271e-08, -1.4196e-07, -2.9782e-08, -1.0795e-08,
        -1.1136e-08, -2.8876e-08, -1.9533e-08, -1.7300e-08, -3.9931e-08,
        -7.6231e-08, -4.1781e-08, -1.6021e-06, -3.1762e-07, -3.1060e-08,
        -1.8682e-08, -1.5285e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8954e-07, -6.6451e-07, -6.1270e-09, -3.0902e-08, -3.3458e-07,
        -1.6224e-08, -2.8790e-08, -1.3801e-07, -3.6749e-08, -2.3638e-08,
        -1.9074e-08, -1.7418e-08, -1.6133e-08, -2.9519e-08, -3.7707e-08,
        -9.1666e-08, -5.1246e-08, -2.0863e-07, -4.2695e-08, -7.0779e-08,
        -1.8745e-08, -3.3938e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2658e-07, -6.7367e-07, -2.7343e-08, -3.0178e-08, -3.6397e-07,
        -1.1034e-08, -2.9331e-08, -2.0733e-07, -3.8029e-08, -1.2606e-08,
        -1.0561e-08, -2.7157e-08, -1.6663e-08, -1.6150e-08, -4.2951e-08,
        -7.3853e-08, -3.2524e-08, -2.8479e-07, -5.9269e-07, -8.4716e-08,
        -2.5089e-08, -2.1661e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9482e-07, -4.5633e-08, -4.1745e-08, -7.3600e-08, -6.2022e-08,
        -3.1656e-08, -2.9838e-08, -6.5041e-07, -6.4970e-08, -4.1334e-08,
        -3.7607e-09, -2.7469e-08, -9.3509e-08, -5.3681e-07, -6.2892e-08,
        -7.9997e-07, -3.9930e-07, -1.2560e-07, -3.8661e-08, -6.6025e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9199e-08, -3.0592e-08, -4.1464e-08, -5.0051e-08, -6.2681e-08,
        -2.3702e-08, -3.2568e-08, -5.7511e-07, -4.6242e-08, -2.0625e-08,
        -5.1134e-09, -1.9436e-08, -5.5460e-07, -8.4343e-08, -3.0350e-08,
        -9.0205e-07, -3.0156e-08, -6.0634e-08, -5.7568e-08, -2.6089e-08,
        -3.7681e-08, -2.4614e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4200: [tensor([-1.4129e-06, -7.4085e-07, -2.2578e-08, -1.0386e-07, -7.9765e-08,
        -4.8430e-07, -2.3890e-08, -1.2086e-07, -4.2575e-08, -1.6251e-08,
        -2.1138e-08, -1.0983e-07, -1.7504e-08, -2.7867e-08, -5.7440e-07,
        -5.9701e-08, -8.3718e-08, -2.6981e-06, -7.0493e-08, -5.7944e-08,
        -6.1525e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4433e-07, -1.8185e-06, -3.7298e-08, -9.9835e-08, -6.5612e-08,
        -3.0185e-07, -2.5201e-08, -1.0098e-07, -4.8596e-08, -6.6473e-09,
        -3.7666e-08, -8.9691e-08, -2.8001e-08, -3.2166e-08, -6.9471e-07,
        -8.2869e-08, -1.0319e-07, -1.8943e-07, -8.6886e-08, -2.2016e-07,
        -1.8149e-06, -5.6058e-08, -7.7381e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8971e-07, -1.3046e-06, -3.4776e-08, -1.0147e-07, -1.0448e-07,
        -2.5198e-07, -2.9606e-08, -1.0972e-07, -6.4412e-08, -3.4955e-08,
        -3.2700e-08, -1.1060e-07, -3.5675e-08, -5.2094e-08, -4.3145e-07,
        -1.0001e-07, -1.5960e-07, -2.7868e-06, -5.7850e-08, -1.0284e-07,
        -5.4791e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0562e-06, -1.5642e-06, -9.0642e-08, -1.1598e-07, -5.3179e-07,
        -1.0715e-07, -9.4768e-08, -3.9168e-07, -1.0970e-07, -3.2371e-06,
        -5.6278e-08, -2.3868e-08, -6.0657e-08, -2.4116e-06, -1.6693e-07,
        -1.0645e-06, -1.2966e-07, -1.4950e-05, -1.8938e-07, -1.0998e-07,
        -7.4760e-08, -1.0648e-07, -9.3225e-08, -1.1016e-07,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0718e-07, -5.5354e-07, -7.2764e-08, -4.1920e-08, -4.8564e-07,
        -5.5363e-08, -4.1869e-08, -2.1332e-07, -5.8712e-08, -1.9232e-06,
        -3.4679e-08, -2.3602e-08, -3.5157e-08, -8.6900e-06, -1.1220e-07,
        -1.2073e-07, -3.1910e-07, -1.4504e-07, -1.3459e-06, -8.3851e-08,
        -3.0762e-08, -6.8165e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5620e-07, -9.1328e-07, -8.2499e-08, -1.0531e-07, -6.0060e-07,
        -1.3395e-07, -8.0229e-08, -5.0059e-07, -1.0926e-07, -6.4687e-06,
        -7.0553e-08, -2.3967e-08, -5.3541e-08, -1.5893e-05, -1.8645e-07,
        -3.4952e-07, -1.9801e-07, -2.8173e-06, -1.6664e-07, -1.3180e-07,
        -1.9914e-07, -1.5052e-07, -2.9492e-08, -6.4260e-08,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3637e-06, -5.5790e-08, -1.0255e-07, -8.6155e-08, -1.0387e-07,
        -2.7313e-08, -7.9928e-08, -8.7066e-08, -6.7328e-08, -8.3436e-07,
        -4.6718e-08, -9.3223e-08, -1.8962e-06, -1.0849e-07, -3.0696e-08,
        -4.3104e-08, -1.0024e-07, -1.4299e-07, -5.5921e-07, -1.7234e-07,
        -7.0884e-08, -3.0585e-08, -9.2872e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6865e-07, -3.6488e-08, -6.4404e-08, -5.1203e-08, -5.3853e-08,
        -1.7611e-08, -2.3349e-08, -2.7428e-08, -2.6165e-08, -2.6355e-07,
        -2.3928e-08, -5.7258e-08, -1.1200e-06, -5.6330e-08, -6.8973e-09,
        -2.2365e-08, -6.0623e-08, -1.0378e-07, -2.0438e-07, -5.9950e-08,
        -1.4258e-06, -2.8061e-08, -4.2273e-08, -3.2978e-08,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6583e-06, -8.0887e-08, -1.4997e-07, -1.2639e-07, -1.4059e-07,
        -3.6881e-08, -7.6730e-08, -1.6769e-07, -8.2799e-08, -8.1087e-07,
        -3.8920e-08, -1.6857e-07, -4.7771e-06, -1.0152e-07, -4.3506e-09,
        -1.4182e-08, -1.3237e-07, -1.5991e-07, -6.7816e-07, -8.2640e-08,
        -7.3954e-08, -7.5160e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5263e-06, -2.3261e-07, -1.3237e-07, -1.1250e-07, -8.8229e-07,
        -3.1920e-08, -3.7498e-07, -6.5881e-08, -6.1991e-07, -1.7465e-07,
        -3.2295e-07, -1.0146e-06, -1.3563e-07, -5.5061e-08, -2.7398e-08,
        -1.6003e-07, -2.2102e-07, -4.7440e-07, -2.7168e-07, -5.7140e-06,
        -6.3681e-08, -3.2708e-07, -3.5877e-05, -1.3062e-07, -8.1728e-08,
        -1.6960e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0695e-06, -1.0243e-07, -6.6582e-08, -6.3583e-08, -4.8773e-07,
        -4.0025e-08, -2.1220e-07, -5.9710e-08, -3.3073e-07, -6.0744e-08,
        -1.7877e-07, -8.2112e-07, -6.0175e-08, -2.2545e-08,  7.1060e-09,
        -2.8669e-06, -1.0556e-07, -1.0645e-07, -1.2797e-07, -1.3459e-07,
        -1.5551e-07, -1.0732e-07, -1.6734e-05, -2.1082e-07, -5.2848e-08,
        -6.8015e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5341e-07, -6.5952e-08, -5.7968e-08, -2.9961e-08, -6.2144e-07,
        -2.3624e-08, -1.0634e-07, -5.6774e-08, -2.6210e-07, -1.0029e-07,
        -7.5331e-08, -7.8390e-07, -5.0404e-08, -2.5310e-08, -2.2006e-08,
        -6.8374e-08, -6.9562e-08, -1.4745e-07, -2.6019e-06, -3.9203e-08,
        -8.5419e-08, -2.6543e-06, -2.6389e-07, -4.4833e-08, -9.8366e-08,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4250: [tensor([-3.0142e-07, -1.3302e-06, -3.3917e-08, -1.1695e-07, -4.8578e-08,
        -8.8275e-08, -1.6698e-08, -3.1523e-08, -3.4733e-07, -1.3003e-07,
        -1.1014e-08,  2.3463e-10, -2.4909e-08, -1.0863e-07, -8.0794e-08,
        -3.5791e-06, -1.6263e-07, -6.8507e-08, -3.9212e-08, -4.8097e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7869e-07, -2.3302e-06, -2.6880e-07, -5.2751e-07, -9.6171e-08,
        -5.0277e-07, -8.2944e-08, -4.7524e-08, -1.0398e-06, -3.6994e-07,
        -8.0231e-08, -3.1293e-08, -1.4254e-07, -4.0101e-07, -5.1344e-07,
        -3.2407e-07, -3.4196e-07, -3.6399e-05, -3.2277e-07, -1.2743e-07,
        -1.8191e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4693e-07, -6.6243e-07, -3.4650e-08, -2.8551e-08, -1.3336e-07,
        -1.2764e-08, -3.0741e-08, -2.3780e-08, -2.5799e-07, -1.7865e-09,
        -3.7399e-08, -6.3878e-09, -9.7586e-09, -8.3536e-09, -5.8281e-07,
        -4.1086e-08, -2.4456e-07, -2.9860e-08, -2.4609e-08, -1.5673e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9913e-07, -1.1360e-06, -8.3946e-08, -1.0064e-07, -2.9109e-07,
        -6.9295e-08, -9.7339e-08, -5.1327e-08, -6.3703e-07, -7.4201e-09,
        -1.1539e-07, -3.8280e-08, -2.1938e-08, -3.1448e-08, -2.3608e-06,
        -1.6177e-07, -5.0603e-07, -5.6673e-08, -1.0726e-07, -2.9739e-07,
        -8.1552e-08, -6.0902e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7822e-07, -9.7012e-07, -3.8923e-08, -8.3066e-08, -1.5846e-07,
        -6.6827e-08, -7.2838e-08, -3.8288e-08, -5.1489e-07, -9.6507e-09,
        -6.4272e-08, -1.8681e-08, -2.0424e-09, -3.7171e-08, -1.2802e-06,
        -7.0743e-08, -4.8159e-07, -2.6647e-07, -6.4560e-08, -3.7727e-08,
        -2.1897e-08, -2.9881e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1683e-06, -8.3597e-07, -4.7174e-08, -1.3950e-07, -2.9825e-08,
        -1.9316e-07, -2.4830e-08, -6.0668e-08, -3.0066e-08, -2.0572e-07,
        -1.8434e-07, -4.3876e-07, -7.5477e-08, -8.0841e-08, -2.8905e-07,
        -6.5033e-09, -3.7516e-08, -2.7522e-08, -1.2393e-08, -3.2568e-06,
        -7.9074e-08, -8.0562e-07, -7.3107e-08, -1.1792e-07, -2.9850e-06,
        -4.4230e-08, -1.0778e-07, -3.8023e-08, -6.3412e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2288e-07, -6.5432e-07, -3.0378e-08, -6.8114e-08, -2.1501e-08,
        -1.8734e-07, -1.5979e-08, -3.8827e-08, -1.7286e-08, -7.3384e-08,
        -1.2132e-07, -3.3963e-07, -3.4188e-08, -6.0787e-08, -3.1869e-07,
        -1.9906e-08, -3.4429e-08, -6.5059e-09, -1.7246e-08, -1.2710e-07,
        -4.4348e-08, -1.7582e-07, -2.7359e-07, -3.0635e-08, -5.3608e-08,
        -4.9685e-08, -5.5621e-08, -8.1049e-07, -6.1917e-07, -1.4621e-07,
        -6.0256e-08, -3.7835e-08, -3.0089e-08, -2.4357e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7275e-07, -1.4061e-06, -4.0883e-08, -1.0348e-07, -4.6957e-08,
        -1.8255e-07, -3.0079e-08, -5.3400e-08, -2.3231e-08, -2.6214e-07,
        -1.6252e-07, -6.5273e-07, -6.0167e-08, -1.1223e-07, -5.5281e-07,
        -1.9993e-08, -3.2616e-08, -3.1459e-08, -2.0526e-08, -1.0347e-06,
        -6.8567e-08, -2.3684e-07, -6.5772e-08, -5.0117e-08, -1.8627e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7277e-07, -1.1049e-06, -3.9498e-08, -1.1443e-07, -3.2166e-09,
        -3.1483e-08, -3.2140e-08, -8.6717e-08, -5.2131e-07, -4.3330e-08,
        -1.0139e-07, -1.6276e-07, -7.9574e-08, -7.6153e-08, -2.4697e-09,
        -2.8606e-08, -2.6138e-06, -8.0581e-08, -2.2343e-07, -5.7094e-08,
        -6.7488e-08, -8.1273e-08, -9.3620e-08, -2.6245e-06, -1.5087e-07,
        -2.3863e-07, -6.0305e-07, -1.1602e-07, -3.2974e-08, -4.2991e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2403e-08, -6.1841e-07, -4.4691e-08, -1.1788e-07, -9.0391e-09,
        -4.2879e-08, -4.7027e-08, -9.2129e-08, -4.8859e-07, -3.8319e-08,
        -9.3844e-08, -1.7183e-07, -5.2794e-08, -6.4952e-08, -9.6662e-09,
        -1.2149e-08, -1.0658e-06, -7.7408e-08, -1.5269e-07, -1.9539e-08,
        -8.1438e-08, -1.0421e-07, -4.3981e-08, -5.9114e-06, -6.8102e-08,
        -1.8147e-07, -7.1906e-07, -1.0130e-07, -3.6465e-08, -4.8660e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4521e-07, -2.9201e-06, -2.6919e-08, -9.4132e-08, -5.8086e-09,
        -3.9426e-08, -3.1798e-08, -5.1308e-08, -3.0704e-07, -5.3037e-08,
        -5.5969e-08, -8.5449e-08, -3.0765e-08, -3.8722e-08, -5.9693e-09,
        -1.4745e-08, -9.3222e-07, -6.2976e-08, -1.3732e-07, -1.6169e-08,
        -6.3231e-08, -1.0323e-07, -2.3554e-07, -2.1532e-08, -3.5167e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3798e-06, -1.2999e-06, -1.1085e-07, -7.9299e-08, -6.2691e-09,
        -5.4581e-08, -4.7824e-07, -1.1401e-07, -9.5180e-08, -9.1356e-08,
        -1.0512e-07, -7.2935e-08, -1.9325e-07, -1.6345e-06, -1.4167e-08,
        -3.3178e-08, -6.4729e-08, -1.8628e-05, -1.4810e-07, -1.4250e-07,
        -1.1784e-07, -1.6077e-07, -7.7509e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4300: [tensor([-2.8024e-06, -1.3207e-06, -1.4660e-07, -6.6617e-07, -1.3514e-07,
        -9.8392e-08, -7.3704e-08, -5.2733e-07, -7.5397e-08, -3.1166e-08,
        -2.5303e-07, -4.8906e-06, -7.3685e-08,  2.9527e-08, -6.0770e-08,
        -1.2607e-07, -2.2559e-07, -6.6368e-07, -1.1801e-07,  5.5747e-08,
         1.7197e-09, -5.2746e-06, -1.6558e-07, -8.9271e-07, -2.4766e-07,
        -2.6446e-07, -2.7066e-05, -2.4519e-07, -4.1330e-07, -1.3421e-06,
        -9.6365e-07, -2.1035e-07, -1.0111e-07, -1.1264e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0648e-06, -2.6945e-06, -9.9863e-08, -4.5668e-07, -6.1924e-07,
        -3.1230e-08, -2.9828e-08, -1.5218e-07, -5.2181e-07,  8.3957e-09,
        -6.3659e-08, -7.8598e-06, -6.3818e-08, -3.3648e-08,  2.2643e-08,
        -1.3465e-08, -2.1163e-06, -1.7217e-07, -2.0826e-07, -5.7468e-08,
        -3.5235e-08, -6.6811e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5226e-06, -1.5226e-06, -8.4057e-08, -4.7378e-07, -6.4846e-07,
        -6.3918e-08, -1.6644e-08, -1.4281e-07, -6.0101e-07, -2.9463e-09,
        -1.1261e-07, -2.9255e-06, -9.1482e-08, -5.2929e-08,  8.2921e-09,
        -1.6307e-08, -1.1209e-06, -1.9630e-07, -1.2543e-07, -1.7347e-07,
        -2.2150e-08, -3.6072e-08, -1.1536e-05, -1.7334e-07, -5.8830e-08,
        -9.8511e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6069e-06, -2.3077e-06, -2.7873e-07, -1.3558e-06, -1.4053e-06,
        -1.4228e-07, -3.6697e-08, -4.1694e-07, -1.3144e-06,  4.6920e-08,
        -2.7924e-07, -7.4034e-06, -3.0596e-07, -9.4879e-08,  5.2222e-08,
        -9.6757e-08, -1.9157e-06, -3.0631e-07, -2.7486e-07, -3.6390e-07,
        -2.6085e-07, -2.0667e-07, -4.4965e-05, -1.7885e-07, -5.7837e-08,
        -2.3052e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6493e-07, -3.2734e-08, -5.1734e-08, -4.2230e-08, -1.0168e-07,
        -1.3209e-08, -3.7994e-08, -2.1417e-08, -6.6707e-09, -4.8977e-08,
        -1.1343e-08, -1.6655e-08, -1.4116e-08, -2.5395e-08, -3.4404e-08,
        -2.9462e-07, -5.3002e-08, -3.1794e-08, -2.4915e-08, -2.4213e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3442e-07, -5.4474e-08, -1.8836e-07, -7.5836e-08, -1.8560e-07,
        -4.1818e-08, -9.1059e-08, -2.6340e-08, -1.3237e-08, -4.3084e-08,
        -3.1554e-08, -3.5415e-08, -9.7943e-09, -4.2985e-08, -6.0117e-08,
        -3.8977e-07, -4.4102e-08, -2.7893e-06, -8.9289e-08, -1.4478e-07,
        -5.1143e-08, -3.7857e-08, -1.1103e-07, -1.0711e-07, -2.3450e-08,
        -4.5274e-08, -6.7983e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4720e-07, -4.3622e-08, -1.3401e-07, -4.4451e-08, -1.0647e-07,
        -5.5821e-09, -3.2552e-08, -3.0485e-08, -1.0269e-08, -4.8115e-08,
        -1.5167e-08, -8.5907e-09, -2.1547e-08, -1.1779e-08, -4.1420e-08,
        -3.5126e-07, -2.8792e-08, -5.5854e-08, -2.9028e-07, -3.0699e-08,
        -1.3837e-08, -3.4665e-08, -2.3504e-08, -2.5208e-08, -2.4470e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2007e-06, -3.7322e-08, -2.1586e-08, -2.4341e-08, -2.2132e-08,
        -5.1369e-08, -2.6694e-07, -8.2342e-08, -1.8519e-08, -2.7771e-08,
        -8.7763e-07, -5.9870e-08, -6.6628e-08, -9.0040e-08, -4.8120e-08,
        -5.0007e-08, -4.5561e-07, -5.7296e-08, -1.2400e-07, -1.5450e-06,
        -5.8337e-08, -4.3564e-08, -4.4698e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5097e-07, -1.5256e-08, -6.7625e-09, -1.9696e-08, -6.1527e-09,
        -2.2535e-08, -9.5946e-08, -4.1211e-08, -8.5180e-09, -1.2817e-08,
        -9.8832e-07, -2.3647e-08, -2.2219e-08, -4.9123e-08, -2.8453e-08,
        -3.2275e-08, -5.1141e-08, -3.2877e-08, -1.4261e-06, -3.2373e-08,
        -8.8502e-08, -5.0793e-07, -2.5001e-08, -1.6411e-08, -2.7169e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0106e-07, -2.7627e-08, -2.2012e-08, -1.5539e-08, -1.9270e-08,
        -2.8997e-08, -1.9112e-07, -8.4666e-08, -1.3518e-08, -1.3132e-08,
        -6.8968e-07, -5.0676e-08, -4.7568e-08, -4.6975e-08, -1.0230e-06,
        -1.2075e-07, -6.8756e-08, -4.1330e-07, -4.9335e-08, -1.6649e-07,
        -2.4446e-08, -3.0791e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4880e-07, -1.5263e-06, -2.7035e-08, -1.7324e-07, -7.9576e-08,
        -3.7015e-09, -1.0952e-08, -5.4515e-08, -2.8360e-08, -8.4365e-08,
        -1.1628e-08, -2.3583e-08, -1.4992e-07, -7.5855e-08, -5.5522e-08,
        -8.1325e-06, -6.0926e-08, -5.5504e-08, -1.2367e-07, -3.5701e-08,
        -5.3088e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8122e-07, -1.6595e-06, -2.6389e-08, -1.4502e-07, -4.3853e-08,
        -6.9633e-09, -2.4041e-08, -2.5080e-08, -3.6522e-08, -6.8919e-08,
        -1.8028e-09, -2.2176e-08, -8.6507e-08, -1.2101e-07, -8.8955e-08,
        -1.8285e-06, -9.8946e-08, -1.6618e-06, -2.4384e-06, -7.9590e-08,
        -3.9801e-08, -5.6981e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4350: [tensor([-2.2779e-07, -4.1255e-08, -3.4185e-08, -5.0465e-07, -3.1040e-08,
        -7.0914e-08, -4.7102e-08, -2.1060e-08, -2.5199e-08, -2.9195e-08,
        -3.5135e-08, -5.5749e-08, -5.2075e-08, -5.6622e-08, -5.1492e-08,
        -7.9351e-09, -2.6635e-08, -5.8537e-08, -8.4430e-08, -5.1091e-07,
        -9.7551e-07, -8.3292e-07, -3.3503e-08, -3.8478e-08, -7.3897e-08,
        -2.9375e-08, -2.9932e-08, -9.3764e-08, -8.2966e-07, -1.1259e-07,
        -6.7804e-08, -3.8055e-08, -5.1218e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7800e-06, -1.0279e-06, -9.0021e-08, -1.8506e-07, -4.6438e-07,
        -1.0292e-07, -1.1990e-07, -1.3586e-07, -5.4115e-08, -3.0257e-07,
        -1.4158e-07, -1.4126e-07, -1.1820e-07, -1.8319e-07, -1.1097e-07,
        -1.6564e-07, -1.2900e-07, -4.9442e-06, -3.0941e-07, -1.5807e-07,
         4.9678e-08, -4.0896e-08, -2.4505e-05, -1.6390e-07, -2.7170e-06,
        -2.1341e-07, -2.1291e-07, -2.5348e-07, -9.4210e-06, -3.6844e-07,
        -2.3516e-07, -1.7081e-07, -2.2171e-07, -9.8618e-08, -1.9705e-07,
        -3.4721e-07, -2.0965e-07, -1.3174e-07, -1.7610e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7108e-06, -1.2147e-06, -1.2754e-07, -3.0651e-07, -5.2124e-07,
        -1.8217e-07, -1.0831e-07, -1.6643e-07, -7.9357e-08, -5.4509e-07,
        -1.7684e-07, -1.4336e-07, -1.4159e-07, -2.2806e-07, -1.4443e-07,
        -2.0133e-07, -1.8866e-07, -3.3160e-06, -3.3694e-07, -1.4735e-07,
         1.7302e-08, -7.9742e-08, -3.0017e-05, -2.1980e-07, -2.4904e-06,
        -3.6318e-07, -1.0689e-05, -2.4614e-07, -2.3975e-07, -1.0098e-07,
        -2.0994e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0715e-06, -1.9641e-06, -1.1616e-07, -3.3066e-07, -5.7930e-07,
        -1.3009e-07, -1.3220e-07, -1.7648e-07, -7.8673e-08, -4.9574e-07,
        -8.1378e-08, -2.0680e-07, -2.0102e-07, -2.7000e-07, -1.5052e-07,
        -2.4199e-07, -2.3567e-07, -5.6706e-06, -4.3597e-07, -1.4585e-07,
         3.1494e-08, -1.1659e-07, -3.5045e-05, -2.7438e-07, -2.1948e-07,
        -1.2108e-05, -1.0678e-06, -2.5979e-07, -9.3279e-08, -1.7427e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1461e-06, -5.0429e-07, -4.7606e-07, -4.7520e-07, -2.6944e-06,
        -2.0564e-07, -8.8949e-08, -3.1184e-07, -4.6422e-07, -5.2498e-06,
        -6.6666e-07, -8.0758e-05, -1.2595e-06, -5.1100e-07, -6.0301e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4038e-07, -4.6994e-08, -1.0479e-07, -4.8464e-08, -4.5124e-07,
        -5.3351e-08, -2.1219e-08, -3.2788e-08, -6.7825e-08, -2.0367e-06,
        -6.6064e-08, -8.1768e-08, -6.1580e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6224e-07, -1.1928e-07, -1.7102e-07, -6.9948e-08, -8.1660e-07,
        -6.3796e-08, -2.9105e-08, -3.3922e-08, -2.0243e-07, -4.8371e-06,
        -9.2835e-08, -9.6758e-08, -1.3577e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2063e-07, -9.9321e-07, -1.8185e-08, -5.6762e-07, -3.3824e-07,
        -3.5879e-08, -4.9306e-08, -1.3172e-07, -1.6211e-08, -4.2070e-07,
        -3.0665e-08, -1.7888e-07, -4.7464e-08, -8.3022e-08, -4.6764e-08,
        -6.6629e-07, -6.5431e-08, -1.7169e-08, -1.3894e-08, -1.9462e-07,
        -4.6431e-07, -5.0447e-06, -6.9816e-08, -1.1395e-07, -3.8205e-08,
        -1.0685e-06, -8.8641e-08, -1.9132e-07, -1.4014e-08, -3.5627e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.2997e-07, -2.6369e-06, -1.9027e-08, -2.6565e-07, -2.2797e-07,
        -2.1180e-08, -4.0165e-08, -1.5570e-07, -4.4993e-09, -1.9782e-07,
        -1.8987e-08, -7.0577e-08, -1.8769e-08, -4.7319e-08, -3.1667e-08,
        -4.3062e-07, -4.5687e-08, -1.7457e-08, -1.1160e-08, -1.3622e-07,
        -6.8355e-07, -2.2027e-06, -3.2104e-08, -9.3421e-08, -5.0112e-08,
        -5.9347e-08, -1.2233e-07, -4.2277e-08, -4.4269e-08, -1.0358e-07,
        -3.2399e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5233e-08, -8.5713e-07, -4.2395e-08, -4.4944e-07, -4.9129e-07,
        -4.1494e-08, -1.2281e-07, -3.0704e-07, -1.9373e-08, -5.1541e-07,
        -5.0698e-08, -2.3633e-07, -6.7718e-08, -1.3622e-07, -4.7140e-08,
        -1.0599e-06, -9.5047e-08, -1.6117e-08, -3.7023e-08, -2.5916e-07,
        -8.3057e-07, -3.5793e-06, -4.9389e-08, -4.4280e-08, -5.7280e-06,
        -8.5883e-08, -6.7544e-07, -1.6106e-07, -9.5011e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8768e-07, -1.3850e-06, -6.4158e-08, -5.9691e-08, -9.5616e-08,
        -5.9425e-08, -3.1751e-08, -6.4284e-07, -4.3092e-08, -5.7832e-08,
        -7.0735e-07, -1.7844e-07, -2.7856e-08, -4.0544e-08, -7.1046e-08,
        -2.3779e-07, -1.3473e-07, -1.5178e-07, -2.3326e-06, -1.1638e-07,
        -4.4064e-08, -5.6369e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5575e-07, -4.1129e-07, -5.3696e-08, -6.1051e-08, -1.0708e-07,
        -3.3580e-08, -4.4618e-08, -9.7743e-07, -5.3639e-08, -4.4486e-08,
        -5.3929e-07, -1.0595e-07, -9.4337e-09, -3.2769e-08, -7.5454e-08,
        -9.8540e-08, -6.2361e-08, -7.6978e-06, -9.2625e-08, -1.6881e-07,
        -1.0683e-07, -3.6409e-08, -4.6372e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4400: [tensor([-7.4311e-07, -3.9644e-08, -5.0909e-08, -3.2170e-08, -4.8379e-07,
        -7.6081e-09, -3.0753e-08, -5.8371e-08, -1.7905e-07, -4.0445e-06,
        -2.9414e-08, -1.0876e-07, -3.5379e-09, -2.9774e-08, -1.1019e-07,
        -1.1933e-07, -1.1743e-07, -7.1631e-08, -1.2592e-06, -1.7233e-07,
        -8.8030e-09, -3.2281e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0976e-07, -4.1292e-08, -4.9405e-08, -2.7813e-08, -5.1894e-07,
        -2.1457e-08, -2.4762e-08, -5.4847e-08, -2.0851e-07, -2.9753e-06,
        -2.7047e-08, -1.3716e-07, -1.0593e-08, -2.7046e-08, -1.2339e-07,
        -1.0037e-07, -1.5844e-07, -7.8217e-08, -7.3912e-08, -4.0922e-06,
        -2.0689e-07, -4.9126e-08, -2.4951e-07, -6.1064e-07, -1.2258e-07,
        -2.8366e-08, -3.5870e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4074e-07, -6.5902e-08, -5.7768e-08, -6.8051e-08, -4.5677e-07,
        -3.1914e-08, -1.1911e-07, -2.6468e-08, -1.2978e-08, -5.6225e-06,
        -7.7958e-08, -1.0308e-07,  1.2353e-09, -3.7259e-08, -1.2502e-07,
        -8.8937e-08, -8.8173e-08, -1.6092e-07, -2.9365e-06, -9.1051e-08,
        -3.1407e-08, -2.9686e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4612e-07, -4.8025e-08, -5.2182e-08, -3.5338e-08, -3.6619e-07,
        -1.7953e-08, -1.0017e-07, -3.1665e-08, -4.5552e-09, -1.4921e-06,
        -4.5928e-08, -1.1377e-07, -8.1469e-09, -2.9924e-08, -9.8757e-08,
        -8.6956e-08, -3.4427e-08, -3.8766e-08, -1.6179e-06, -1.9111e-07,
        -2.6676e-08, -4.8847e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3214e-07, -4.4922e-08, -5.0652e-08, -2.7301e-08, -4.8103e-07,
        -3.1661e-08, -9.2691e-08, -2.7599e-08, -1.2883e-08, -2.3887e-06,
        -4.9931e-08, -2.4973e-07, -2.4355e-08, -3.4658e-08, -1.6035e-07,
        -8.5356e-08, -7.2364e-08, -6.5012e-08, -1.5727e-07, -3.6528e-08,
        -7.1998e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7472e-07, -1.4026e-06, -2.7241e-08, -3.8764e-08, -1.9418e-07,
        -4.2685e-07, -2.2909e-08, -2.7491e-08, -4.2639e-08, -3.2772e-06,
        -3.1793e-08, -6.2236e-08, -8.5362e-09, -2.0172e-08, -1.2869e-06,
        -7.7886e-08, -6.1069e-08, -5.6459e-08, -4.9812e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5133e-07, -8.0541e-07, -2.4254e-08, -2.4389e-08, -1.3855e-07,
        -3.4637e-07, -1.9254e-08, -2.4826e-08, -2.5794e-08, -1.1538e-06,
        -1.2901e-08, -3.9796e-08, -7.9792e-09, -2.2945e-08, -8.4279e-07,
        -5.3726e-08, -6.0504e-08, -5.4950e-08, -6.1360e-08, -1.6994e-07,
        -4.8242e-07, -3.3514e-06, -5.2781e-08, -2.8841e-08, -3.5074e-08,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5129e-07, -8.1368e-07, -3.3961e-08, -4.8302e-08, -2.4866e-07,
        -5.4521e-07, -6.3504e-08, -3.3728e-08, -5.7532e-08, -2.4054e-06,
        -4.7096e-08, -8.7536e-08, -9.2855e-09, -3.7750e-08, -8.1267e-07,
        -1.1215e-07, -4.5618e-07, -1.2784e-05, -1.0887e-07, -1.4381e-07,
        -8.9489e-08, -2.1873e-07, -3.6588e-08, -7.1711e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3574e-07, -1.0248e-07, -5.0864e-08, -7.9389e-08, -9.7947e-09,
        -3.4926e-07, -6.6277e-08, -3.8346e-08, -6.1449e-07, -4.5469e-08,
        -2.3065e-07, -7.0568e-08, -1.1799e-08, -3.9720e-08, -9.5161e-08,
        -1.3039e-07, -2.6819e-07, -5.8833e-08, -3.7062e-06, -1.7261e-07,
        -1.8883e-08, -3.0904e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4787e-07, -9.8557e-08, -5.4099e-08, -5.4244e-08, -8.0533e-09,
        -3.2679e-07, -6.9930e-08, -4.3006e-08, -8.7049e-07, -8.9911e-08,
        -1.8959e-07, -7.0807e-08, -1.7633e-08, -3.2459e-08, -1.1583e-07,
        -6.6789e-08, -2.7360e-07, -7.6646e-08, -4.5261e-08, -5.1400e-07,
        -3.1062e-07, -2.7379e-08, -7.5803e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7048e-08, -1.1495e-07, -4.3188e-08, -5.2228e-08, -1.0802e-08,
        -2.1540e-07, -3.2179e-08, -4.8320e-08, -4.6902e-07, -4.9826e-08,
        -1.5522e-07, -6.1910e-08, -2.5338e-08, -2.3061e-08, -1.0103e-07,
        -6.9235e-08, -1.5361e-07, -3.2142e-08, -1.5933e-07, -6.5725e-07,
        -2.5054e-08, -1.6370e-08, -9.7369e-08, -4.1135e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8465e-07, -1.5876e-06, -2.1678e-07, -5.6729e-08, -1.4868e-07,
        -4.2826e-07, -8.9281e-07, -6.6051e-08, -1.5322e-07, -2.1170e-07,
        -1.0853e-07, -1.4888e-05, -6.2974e-08, -2.6879e-08, -4.0830e-08,
        -7.3352e-06, -2.5475e-07, -7.2530e-06, -3.1689e-07, -6.0515e-06,
        -9.9598e-08, -7.5085e-08, -1.6904e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4450: [tensor([-7.3088e-07, -3.5666e-08, -4.7816e-08, -9.0102e-08, -1.6932e-08,
        -3.3333e-08, -1.4481e-07, -7.8105e-07, -2.6438e-08, -1.8815e-08,
        -7.5296e-09, -2.2710e-08, -8.2561e-08, -9.3222e-08, -2.6329e-08,
        -6.2533e-08, -6.8531e-08, -3.9573e-08, -5.6485e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9115e-07, -8.6589e-08, -1.8592e-07, -6.1740e-07, -8.0226e-08,
        -1.9635e-08, -1.5877e-07, -6.5880e-08, -7.5347e-08, -3.8892e-08,
        -1.6597e-07, -3.6588e-07, -1.1677e-06, -2.9179e-07, -3.7951e-07,
        -1.0507e-07, -1.6105e-07, -2.1011e-05, -1.4810e-07, -7.3799e-08,
        -1.1784e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5378e-08, -7.8732e-08, -7.3714e-08, -5.5805e-07, -4.1449e-08,
        -2.6887e-08, -6.8913e-08, -6.2542e-08, -2.5898e-08, -8.5514e-09,
        -8.3266e-08, -1.5521e-06, -6.8968e-08, -7.0770e-06, -1.3301e-07,
        -6.0207e-08, -7.1823e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5450e-07, -6.0615e-08, -8.8241e-08, -3.0958e-07, -4.3439e-08,
        -1.8252e-09, -8.9505e-08, -4.7888e-08, -3.6464e-08, -2.9960e-08,
        -8.1815e-08, -2.6506e-07, -1.0324e-06, -6.9273e-08, -2.2471e-07,
        -5.2637e-08, -1.0877e-06, -2.4663e-07, -5.5931e-08, -8.8674e-08,
        -7.2677e-06, -5.0155e-08, -6.3766e-08, -5.2430e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2149e-07, -3.3687e-08, -3.0387e-08, -3.3240e-08, -3.5579e-08,
        -1.4667e-08, -4.4629e-08, -1.6035e-08, -3.4591e-08, -4.7333e-08,
        -4.4436e-08, -3.2190e-06, -2.5545e-08, -1.0023e-07, -3.0895e-08,
         2.9794e-09, -1.5229e-08, -4.7383e-08, -7.9388e-08, -3.4142e-07,
        -3.3658e-08, -1.2040e-07, -1.7719e-07, -4.5315e-07, -8.6437e-08,
        -1.2637e-08, -1.7069e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0049e-08, -2.5321e-08, -2.4455e-08, -4.5398e-08, -3.9155e-08,
        -8.1157e-09, -2.8953e-08, -2.4898e-08, -6.4790e-08, -5.4911e-08,
        -3.6286e-08, -3.6338e-06, -1.3501e-08, -1.1740e-07, -5.0464e-08,
        -8.5675e-09, -1.5815e-08, -8.0108e-08, -1.3892e-07, -3.4163e-07,
        -2.2021e-08, -1.8746e-09, -6.0913e-08, -3.0350e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8410e-06, -8.9161e-08, -7.7866e-08, -1.1447e-07, -5.9622e-08,
        -3.4698e-08, -1.1087e-07, -3.9186e-08, -1.2387e-07, -8.6460e-08,
        -1.2349e-07, -2.6053e-06, -7.8327e-08, -2.3800e-07, -7.1912e-08,
         1.2551e-09, -5.5527e-08, -1.2138e-07, -2.0225e-07, -8.8435e-08,
        -8.6462e-08, -1.4349e-06, -5.0395e-07, -2.6165e-07, -3.7153e-07,
        -1.4669e-07, -1.7170e-07, -1.7533e-05, -7.8278e-08, -8.4724e-08,
        -3.1652e-08, -9.8174e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0260e-06, -6.0288e-08, -9.0851e-08, -7.8780e-08, -1.6477e-08,
        -3.5907e-08, -4.2464e-07, -3.0207e-09, -5.8662e-08, -8.5816e-08,
        -3.3973e-08, -1.5302e-07, -2.8380e-08, -3.5005e-08, -5.9149e-07,
        -9.2351e-08, -6.7329e-08, -1.9988e-06, -4.9688e-08, -1.8080e-06,
        -1.0887e-06, -2.9159e-08, -3.7812e-08, -4.1083e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4940e-07, -3.8979e-08, -5.1494e-08, -6.0655e-08, -6.8484e-09,
        -2.4349e-08, -3.9549e-07, -1.6722e-09, -5.7727e-08, -4.7689e-08,
        -2.5684e-08, -7.9153e-08, -2.2022e-08, -2.4770e-08, -5.7986e-08,
        -4.4804e-07, -3.6673e-08, -1.5875e-07, -6.4471e-07, -3.5872e-08,
        -1.4781e-07, -5.6974e-08, -2.0687e-07, -4.0520e-08, -7.7415e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8873e-08, -2.7111e-08, -4.7872e-08, -3.7701e-08, -2.6589e-08,
        -3.3850e-08, -4.6276e-07, -4.6266e-09, -5.1216e-08, -2.2454e-08,
        -2.0175e-08, -9.6731e-08, -2.6183e-08, -2.9634e-08, -7.1634e-08,
        -4.3601e-07, -4.0830e-08, -8.3663e-08, -3.2193e-08, -4.8550e-08,
        -1.1908e-07, -3.6458e-08, -6.9272e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1019e-06, -1.4572e-07, -1.2540e-07, -9.9341e-08, -3.2626e-07,
        -6.6285e-08, -1.3209e-07, -6.7560e-08, -1.3332e-07, -1.3278e-07,
        -5.8701e-08, -1.0514e-06, -1.2884e-06,  1.6910e-08,  2.5242e-08,
        -3.0714e-08, -1.5787e-07, -1.6334e-07, -3.7612e-07, -2.5745e-07,
        -1.0787e-07, -1.8925e-07, -4.1930e-06, -1.4234e-07, -4.0460e-07,
        -8.9361e-07, -1.4975e-05,  7.4571e-09, -7.4800e-08, -5.6280e-08,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2032e-07, -7.8215e-08, -1.2827e-07, -7.2510e-08, -2.8198e-07,
        -5.6422e-08, -1.1217e-07, -2.6237e-08, -8.2460e-08, -6.2083e-07,
        -3.2943e-08, -6.7289e-07, -1.2490e-06, -2.7455e-09, -1.7212e-08,
        -3.2542e-08, -1.0159e-07, -1.1956e-07, -3.2800e-07, -7.8218e-07,
        -1.0169e-05, -1.0866e-08, -6.8676e-08, -4.9685e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4500: [tensor([-7.4891e-06, -2.1824e-06, -2.1792e-07, -3.2234e-07, -1.3784e-07,
        -2.4378e-07, -1.2316e-07, -1.0494e-07, -2.9328e-07, -2.1139e-07,
        -1.9561e-07, -5.6642e-07, -1.9672e-06, -1.8884e-07,  3.7613e-08,
        -1.3919e-07, -2.7921e-06, -2.6516e-07, -1.5413e-06, -3.8922e-07,
        -2.9920e-07, -5.4079e-07, -3.6231e-05, -4.0868e-07, -1.0095e-06,
        -3.6505e-07, -3.0466e-08, -2.3093e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2348e-06, -1.2646e-06, -4.5577e-08, -4.5091e-08, -1.4436e-08,
        -3.6285e-08, -3.0584e-08, -3.3873e-09, -7.2758e-08, -6.1765e-08,
        -3.4970e-08, -1.1949e-07, -6.4947e-07, -1.1045e-08, -6.2783e-09,
        -1.1256e-08, -1.2662e-06, -1.1166e-07, -1.7499e-07, -2.0891e-08,
        -2.0715e-07, -7.3773e-08, -6.0170e-08, -5.3595e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1793e-07, -1.1784e-07, -1.0450e-07, -1.0899e-07, -1.1515e-06,
        -5.2175e-08, -2.9855e-08, -4.3678e-08, -1.5113e-07, -1.7142e-06,
        -7.2005e-08, -1.9147e-07, -9.5200e-08, -9.3176e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7481e-07, -9.5746e-08, -8.2409e-08, -6.7090e-08, -5.4056e-07,
        -4.8346e-08, -2.1124e-08, -3.2924e-08, -5.1099e-07, -1.0660e-07,
        -3.3845e-07, -7.9050e-08, -8.0743e-08, -7.4958e-08, -5.6267e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2095e-07, -5.6025e-08, -3.3846e-08, -6.0743e-08, -6.0765e-07,
        -5.8025e-08, -3.5841e-08, -4.1228e-08, -5.0834e-07, -1.8745e-07,
        -1.7011e-07, -1.8651e-07, -1.9619e-06, -1.2351e-06, -4.7221e-08,
        -6.5936e-08, -6.7822e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9939e-07, -1.3265e-06, -4.1624e-08, -3.7222e-08, -3.4153e-07,
        -4.2665e-08, -2.9719e-08, -2.3538e-08, -3.8783e-08, -4.8547e-08,
        -2.1987e-08, -2.6811e-06, -4.2710e-08, -2.3761e-07, -2.3827e-08,
        -8.3008e-08, -2.9348e-07, -7.7941e-08, -3.3562e-09, -1.4311e-08,
        -5.1836e-06, -7.3960e-08, -1.7907e-07, -7.6881e-08, -8.7792e-08,
        -3.9300e-08, -1.8722e-07, -4.9369e-08, -2.6954e-08, -8.7643e-07,
        -5.9081e-08, -9.4059e-08, -5.8125e-08, -1.3899e-08, -5.4419e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8330e-06, -1.2806e-06, -1.3072e-07, -1.8269e-07, -8.0924e-07,
        -1.0918e-07, -6.6382e-08, -6.3560e-08, -1.0996e-07, -1.3693e-07,
        -7.0533e-08, -3.4272e-06, -7.5518e-08, -4.5143e-07, -1.0962e-07,
        -2.6914e-07, -9.6568e-07, -2.8428e-07, -9.0064e-09, -4.5014e-08,
        -2.4900e-06, -3.7607e-07, -1.4686e-05, -1.5292e-07, -1.6805e-07,
        -2.1435e-07, -5.4377e-08, -1.3717e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7101e-06, -1.3172e-06, -8.0880e-08, -9.9596e-08, -3.6609e-07,
        -1.0975e-07, -4.3785e-08, -1.9271e-08, -8.4644e-08, -1.5936e-07,
        -6.2539e-09, -2.0680e-06, -9.3696e-08, -3.9115e-07, -7.6138e-08,
        -1.8964e-07, -5.3432e-07, -1.4059e-07,  1.0257e-08, -8.5174e-09,
        -7.5863e-06, -1.4461e-07, -4.7251e-08, -1.7200e-07, -2.7628e-06,
        -1.3877e-07, -5.9887e-07, -1.6649e-05, -1.2878e-07, -1.1979e-07,
        -7.1603e-08, -9.4885e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0088e-07, -1.0834e-07, -1.5152e-07, -1.4719e-07, -9.3465e-07,
        -1.2750e-07, -1.8304e-08, -7.2144e-08, -2.5080e-07, -1.3607e-06,
        -1.9974e-07, -1.4282e-05, -2.4217e-07, -1.3793e-07, -1.7811e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4835e-09, -2.3103e-08, -6.2791e-08, -3.7537e-08, -5.8352e-07,
        -3.7596e-08, -1.9534e-08, -2.2825e-08, -5.7482e-08, -6.5768e-07,
        -3.7076e-08, -1.1042e-07, -6.0728e-07, -1.0617e-07, -1.3766e-07,
        -4.0317e-08, -3.2330e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4871e-06, -5.7885e-08, -7.8163e-08, -8.2457e-08, -8.1993e-07,
        -4.4559e-08, -2.1390e-08, -3.0221e-08, -1.0191e-07, -1.9323e-06,
        -4.3475e-08, -1.4150e-07, -8.4492e-07, -1.4339e-07, -3.4102e-08,
        -7.0668e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2356e-07, -4.9067e-08, -7.5174e-08, -7.4157e-08, -6.8589e-08,
        -1.5956e-08, -2.5271e-07, -3.3772e-08, -9.1666e-08, -9.9081e-08,
        -6.3607e-07, -1.8168e-07, -9.2506e-08, -3.7877e-07,  1.8532e-09,
         1.0879e-08, -5.3773e-08, -1.4296e-06, -2.0981e-07, -7.7558e-07,
        -7.9075e-08, -8.5637e-07, -6.1101e-08, -1.5343e-07, -1.7130e-07,
        -7.0403e-08, -1.4709e-07, -1.0322e-07, -1.2628e-05, -8.6330e-08,
        -6.4949e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4550: [tensor([-5.9832e-08, -8.3717e-08, -7.2992e-08, -5.6363e-08, -7.6012e-09,
        -2.0959e-08, -3.6073e-08, -2.4939e-08, -4.1051e-08, -3.9998e-08,
        -2.2416e-08, -7.9213e-08, -8.6603e-08, -1.8057e-08, -7.3881e-08,
        -2.1428e-08, -2.2294e-08, -4.0280e-08, -7.0598e-08, -4.2641e-09,
        -6.4524e-09, -2.1813e-08, -3.7852e-07, -7.6961e-08, -2.0540e-06,
        -9.2821e-08, -3.2413e-07, -6.4982e-08, -4.5803e-08, -5.1976e-08,
        -5.2267e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0615e-06, -1.2077e-07, -7.3558e-08, -5.7926e-08, -2.5070e-08,
        -3.3934e-08, -4.7646e-08, -2.4873e-08, -3.4405e-08, -4.4552e-08,
        -2.0923e-08, -1.0493e-07, -1.7436e-07, -4.0810e-08, -1.1036e-07,
        -2.7324e-08, -1.7734e-08, -8.6717e-08, -8.1845e-08, -1.4412e-08,
        -1.6504e-09, -2.0578e-08, -2.9848e-07, -5.0207e-08, -2.6482e-06,
        -5.4731e-08, -3.9187e-09, -6.1469e-08, -2.7933e-08, -3.2550e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0427e-07, -9.7322e-08, -7.2175e-08, -4.5710e-08,  9.5074e-10,
        -2.7534e-08, -3.8797e-08, -5.6944e-09, -1.8497e-08, -3.7937e-08,
        -1.0734e-08, -1.1342e-07, -1.0575e-07, -2.0970e-08, -6.4554e-08,
        -2.1889e-08, -3.5547e-09, -6.0683e-08, -6.8139e-08, -1.3281e-08,
        -8.2759e-09, -1.7921e-08, -6.2536e-07, -3.5679e-07, -2.7771e-07,
        -6.2691e-08, -4.5169e-08, -2.0563e-08, -3.5072e-08, -2.8618e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0483e-07, -7.7372e-08, -6.4272e-08, -4.1956e-08, -1.9584e-08,
        -2.5840e-08, -4.8791e-08, -1.1306e-08, -1.6670e-08, -4.6430e-08,
        -2.4631e-08, -7.8715e-08, -1.3130e-07, -3.0380e-08, -9.4970e-08,
        -2.2121e-08, -1.3252e-08, -6.0078e-08, -7.0064e-08, -8.2014e-09,
         7.3756e-09, -2.1252e-08, -5.3474e-07, -6.6787e-08, -4.6074e-08,
        -7.4521e-08, -1.6756e-07, -6.6266e-06, -8.4715e-08, -7.4356e-08,
        -6.3365e-08, -3.5793e-06, -1.3668e-07, -2.0334e-08, -3.3983e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3897e-07, -6.3204e-08, -5.7408e-08, -3.8901e-08, -9.0472e-09,
        -1.1947e-08, -3.0745e-08, -1.3014e-08, -1.2999e-08, -3.4597e-08,
        -1.7315e-08, -5.4536e-08, -6.9951e-08, -1.0834e-08, -4.7702e-08,
        -1.1587e-08, -8.2557e-09, -1.7718e-08, -3.6842e-08, -6.4153e-09,
        -6.0757e-11, -1.4742e-08, -4.0490e-07, -6.0687e-08, -8.0532e-08,
        -1.4686e-08, -7.1710e-08, -3.7737e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0865e-07, -1.3004e-07, -1.0393e-07, -8.6315e-08, -3.0663e-08,
        -1.9508e-08, -7.3243e-08, -2.2584e-08, -3.6918e-08, -8.0211e-08,
        -2.7742e-08, -1.4121e-07, -1.7519e-07, -4.4067e-08, -1.0546e-07,
        -5.6444e-08, -2.0477e-08, -6.4871e-08, -8.3788e-08, -2.3395e-08,
        -1.1540e-08, -4.9396e-08, -5.7601e-07, -1.9244e-07, -2.7881e-07,
        -4.7087e-07, -2.3515e-08,  5.2506e-09, -8.6884e-08, -5.0875e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.9277e-07, -4.7756e-07, -3.0475e-08, -8.1786e-08, -3.2865e-08,
        -1.0650e-07, -1.7412e-08, -9.0581e-09, -1.6943e-08, -2.9748e-08,
        -2.9102e-08, -2.6002e-08, -3.6855e-08, -2.4798e-08, -2.8001e-07,
        -3.4934e-08, -5.6573e-08, -1.1274e-08, -4.3732e-08, -1.0076e-06,
        -4.8511e-08,  6.1822e-10, -9.3985e-09, -5.9822e-08, -2.9965e-08,
        -4.2037e-08, -3.1496e-06, -6.8897e-08, -5.5024e-08, -2.6995e-08,
        -5.0876e-08, -7.1332e-08, -1.4774e-07, -1.7437e-08, -3.3589e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1815e-07, -9.7322e-07, -2.7771e-08, -1.1898e-07, -2.3075e-08,
        -1.8268e-07, -3.8922e-08, -1.2802e-08, -1.2835e-08, -3.3941e-08,
        -3.6539e-08, -5.9594e-08, -4.8968e-08, -2.2365e-08, -3.7822e-07,
        -3.4786e-08, -4.7412e-08, -2.9462e-08, -2.9447e-08, -6.6314e-07,
        -4.3639e-08, -7.7962e-09, -2.8030e-08, -9.7852e-07, -6.5941e-08,
        -1.6673e-07, -1.0677e-07, -6.1973e-08, -2.4275e-08, -4.2770e-07,
        -1.7641e-08, -3.1020e-08, -7.8127e-08, -4.2118e-08, -2.7433e-08,
        -5.2868e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.2746e-07, -7.9654e-07, -3.4214e-08, -9.7429e-08, -2.8946e-08,
        -1.3916e-07, -1.8318e-08, -1.4717e-08, -2.7134e-08, -3.0159e-08,
        -1.4248e-08, -4.6989e-08, -4.1411e-08, -3.2692e-08, -1.8296e-07,
        -3.0358e-08, -3.2299e-08, -1.7177e-08, -3.1539e-08, -6.7274e-07,
        -2.3873e-08,  1.7075e-09,  3.9306e-09, -5.4248e-08, -2.6894e-07,
        -3.3981e-07, -9.2522e-08, -1.0112e-07, -9.0813e-07, -3.8301e-08,
        -3.7554e-08, -4.7353e-07, -5.0820e-08, -6.2049e-08, -1.5535e-07,
        -1.0509e-05, -8.8110e-08, -2.6040e-08, -2.8872e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0912e-06, -8.6489e-07, -7.8607e-08, -2.0073e-07, -8.6068e-08,
        -1.7398e-07, -6.2124e-08, -3.9211e-08, -4.5674e-08, -6.9298e-08,
        -2.5655e-08, -6.2526e-08, -6.5570e-08, -6.8196e-08, -2.9156e-07,
        -6.6539e-08, -9.2962e-08, -5.8815e-08, -7.3868e-08, -1.6206e-06,
        -8.0262e-08,  9.6449e-09, -6.5443e-09, -2.6088e-06, -8.1378e-08,
        -1.9925e-07, -1.9840e-07, -1.2379e-07, -1.3794e-07, -7.6160e-08,
        -1.2032e-07, -3.1878e-06, -1.2436e-07, -1.1387e-07, -1.0379e-07,
        -2.3954e-05, -1.6913e-07, -1.0132e-07,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3063e-07, -6.2584e-07, -1.6300e-08, -6.2377e-08, -2.9439e-08,
        -6.4703e-08, -1.6345e-08, -4.5737e-09, -1.4039e-08, -2.9750e-08,
        -1.7302e-08, -3.9867e-08, -3.7482e-08, -1.8874e-08, -4.8011e-07,
        -2.4110e-08, -3.6451e-08, -2.7656e-08, -3.4443e-08, -6.0939e-07,
        -4.0918e-08, -6.4889e-09, -3.1602e-08, -2.7401e-08, -4.3695e-09,
        -2.3741e-07, -3.2166e-07, -4.6226e-08, -2.7958e-08, -6.7514e-08,
        -6.1457e-08, -2.8563e-08, -3.2218e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7039e-07, -4.4340e-07, -2.4553e-08, -6.9900e-08, -2.7006e-08,
        -6.1877e-08, -2.4289e-08, -1.9571e-09, -1.5453e-08, -3.4530e-08,
        -2.0614e-08, -2.0024e-08, -2.6919e-08, -3.2273e-08, -4.0330e-07,
        -2.5776e-08, -4.3528e-08, -2.8372e-08, -2.3865e-08, -5.4067e-07,
        -2.8061e-08, -3.2738e-09, -1.4878e-08, -4.2576e-08, -2.8739e-08,
        -1.4198e-06, -1.8101e-07, -7.0092e-08, -1.1535e-06,  4.8521e-09,
        -2.8195e-08, -2.8710e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4600: [tensor([-6.0711e-07, -1.3070e-06, -1.3376e-07, -1.4359e-07, -6.7279e-07,
        -4.8963e-08, -3.7009e-07, -9.4563e-08, -7.6744e-07, -1.2763e-07,
         1.6824e-09, -5.0213e-08, -6.9668e-06, -2.0714e-07, -6.7652e-07,
        -1.7790e-07, -3.8861e-06, -1.3789e-07, -9.9678e-08, -1.2532e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8898e-08, -9.3326e-07, -3.1416e-08, -5.4686e-08, -3.9691e-07,
        -1.5752e-08, -1.5443e-07, -4.8627e-08, -2.7945e-07, -4.7028e-08,
        -2.2051e-09, -1.9125e-08, -7.5028e-08, -6.7354e-08, -7.0702e-08,
        -1.1402e-07, -7.2235e-08, -3.5070e-06, -8.1870e-08, -4.1934e-06,
        -6.2389e-08, -7.1561e-08, -7.6345e-08, -8.1534e-08, -5.9155e-08,
        -8.2060e-08, -2.1694e-07, -1.7848e-08, -5.9694e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0466e-06, -1.7445e-06, -1.0191e-07, -1.6447e-07, -8.2616e-07,
        -5.0938e-08, -2.5460e-07, -9.1638e-08, -5.9121e-07, -5.9334e-08,
         1.2146e-09, -5.5389e-08, -6.6976e-06, -1.4187e-07, -7.0777e-07,
        -2.4289e-07, -1.1335e-07, -4.5130e-06, -1.3425e-07, -8.0133e-08,
        -7.3728e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2543e-07, -1.0200e-06, -3.6610e-08, -4.5629e-08, -5.6513e-07,
        -1.6244e-08, -1.3570e-07, -4.3580e-08, -3.2520e-07, -6.0824e-08,
        -1.3548e-08, -2.6879e-08, -6.9842e-07, -9.7355e-08, -4.7746e-07,
        -6.4660e-08, -8.0147e-07, -4.6604e-08, -4.2843e-08, -4.5927e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5157e-07, -7.6548e-07, -5.4216e-08, -9.4866e-08, -5.0842e-07,
        -2.7759e-08, -1.6419e-07, -8.3513e-08, -4.7338e-07, -8.6985e-08,
        -4.1205e-09, -4.2733e-08, -6.5740e-06, -7.9768e-08, -3.4828e-07,
        -1.2820e-07, -6.8258e-08, -1.0248e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8306e-07, -8.9848e-07, -8.7208e-08, -7.6591e-08, -7.5062e-07,
        -1.9941e-08, -3.1563e-07, -6.7687e-08, -3.9527e-07, -6.1831e-08,
        -6.1101e-09, -5.7235e-08, -1.0472e-05, -1.4443e-07, -3.9730e-07,
        -1.1679e-07, -1.2055e-07, -1.4212e-07, -1.8175e-07, -3.4134e-08,
        -6.3470e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7180e-07, -2.3596e-06, -9.9510e-08, -9.6522e-08, -9.1461e-07,
        -5.9738e-08, -4.4645e-07, -1.0028e-07, -1.4551e-06, -1.0099e-07,
        -1.8925e-08, -5.3820e-08, -1.3641e-06, -1.6925e-07, -6.7950e-07,
        -1.9258e-07, -6.2369e-08, -1.6157e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3426e-07, -1.5288e-06, -1.2228e-07, -9.7102e-08, -5.1816e-07,
        -2.5875e-08, -2.2748e-07, -6.7526e-08, -5.6230e-07, -8.8399e-08,
        -1.0989e-08, -3.2445e-08, -1.5239e-05, -1.6309e-07, -6.2558e-07,
        -1.2824e-07, -1.2622e-07, -2.0312e-07, -7.4466e-08, -9.3057e-08,
        -9.5424e-08, -2.0974e-08, -4.5018e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1165e-07, -1.7685e-06, -6.3445e-08, -6.7642e-08, -5.5991e-07,
        -2.9761e-08, -2.2078e-07, -6.1035e-08, -8.1393e-07, -6.0236e-08,
        -1.9308e-08, -2.8580e-08, -1.6240e-06, -1.2622e-07, -4.8303e-07,
        -9.0262e-08, -5.1614e-08, -9.5686e-08, -6.3642e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8693e-07, -1.8513e-06, -8.1727e-08, -1.0475e-07, -4.7221e-07,
        -2.8594e-08, -3.1090e-07, -5.1781e-08, -5.1712e-07, -7.7254e-08,
        -2.1497e-08, -6.1328e-08, -5.0279e-06, -1.3919e-07, -4.2013e-07,
        -1.7620e-07, -2.0172e-06, -1.4432e-07, -7.6732e-08, -7.2586e-08,
        -6.5848e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0387e-07, -2.2951e-06, -8.9242e-08, -1.3155e-07, -1.0377e-06,
        -5.4193e-08, -3.1960e-07, -7.0480e-08, -6.7510e-07, -1.2260e-07,
        -2.8833e-08, -5.7120e-08, -1.0980e-06, -1.5017e-07, -6.2530e-07,
        -2.1359e-07, -9.4644e-08, -8.4759e-08, -1.6924e-07, -1.3490e-07,
        -1.4332e-07, -6.0294e-08, -1.0024e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8860e-06, -2.5487e-06, -4.6322e-08, -4.2107e-07, -1.4080e-06,
        -2.0321e-07, -1.4184e-07, -1.4589e-07, -2.8501e-07, -1.8902e-07,
        -1.2511e-07, -5.2861e-05, -3.5093e-07, -4.7912e-07, -1.5753e-06,
        -4.8979e-07, -3.0904e-07, -2.4197e-07, -3.7583e-07, -3.5120e-07,
        -3.7841e-07, -5.3925e-07, -7.8664e-08, -2.8424e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4650: [tensor([-1.2186e-06, -7.4667e-08, -2.1807e-08, -8.9438e-08, -4.1519e-08,
        -3.9833e-08, -4.5021e-08, -5.0321e-08, -9.6755e-09, -4.4442e-08,
        -9.1180e-07, -3.0608e-08, -1.2599e-07, -8.1370e-08, -1.4126e-08,
        -2.3102e-08, -6.7718e-07, -2.9311e-07, -1.3916e-07, -1.8991e-06,
        -9.9606e-08, -4.1686e-08, -8.8179e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6814e-07, -4.2487e-08, -1.6972e-08, -4.3395e-08, -1.9373e-08,
        -2.4683e-08, -3.5337e-08, -5.8248e-08, -2.7035e-08, -2.2759e-08,
        -3.3253e-07, -2.3911e-08, -9.6102e-08, -8.9178e-08, -2.0195e-09,
        -1.4723e-08, -4.7956e-07, -1.4783e-07, -6.8610e-08, -9.9206e-07,
        -5.3379e-08, -2.6434e-06, -1.2235e-07, -3.0986e-08, -5.5149e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4781e-07, -1.7615e-06, -5.6530e-08, -7.6268e-07, -5.9925e-08,
        -3.7776e-07, -4.2570e-08, -5.0249e-08, -3.5244e-07, -1.0332e-07,
        -7.6789e-09, -4.2806e-08, -2.4144e-06, -1.3262e-07, -7.4019e-07,
        -1.1562e-07, -7.2929e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3776e-07, -1.3694e-06, -4.5075e-08, -3.9358e-07, -4.9920e-08,
        -2.3564e-07, -3.0457e-08, -3.0198e-08, -3.6610e-07, -1.1831e-07,
        -1.1941e-08, -6.8255e-08, -1.0170e-06, -9.1387e-08, -5.8788e-07,
        -3.2020e-06, -9.7861e-08, -9.5484e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2131e-07, -1.6576e-06, -3.0036e-08, -6.4576e-07, -3.9267e-08,
        -4.4906e-07, -4.6694e-08, -3.4546e-08, -3.3841e-07, -6.9779e-08,
        -1.4929e-08, -3.7945e-08, -1.2642e-06, -8.3909e-08, -6.3800e-07,
        -6.8573e-08, -6.3734e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4613e-06, -1.4289e-06, -2.1874e-07, -2.1813e-07, -9.8373e-08,
        -9.3160e-08, -1.5118e-07, -1.4938e-06, -1.8311e-07, -1.6522e-07,
        -3.7676e-07, -1.4034e-07, -1.2195e-07, -6.9616e-07, -5.9569e-07,
        -1.7476e-07, -1.4134e-07, -5.0934e-07, -9.6751e-08,  6.2998e-08,
        -3.2333e-08, -1.5454e-06, -3.6954e-07, -6.9500e-07, -4.7856e-07,
        -2.2406e-07, -4.1246e-05, -2.0816e-07, -1.3750e-07, -1.8687e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6849e-06, -1.1294e-06, -7.7811e-08, -1.0662e-07, -2.4229e-08,
        -2.0433e-08, -5.8997e-08, -1.0212e-06, -6.6471e-08, -9.8556e-08,
        -1.3806e-07, -4.3734e-08, -5.8319e-08, -3.6919e-07, -2.3209e-07,
        -7.1203e-08, -5.1421e-08, -1.5374e-07, -6.0646e-08,  1.1126e-09,
        -1.5598e-08, -1.3474e-06, -2.8373e-07, -3.3858e-07, -1.4236e-07,
        -9.2537e-08, -1.1667e-07, -1.3684e-07, -3.3807e-07, -2.4404e-06,
        -3.9307e-08, -9.5735e-08, -4.5465e-06, -1.1318e-07, -4.2258e-08,
        -8.0759e-08], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4129e-07, -7.4942e-07, -4.5922e-08, -5.0564e-08, -1.9764e-08,
        -1.8928e-08, -2.1579e-08, -5.7000e-07, -4.5770e-08, -6.4626e-08,
        -7.4352e-08, -3.6148e-08, -6.0439e-08, -4.5926e-07, -1.3329e-07,
        -2.8617e-08, -1.7555e-08, -1.2412e-07, -3.3548e-08, -6.8906e-09,
        -1.5945e-08, -4.4169e-07, -1.0897e-07, -2.7309e-07, -5.0340e-08,
        -3.1751e-08, -5.9598e-09,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4012e-06, -1.1705e-06, -8.7641e-08, -1.0529e-07, -5.7494e-07,
        -2.8269e-08, -1.5715e-07, -3.2474e-08, -2.3612e-07, -9.8575e-08,
        -2.4320e-07, -5.7933e-07, -9.1988e-08, -1.4879e-07, -4.6465e-07,
        -1.1890e-07, -1.0418e-07, -6.7414e-08, -8.6657e-07, -2.4774e-08,
        -4.4481e-08, -1.4678e-08, -1.6266e-09, -5.8436e-07, -2.0565e-07,
        -1.2196e-06, -2.5735e-07, -8.4661e-08, -1.5130e-07, -2.0078e-05,
        -1.7574e-07, -1.2180e-07, -1.1217e-06, -1.2121e-06, -9.4074e-08,
        -1.2884e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6170e-07, -8.1272e-07, -8.6150e-08, -8.1046e-08, -7.2252e-07,
        -3.4586e-08, -8.5108e-08, -3.2694e-08, -2.5299e-07, -3.7182e-08,
        -1.7958e-07, -4.3526e-07, -9.5243e-08, -1.1517e-07, -3.9577e-07,
        -6.5461e-08, -7.2211e-08, -6.8414e-08, -6.6832e-07, -4.5127e-08,
        -1.4009e-08,  8.5278e-09, -1.8648e-08, -4.1797e-06, -1.8720e-07,
        -9.9538e-07, -2.3991e-07, -1.1389e-06, -3.6351e-06, -1.4132e-07,
        -9.7224e-08, -5.7364e-06, -1.1598e-07, -4.1318e-08,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1966e-06, -6.2387e-07, -4.2945e-08, -6.8863e-08, -5.4404e-07,
        -1.4920e-08, -5.9317e-08, -8.6697e-09, -1.3920e-07, -4.0110e-08,
        -1.5852e-07, -5.3448e-07, -4.7467e-08, -8.1481e-08, -5.2581e-07,
        -4.0218e-08, -3.2761e-08, -4.2330e-08, -8.4315e-07, -2.8955e-08,
        -1.0356e-08, -6.6519e-09, -1.1003e-08, -3.7731e-07, -1.1687e-07,
        -8.1312e-07, -2.1586e-07, -4.0173e-08, -4.5845e-08, -7.3731e-08,
        -8.3354e-08, -8.4858e-08, -7.4683e-08, -3.7547e-08, -1.5632e-08,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6778e-07, -7.1408e-07, -4.8090e-08, -4.6793e-07, -3.7653e-07,
        -3.1356e-08, -2.4166e-08, -2.0928e-08, -2.0805e-08,  2.4605e-09,
        -3.2347e-08, -1.1522e-08, -2.4847e-07, -3.7928e-08,  2.1244e-09,
        -1.9390e-08, -9.0761e-07, -8.1821e-08, -2.3147e-07, -3.4931e-06,
        -4.3520e-08, -2.0297e-08, -7.9557e-08, -6.0887e-09, -3.7413e-08,
        -3.9310e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4700: [tensor([-6.2344e-07, -4.4693e-07, -1.5691e-08, -9.4388e-08, -3.8217e-08,
        -1.2698e-08, -2.9290e-08, -5.0105e-08, -5.1654e-08, -2.2392e-07,
        -2.0071e-08, -2.7350e-08, -4.9203e-07, -5.2896e-08, -7.5030e-10,
        -1.5659e-08, -4.3748e-07, -8.1461e-08, -5.2089e-08, -7.3438e-08,
        -4.3434e-08, -1.5222e-06, -2.0831e-07, -1.0032e-07, -2.6506e-06,
        -7.1797e-08, -1.7863e-08, -3.6271e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2976e-07, -1.1258e-06, -2.3454e-08, -1.2600e-07, -5.6381e-08,
        -1.3813e-08, -3.6266e-08, -7.3558e-08, -5.3247e-08, -3.0443e-07,
        -2.4597e-08, -3.6993e-08, -9.9195e-07, -1.1524e-07,  2.8927e-09,
        -2.4166e-08, -1.9825e-06, -7.6059e-08, -6.5356e-08, -5.0621e-06,
        -8.0468e-08, -4.3678e-08, -4.7827e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7583e-07, -9.3206e-07, -2.4407e-08, -1.2681e-07, -6.6101e-08,
        -2.2789e-08, -2.0768e-08, -5.0871e-08, -6.8202e-08, -2.8686e-07,
        -2.6259e-08, -6.8518e-08, -8.9845e-07, -1.5228e-07, -1.4842e-09,
        -2.6576e-08, -1.3350e-06, -1.1260e-07, -2.8759e-07, -8.2947e-08,
        -3.0989e-08, -4.8444e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1184e-06, -1.8259e-06, -9.0638e-08, -9.1612e-07, -7.3993e-07,
        -1.8387e-08, -6.4812e-08, -4.7295e-08, -7.8428e-07,  1.8669e-08,
        -8.0888e-09, -1.5645e-08, -1.4927e-06, -1.2864e-07, -5.5809e-07,
        -5.5490e-08, -9.8208e-08, -1.8474e-07, -1.8001e-07, -1.2299e-06,
        -1.5055e-06, -6.1196e-08, -1.4906e-07, -7.5449e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5547e-07, -9.9733e-07, -2.0290e-07, -8.0141e-07, -8.3853e-07,
        -7.1855e-08, -1.7359e-07, -1.1649e-07, -7.7322e-07,  2.2280e-08,
         7.9056e-09, -3.1589e-08, -1.7576e-05, -3.4931e-07, -1.9921e-07,
        -7.6741e-06, -8.2061e-08, -1.8170e-07, -2.0736e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2586e-07, -9.1935e-07, -6.1656e-08, -3.6587e-07, -4.3965e-07,
        -1.9564e-08, -3.5010e-08, -2.0790e-08, -5.0502e-07, -2.4175e-10,
        -2.4708e-09, -3.0446e-08, -6.6032e-08, -7.2879e-07, -1.5478e-08,
        -3.2149e-08, -7.4167e-08, -1.1145e-07, -3.9333e-08, -4.8340e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0226e-07, -9.0901e-08, -3.6290e-08, -3.1768e-08, -1.0226e-08,
        -7.3462e-08, -2.5341e-07, -4.0196e-08, -2.2466e-08, -3.8348e-08,
        -7.6647e-08, -1.6446e-07, -9.2955e-08, -1.6294e-07, -3.0532e-07,
        -9.3777e-08, -3.9869e-08, -6.6735e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3477e-07, -2.0459e-07, -5.5708e-08, -1.3479e-07, -3.0928e-08,
        -1.3349e-07, -3.4321e-07, -7.1838e-08, -2.1647e-08, -7.6742e-08,
        -6.4418e-08, -9.1730e-08, -7.5353e-08, -8.3495e-08, -8.3634e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7726e-07, -2.2300e-07, -7.5934e-08, -7.0891e-08, -3.2570e-08,
        -1.1801e-07, -4.7122e-07, -5.0008e-08, -1.3830e-08, -4.9405e-08,
        -2.1586e-07, -2.7012e-07, -1.2177e-07, -1.7185e-06, -2.1867e-06,
        -1.3971e-07, -7.9560e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0553e-07, -2.9818e-08, -7.3536e-08, -5.1639e-08, -4.5305e-08,
        -1.6610e-07, -3.4057e-08, -2.7423e-08, -1.1170e-07, -7.1906e-08,
         2.4249e-09, -2.4831e-08, -4.8755e-06, -1.4316e-07, -3.4974e-07,
        -1.2797e-07, -5.2639e-07, -5.2459e-07, -4.5417e-08, -1.7001e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4545e-06, -3.2586e-08, -7.7993e-08, -6.4638e-08, -4.5777e-08,
        -2.7102e-07, -3.5078e-08, -2.0782e-08, -9.2391e-08, -8.5767e-08,
        -2.4394e-08, -8.1004e-09, -2.0177e-06, -1.4390e-07, -9.1676e-08,
        -7.0034e-06, -8.6856e-08, -9.6993e-08, -1.4529e-07, -3.4344e-08,
        -4.8136e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3575e-06, -5.3529e-08, -1.1093e-07, -6.4871e-08, -5.4088e-08,
        -3.5158e-07, -6.2576e-08, -6.8395e-08, -1.4645e-07, -1.7444e-07,
        -6.8636e-09, -4.7522e-08, -1.2468e-05, -1.5260e-07, -6.4761e-07,
        -1.9833e-07, -1.2326e-07, -1.3315e-07, -8.1882e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4750: [tensor([-1.0589e-06, -3.1294e-07, -1.0234e-07, -1.1055e-07, -2.4874e-07,
        -1.3641e-08, -6.9600e-08, -1.6291e-07, -1.2967e-07, -6.7151e-06,
        -2.7637e-08, -1.6749e-07, -1.7972e-08, -5.9092e-08, -4.4714e-07,
        -1.1690e-07, -2.5902e-07, -2.3200e-07, -8.9294e-07, -7.8181e-08,
        -1.2178e-07, -3.4468e-08, -7.9819e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3370e-06, -3.7370e-07, -8.8655e-08, -1.1964e-07, -3.1242e-07,
        -5.7870e-08, -8.0093e-08, -1.6256e-07, -1.1639e-07, -6.3608e-06,
        -4.3930e-08, -1.6325e-07, -2.8049e-08, -2.3754e-08, -5.6632e-07,
        -1.5616e-07, -1.7839e-07, -2.2745e-07, -1.4969e-07, -5.3083e-08,
        -1.0973e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4661e-06, -6.7967e-07, -1.1484e-07, -2.0207e-07, -3.9508e-07,
        -5.7597e-08, -7.5390e-08, -4.1971e-07, -2.4795e-07, -2.5038e-06,
        -9.8669e-08, -1.8651e-07, -5.3695e-08, -7.1611e-08, -8.6784e-07,
        -3.0255e-07, -2.4749e-07, -4.0261e-06, -2.0876e-07, -2.6494e-07,
        -6.0596e-07, -2.7778e-07, -2.1664e-05, -1.5391e-07, -7.9951e-08,
        -1.5619e-07,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0187e-07, -2.6101e-07, -5.6528e-08, -8.5011e-08, -2.3336e-07,
        -2.6192e-08, -6.2484e-08, -1.4006e-07, -8.1813e-08, -3.5701e-06,
        -3.3145e-08, -8.7393e-08, -6.5747e-09, -2.8281e-08, -3.9884e-07,
        -8.6154e-08, -1.1870e-07, -4.6870e-06, -9.3672e-08, -9.2325e-08,
        -1.8784e-07, -7.6980e-08, -3.1508e-08, -7.4546e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7431e-08, -4.2322e-07, -4.9762e-08, -1.4995e-07, -1.6305e-07,
        -2.2637e-08, -4.2469e-08, -2.1891e-07, -1.0882e-07, -6.8957e-06,
        -4.9176e-08, -1.1283e-07, -1.4531e-08, -3.8724e-08, -4.1081e-08,
        -1.3029e-07, -1.1563e-07, -9.4377e-08, -7.9668e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0763e-06, -5.9618e-07, -9.5717e-08, -1.5396e-07, -2.4065e-07,
        -5.1845e-08, -7.8767e-08, -2.6517e-07, -1.1442e-07, -1.1172e-05,
        -5.4170e-08, -1.4304e-07, -1.5114e-08, -4.0470e-08, -6.6085e-07,
        -1.1103e-07, -4.6087e-07, -1.2772e-07, -3.3008e-07, -1.8117e-07,
        -2.1546e-08, -5.7957e-08, -1.3002e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6043e-07, -3.7634e-07, -5.2123e-08, -1.1978e-07, -2.5538e-07,
        -2.7698e-08, -8.6855e-08, -2.2127e-07, -1.1432e-07, -1.0135e-05,
        -5.3480e-08, -1.0762e-07, -2.2273e-08, -4.5429e-08, -4.2117e-07,
        -1.3645e-07, -2.2237e-07, -1.8885e-07, -1.3126e-07, -1.5811e-07,
        -2.2881e-08, -6.9765e-08, -2.0351e-06, -6.8932e-08, -3.5677e-06,
        -1.5783e-07, -1.6833e-08, -8.2438e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4496e-06, -6.2146e-07, -5.0922e-08, -1.5951e-07, -3.8772e-07,
        -4.4603e-08, -1.2324e-07, -2.7266e-07, -1.6708e-07, -2.9395e-06,
        -7.3981e-08, -1.6470e-07, -1.7092e-08, -5.4960e-08, -7.1321e-07,
        -2.2008e-07, -3.1692e-07, -2.5223e-07, -2.1755e-07, -3.2540e-07,
        -1.3924e-07, -2.1097e-07, -9.9649e-06, -8.8421e-08, -6.2772e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0863e-07, -4.7259e-07, -6.9095e-08, -1.2153e-07, -2.3644e-07,
        -3.0072e-08, -5.2248e-08, -1.2996e-07, -1.1336e-07, -6.0000e-06,
        -1.2898e-08, -9.0081e-08, -3.8170e-09, -3.2882e-08, -4.2657e-07,
        -1.0926e-07, -2.2376e-07, -6.7293e-08, -1.4432e-07, -1.4310e-07,
        -3.5256e-08, -4.1964e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0100e-07, -5.3098e-07, -5.0434e-08, -7.6415e-08, -1.9669e-07,
        -4.3587e-08, -4.6566e-08, -1.5900e-07, -1.0834e-07, -7.7825e-06,
        -2.7912e-08, -1.3935e-07, -1.1029e-08, -4.4401e-08, -5.6518e-07,
        -1.2613e-07, -1.8604e-07, -2.9095e-07, -9.3229e-07, -1.0777e-07,
        -1.4331e-07, -1.8927e-08, -9.6913e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8398e-07, -2.6173e-07, -4.3005e-08, -5.0085e-08, -1.4390e-07,
        -1.8826e-08, -2.5688e-08, -8.1757e-08, -6.3741e-08, -3.3489e-06,
        -1.5557e-08, -6.9643e-08, -1.3732e-08, -1.0927e-08, -3.5948e-07,
        -4.6577e-08, -9.9210e-08, -9.0652e-08, -4.5086e-08, -6.9791e-08,
        -1.3221e-07, -5.8888e-08, -1.1506e-08, -1.4682e-08, -1.0678e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0920e-07, -3.3261e-07, -4.9170e-08, -8.4289e-08, -1.9763e-07,
        -2.0645e-08, -5.9997e-08, -1.3305e-07, -9.1241e-08, -5.2098e-06,
        -1.6267e-08, -1.1549e-07, -4.4783e-09, -4.4139e-08, -1.0008e-07,
        -1.0056e-07, -1.2445e-07, -2.4677e-06, -1.2859e-07, -9.1476e-08,
        -2.6672e-06, -8.2268e-08, -3.2608e-07, -5.0077e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4800: [tensor([-1.0706e-06, -9.3518e-07, -1.0327e-07, -2.8146e-08, -6.5682e-08,
        -6.1320e-08, -2.9124e-07, -1.1517e-07, -5.2529e-07, -1.0158e-07,
        -1.8861e-08, -3.2507e-08, -1.9388e-07, -5.9249e-08, -6.5950e-08,
        -7.6815e-08, -3.0098e-06, -6.8818e-08, -7.4801e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0091e-06, -5.2346e-08, -5.6426e-08, -8.0241e-08, -2.8642e-07,
        -4.4841e-09, -5.6225e-08, -2.1706e-08, -3.2255e-08, -5.4964e-07,
        -1.0280e-07, -1.0613e-06, -2.0017e-06, -7.2101e-08, -3.3457e-08,
         8.0643e-10, -3.6220e-08, -1.4941e-07, -8.2734e-07, -1.3641e-07,
        -7.0532e-08, -1.1126e-06, -2.5478e-07, -7.2597e-08, -6.4038e-08,
        -7.6071e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8669e-07, -1.0352e-07, -7.9391e-08, -5.5240e-08, -2.4117e-07,
        -1.2864e-08, -3.1376e-08, -3.2706e-08, -5.6821e-08, -9.0397e-07,
        -8.4497e-08, -2.2403e-06, -1.4110e-06, -7.4215e-08, -1.2252e-08,
        -1.6991e-08, -4.4658e-08, -1.6872e-07, -7.9890e-08, -1.0279e-07,
        -1.4256e-06, -5.9587e-08, -7.6255e-08, -1.3275e-06, -7.1046e-07,
        -4.1647e-08, -5.4182e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8953e-07, -5.6847e-08, -6.0844e-08, -4.4569e-08, -1.5631e-07,
        -1.6426e-08, -2.4725e-08, -3.7059e-08, -1.1214e-08, -2.9875e-07,
        -8.6887e-08, -4.2160e-07, -1.1836e-06, -4.4689e-08, -2.0357e-08,
        -8.4286e-09, -2.1351e-08, -1.8894e-06, -2.4015e-08, -5.3808e-08,
        -2.5837e-07, -5.2178e-08, -3.0213e-08, -6.5886e-08, -1.1209e-07,
        -9.9732e-08, -8.5459e-07, -1.8111e-08, -6.9663e-08, -4.0291e-08,
        -4.6494e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4126e-06, -4.2971e-07, -7.4807e-08, -8.4484e-07, -3.3668e-08,
        -9.1723e-08, -4.6575e-08, -8.4985e-08, -7.1040e-08, -7.6358e-08,
        -7.2009e-08, -2.3378e-10, -7.3334e-08, -1.3069e-08, -2.4883e-08,
        -9.7135e-07, -1.1652e-07, -4.3456e-06, -1.5621e-07, -1.4593e-07,
        -6.7628e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5419e-07, -4.7479e-07, -7.9146e-08, -4.0787e-07, -4.6036e-08,
        -6.9227e-08, -3.0874e-08, -7.8244e-08, -5.8413e-08, -4.3846e-08,
        -4.9639e-08,  5.5940e-09, -7.5868e-08, -1.2415e-08, -2.0504e-08,
        -6.7661e-07, -1.4400e-07, -2.8887e-06, -1.0088e-07, -1.7513e-07,
        -4.8065e-08, -6.6495e-08, -6.6624e-06, -9.6930e-08, -4.0117e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0396e-07, -3.4511e-07, -4.6825e-08, -4.0950e-07, -3.5588e-08,
        -7.6610e-08, -2.4713e-08, -5.5361e-08, -5.6651e-08, -4.9416e-08,
        -4.6248e-08, -1.6582e-09, -3.8293e-08, -9.8224e-09, -2.0757e-08,
        -9.0611e-07, -9.7345e-08, -4.6375e-06, -8.1498e-08, -1.8015e-07,
        -3.3322e-08, -5.6849e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2954e-06, -4.2340e-08, -4.6328e-08, -3.2179e-08, -1.0508e-07,
        -7.6326e-07, -1.0785e-08, -4.7638e-08, -1.1772e-08, -2.6570e-08,
        -3.3216e-07, -3.9833e-08, -5.6974e-08, -1.1609e-06, -5.9244e-08,
        -1.5738e-08, -5.6014e-08, -5.1633e-08, -4.6321e-07, -4.9563e-09,
        -9.4434e-08, -2.7102e-08, -2.9948e-08, -1.9547e-07, -2.8400e-07,
        -3.1448e-07, -7.0815e-06, -2.5372e-08, -3.5398e-08,  4.5931e-09,
        -1.4954e-08, -5.1493e-07, -3.7601e-08, -5.0566e-08, -8.1067e-08,
        -1.5627e-06, -1.3622e-07, -8.1432e-08, -3.8703e-07, -2.9707e-08,
        -1.2667e-07, -2.6239e-08, -7.3903e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1384e-07, -6.9760e-08, -9.3484e-08, -6.5291e-08, -2.2277e-07,
        -1.1925e-06, -1.5047e-09, -6.2461e-08, -3.4152e-08, -5.8836e-08,
        -6.5944e-07, -6.9463e-08, -7.9153e-08, -1.1173e-06, -7.3471e-08,
        -2.4903e-08, -9.0765e-08, -7.6729e-08, -4.2220e-07, -1.1784e-08,
        -1.2463e-07, -6.8056e-08, -6.2489e-08, -3.4981e-07, -1.9061e-07,
        -3.9547e-07, -1.7716e-05, -3.5310e-08, -4.8509e-08,  1.5102e-08,
        -3.2018e-08, -6.3645e-07, -1.0556e-07, -6.4490e-08, -1.1797e-07,
        -2.9241e-06, -4.3522e-08, -1.3836e-07, -1.6043e-06, -1.5715e-07,
        -6.1936e-08, -3.4345e-08, -1.0396e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4925e-07, -4.0237e-08, -2.2212e-08, -4.7422e-08, -1.8400e-07,
        -4.4529e-07, -4.0593e-09, -4.8935e-08, -2.8176e-08, -4.0915e-08,
        -3.7195e-07, -3.7514e-08, -6.6086e-08, -1.0794e-06, -6.2309e-08,
        -1.6886e-09, -5.0710e-08, -4.1776e-08, -4.4373e-07, -3.3320e-09,
        -1.0577e-07, -3.6541e-08, -1.7361e-08, -1.7098e-07, -3.1021e-07,
        -3.8284e-07, -6.2459e-06, -2.5613e-08, -3.9224e-08, -9.4438e-09,
        -1.0562e-08, -8.1716e-07, -3.4116e-08, -4.1189e-08, -9.7602e-08,
        -2.5592e-06, -1.6560e-07, -7.2881e-08, -1.8847e-08, -3.7372e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3376e-07, -3.5139e-08, -3.8614e-08, -5.8143e-08, -3.3390e-08,
        -3.1541e-08, -3.3519e-08, -3.0620e-08, -2.5924e-07, -1.3340e-07,
        -9.4115e-09, -2.2788e-08, -6.0393e-08, -4.4011e-08, -4.4998e-08,
        -5.7667e-07, -8.8162e-08, -1.7441e-07, -7.2939e-06, -7.5911e-08,
        -6.9223e-08, -1.3774e-07, -3.1833e-08, -2.5346e-08, -4.7697e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5875e-06, -2.1393e-07, -1.4692e-07, -1.3778e-07, -1.3775e-07,
        -1.6374e-07, -1.5768e-07, -7.5254e-08, -6.6940e-07, -2.7601e-07,
         6.1838e-08, -1.0633e-07, -1.4558e-06, -3.7659e-07, -1.2067e-06,
        -3.7506e-07, -3.5358e-07, -3.5831e-05, -3.1492e-07, -9.9261e-08,
        -1.5476e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4850: [tensor([-9.8123e-07, -1.9122e-06, -4.7421e-08, -1.3958e-07, -7.2957e-07,
        -5.5391e-08, -6.8424e-08, -4.4095e-08, -5.6837e-08, -6.7616e-08,
        -6.4233e-09, -5.2154e-08, -7.3486e-06, -1.4648e-07, -8.7066e-07,
        -1.3004e-07, -1.3244e-07, -1.1287e-07, -8.8596e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7614e-07, -5.2013e-07, -4.2773e-08, -1.0160e-07, -3.7560e-08,
        -8.0035e-08, -9.2980e-08, -5.0404e-08, -6.8911e-08, -1.2040e-07,
        -5.0892e-08, -1.0227e-07, -3.4447e-07, -2.1608e-07, -1.3851e-08,
        -3.4702e-08, -6.3173e-07, -9.2032e-08, -9.8750e-08, -7.2302e-08,
        -7.0514e-08, -1.7471e-07, -1.7103e-07, -8.9665e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3225e-07, -2.8974e-07, -2.3817e-08, -2.7071e-08, -1.8973e-08,
        -4.6603e-08, -7.4169e-08, -2.5700e-08, -5.8838e-08, -7.8369e-08,
        -2.7105e-08, -5.5971e-08, -3.3283e-07, -1.1209e-07, -8.8769e-09,
        -2.0996e-08, -4.1757e-07, -5.4686e-08, -1.0044e-07, -4.6999e-08,
        -6.1875e-08, -4.5367e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0193e-08, -2.4234e-07, -1.8285e-08, -3.2463e-08, -7.9779e-09,
        -3.0444e-08, -5.6384e-08, -1.6932e-08, -5.3411e-08, -4.0251e-08,
        -2.8963e-08, -3.0281e-08, -2.8350e-07, -1.0977e-07, -9.4486e-09,
        -2.8490e-08, -3.9633e-07, -2.0309e-07, -1.3274e-07, -1.1961e-07,
        -9.3585e-08, -4.3674e-08, -2.6085e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9915e-07, -2.8335e-06, -6.4011e-08, -1.7836e-07, -4.0716e-08,
        -3.5911e-08, -4.6157e-08, -3.3775e-08, -1.8188e-06, -8.3836e-08,
        -9.4874e-08, -1.2672e-07, -1.0810e-07, -3.1059e-07, -7.7853e-08,
        -7.4091e-08, -7.1515e-08, -1.7520e-07, -7.6106e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5572e-07, -2.6623e-06, -3.7866e-08, -1.4084e-07, -4.5353e-08,
        -4.3313e-08, -5.1798e-08, -4.1898e-08, -2.0617e-06, -2.6863e-07,
        -4.5990e-08, -1.2947e-07, -1.6994e-07, -1.7615e-07, -6.8693e-08,
        -6.3725e-08, -7.5352e-08, -8.9274e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0266e-06, -1.6377e-06, -3.5481e-08, -1.6849e-07, -3.8637e-08,
        -3.0336e-08, -3.3677e-08, -4.9523e-08, -5.3467e-07, -9.3954e-08,
        -5.2778e-08, -9.7134e-08, -6.3174e-08, -2.0134e-07, -5.4887e-08,
        -3.8587e-06, -9.1578e-08, -6.0787e-08, -5.4498e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4521e-06, -8.5739e-07, -4.0395e-08, -1.4980e-07, -3.8556e-08,
        -1.6082e-07, -3.9111e-08, -5.2669e-08, -6.4542e-07, -3.2590e-08,
        -1.0994e-08, -2.8160e-08, -9.5336e-08, -5.0819e-07, -4.7570e-08,
        -1.3172e-06, -8.6519e-08, -3.6029e-07, -6.7547e-07, -2.2707e-08,
        -3.5815e-08, -5.0226e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6417e-07, -2.1170e-06, -6.1192e-08, -1.7803e-07, -3.6390e-08,
        -2.0113e-07, -4.7692e-08, -1.0687e-07, -5.4891e-07, -3.0741e-08,
        -6.6490e-09, -4.1163e-08, -1.3701e-07, -2.1269e-06, -4.6521e-08,
        -2.1429e-07, -5.8047e-08, -1.3052e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5564e-07, -2.4475e-06, -9.4224e-08, -2.3751e-07, -6.9033e-08,
        -2.0302e-07, -5.9190e-08, -6.1987e-08, -1.0305e-06, -6.7765e-08,
        -2.3211e-08, -8.9346e-08, -1.6296e-07, -1.7803e-06, -1.1009e-07,
        -9.9228e-08, -9.1881e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3884e-06, -1.4440e-06, -3.5568e-08, -5.1318e-07, -8.5918e-07,
        -4.0513e-08, -1.0213e-08, -2.4573e-08, -2.6735e-08, -7.5906e-08,
        -4.3986e-08, -9.9923e-09, -3.5030e-08, -8.3076e-08, -1.0057e-07,
        -4.2654e-06, -1.0713e-07, -4.7730e-08, -7.2885e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3540e-07, -1.4537e-06, -3.4782e-08, -8.3150e-07, -1.0030e-06,
        -4.6370e-08, -1.0926e-08, -3.7382e-08, -2.7481e-08, -3.0077e-08,
        -6.8849e-08, -4.7668e-08, -5.0518e-08, -9.2139e-08, -7.1099e-08,
        -9.4766e-08, -9.5316e-08, -1.2844e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #4900: [tensor([-1.3037e-07, -2.2295e-06, -7.9786e-08, -1.4414e-07, -8.1936e-08,
        -3.7971e-08, -1.1246e-08, -2.1318e-08, -3.4659e-07, -2.5451e-08,
        -2.4934e-07, -3.1675e-06, -1.3634e-08, -2.7661e-08, -3.7873e-08,
        -6.4077e-08, -2.1005e-08, -7.4964e-08, -8.6793e-08,  1.4521e-08,
        -2.2680e-08, -7.1106e-06, -7.4536e-08, -1.8647e-06, -1.0140e-07,
        -7.4028e-08, -6.7168e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.5407e-07, -1.2169e-06, -7.9727e-08, -1.2092e-07, -5.4666e-08,
        -2.8855e-08, -1.7917e-08, -2.3998e-08, -2.8069e-07, -3.2420e-08,
        -1.6492e-07, -4.6460e-06, -2.7198e-08, -5.0548e-08, -6.8939e-08,
        -9.1690e-08, -3.3856e-08, -6.3186e-08, -7.4922e-08,  2.8231e-08,
        -2.0322e-08, -7.3542e-06, -4.8968e-08, -8.1036e-07, -3.0102e-08,
        -1.2533e-07, -4.0954e-06, -1.8166e-07, -5.7880e-08, -6.9420e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6775e-07, -1.7878e-06, -5.2749e-08, -1.0658e-07, -5.0335e-08,
        -3.6417e-08, -1.2189e-08, -3.8839e-09, -2.2309e-07, -2.7694e-08,
        -1.9286e-07, -4.6655e-06, -2.0457e-08, -5.9867e-08, -5.4191e-08,
        -4.5681e-08, -2.9150e-08, -3.7022e-08, -7.3110e-08,  8.9817e-09,
        -3.2103e-08, -3.9579e-06, -5.1621e-08, -6.9481e-07, -4.6691e-08,
        -1.7229e-07, -3.2328e-08, -5.2365e-08, -6.6810e-08, -5.2566e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3172e-07, -1.5931e-06, -2.5879e-08, -5.8457e-08, -4.9929e-08,
        -5.6081e-08, -8.7964e-08, -3.1129e-08,  1.7649e-09, -3.6673e-08,
        -1.7033e-06, -7.3503e-08, -6.6530e-08, -1.2780e-07, -4.8656e-07,
        -5.9732e-08, -5.3416e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5422e-07, -1.8127e-06, -1.9145e-08, -4.5570e-08, -5.0037e-08,
        -9.2881e-08, -9.7058e-08, -3.6378e-08, -2.0453e-08, -3.2707e-08,
        -3.5392e-08, -6.0767e-08, -1.0631e-07, -2.3115e-06, -8.9071e-08,
        -3.6120e-07, -7.5897e-08, -1.0202e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3907e-07, -1.4641e-06, -2.6375e-08, -4.3793e-08, -2.8113e-08,
        -8.3521e-08, -1.2043e-07, -2.5672e-08, -2.2483e-09, -4.6265e-08,
        -1.6783e-06, -1.1917e-07, -8.5080e-08, -1.0311e-06, -5.6588e-07,
        -1.1890e-07, -1.8614e-07, -3.4157e-06, -8.4826e-08, -5.6343e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1898e-07, -6.7030e-07, -8.1709e-08, -5.5628e-08, -1.9781e-07,
        -9.6399e-07, -3.6304e-08, -8.5338e-08, -2.3651e-07,  1.9148e-09,
        -1.6705e-08, -8.3237e-08, -5.0881e-08, -1.4294e-06, -3.8188e-08,
        -1.3081e-07, -4.2936e-08, -6.3225e-08, -8.9367e-08, -9.5688e-08,
        -5.3229e-08, -6.1783e-09, -1.9253e-05, -1.0238e-07,  4.1187e-08,
        -4.0076e-08, -2.1427e-06, -7.1644e-08, -2.7695e-06, -1.1614e-07,
        -8.7887e-08, -1.0072e-07, -1.3872e-07, -2.3385e-08, -6.9062e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5453e-06, -1.3752e-06, -1.2873e-07, -9.5367e-08, -3.1503e-07,
        -1.2543e-06, -5.2729e-08, -1.2833e-07, -2.8990e-07, -3.1070e-08,
        -3.8582e-08, -8.1029e-08, -7.3191e-08, -2.4247e-06, -5.0777e-08,
        -2.2919e-07, -7.4360e-08, -7.8515e-08, -1.6132e-07, -1.3304e-07,
        -6.2674e-08, -5.2199e-08, -1.7833e-05, -1.6950e-07,  1.9692e-08,
        -3.6273e-08, -3.5201e-06, -8.6791e-08, -2.1513e-06, -2.0961e-07,
        -1.0047e-07, -2.0467e-07, -9.5530e-08, -1.0827e-07,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0753e-06, -3.1455e-06, -3.5174e-07, -1.7003e-07, -1.0629e-06,
        -2.0617e-06, -1.4281e-07, -3.9204e-07, -1.1096e-06, -3.0653e-08,
        -1.5974e-07, -3.8248e-07, -3.2620e-07, -3.9660e-06, -1.7905e-07,
        -6.5309e-07, -1.0205e-07, -3.1937e-07, -5.7651e-07, -4.2524e-07,
        -2.0456e-07, -7.1262e-08, -4.6702e-05, -5.0689e-07,  1.4923e-07,
        -2.0379e-07, -5.7084e-05, -3.6332e-07, -2.8078e-06, -6.2732e-07,
        -1.9737e-06, -1.6210e-06, -2.9153e-07, -9.4341e-08, -2.8988e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4769e-07, -1.4491e-06, -1.8980e-08, -1.4742e-07, -3.8437e-08,
        -7.4878e-08, -2.3039e-07, -8.9539e-08, -7.7686e-08, -6.6390e-08,
        -4.3034e-07, -2.4205e-07, -3.0179e-08, -5.4590e-08, -1.3892e-06,
        -8.3532e-08, -1.4679e-07, -1.6297e-07, -5.2945e-08, -1.6191e-07,
        -1.0613e-07, -5.6094e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0978e-06, -7.6573e-07, -1.4153e-09, -9.5791e-08, -2.5453e-08,
        -6.7231e-08, -1.5551e-07, -6.8426e-08, -4.9975e-08, -5.5317e-08,
        -3.0626e-07, -9.0434e-08, -1.4968e-08, -2.6994e-08, -3.4878e-06,
        -5.7963e-08, -1.1045e-07, -7.3556e-08, -2.7427e-07, -7.6798e-08,
        -1.6808e-07, -1.2032e-08, -2.6539e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0815e-06, -1.1857e-06, -1.0660e-09, -7.8145e-08, -2.3022e-08,
        -7.1144e-08, -2.7789e-07, -6.8656e-08, -4.9627e-08, -4.7996e-08,
        -5.0558e-07, -1.5290e-07, -1.3689e-08, -3.8641e-08, -1.5013e-06,
        -5.9570e-08, -1.1700e-07, -8.2932e-08, -5.7386e-08, -2.3307e-07,
        -4.3350e-08, -3.9980e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #4950: [tensor([-8.9535e-07, -7.6247e-07, -1.2064e-07, -6.0027e-08, -8.2992e-07,
        -5.3237e-08, -9.7296e-08, -3.9472e-08, -2.3733e-08, -3.9354e-08,
        -1.2411e-06, -2.2430e-07, -1.0911e-07, -1.5002e-06, -8.9932e-08,
        -9.4471e-08, -5.3458e-07, -1.0859e-07, -2.7978e-06, -7.4940e-08,
        -6.4497e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9643e-07, -1.8046e-07, -8.1693e-09, -1.0818e-07, -6.5298e-07,
        -8.3743e-08, -7.7279e-08, -4.2134e-08, -6.4853e-07, -6.2282e-09,
        -3.4590e-08, -1.0114e-07, -7.7361e-08, -9.1947e-08, -2.1105e-08,
        -7.6457e-07, -1.0778e-07, -4.8997e-06, -7.9231e-08, -1.2766e-07,
        -3.3645e-08, -8.5063e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6347e-07, -1.2605e-07, -3.6105e-08, -7.8685e-08, -1.1577e-06,
        -6.6541e-08, -1.3307e-07, -7.2514e-08, -8.4504e-07, -2.8729e-08,
        -5.9347e-08, -2.1780e-07, -8.8193e-08, -1.9275e-07, -3.3784e-08,
        -1.0471e-06, -1.4801e-07, -4.8931e-06, -9.7757e-08, -1.0940e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0067e-06, -3.0328e-07, -5.5234e-08, -1.3723e-07, -1.0166e-06,
        -1.2623e-07, -1.1374e-07, -7.8486e-08, -8.7883e-07, -5.4153e-08,
        -5.1399e-08, -1.0112e-07, -1.1791e-07, -1.3247e-07, -3.9651e-08,
        -1.0703e-06, -1.7538e-07, -1.1457e-05, -1.4757e-07, -6.4162e-08,
        -1.0355e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3379e-06, -9.8719e-07, -4.6725e-08, -1.3981e-07, -4.3972e-08,
        -1.0200e-06, -7.3216e-08, -9.2557e-08, -4.2656e-08, -5.4252e-07,
        -6.1251e-08, -7.8000e-08, -7.0568e-08, -6.6700e-07, -4.7741e-08,
        -1.5300e-07, -6.1037e-07, -8.7698e-08, -8.5391e-08, -1.1138e-08,
         1.8066e-09, -2.8390e-08, -1.1574e-07, -7.7374e-08, -1.2635e-07,
        -9.9600e-09,  2.3682e-08, -2.0653e-08, -2.2895e-05, -1.0438e-07,
        -1.3145e-06, -1.4461e-07, -8.8000e-08, -1.7384e-08, -1.3433e-07,
        -1.0995e-07, -5.1950e-08, -9.5543e-08,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2993e-06, -1.4210e-06, -5.2819e-08, -1.5102e-07, -7.5305e-08,
        -5.0882e-07, -4.2458e-08, -7.8216e-08, -3.7510e-08, -6.7924e-07,
        -7.8577e-08, -1.0529e-07, -8.9747e-08, -7.0108e-07, -3.4190e-08,
        -1.9145e-07, -6.0384e-07, -6.3272e-08, -9.3453e-08, -7.2758e-09,
         1.2247e-08, -2.6082e-08, -5.0781e-08, -6.0208e-08, -1.1276e-07,
         1.1165e-08,  3.1070e-08, -2.1659e-08, -1.1774e-05, -8.5283e-08,
        -7.4579e-07, -9.3787e-08, -1.3803e-07, -1.0158e-05, -1.4105e-07,
        -8.6126e-08, -9.1151e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5727e-06, -1.2792e-06, -5.9658e-08, -1.7090e-07, -6.8074e-08,
        -8.2920e-07, -8.9676e-08, -2.0466e-07, -5.4546e-08, -6.4081e-07,
        -8.2345e-08, -1.6691e-07, -1.3933e-07, -6.3526e-07, -5.5913e-08,
        -3.4787e-07, -4.4911e-07, -1.1306e-07, -1.5352e-07, -1.7670e-09,
         1.6821e-08, -1.0990e-08, -1.0114e-07, -7.3852e-08, -1.9371e-07,
         1.5350e-08,  3.6207e-08, -4.2426e-08, -2.7256e-05, -1.7121e-07,
        -1.1156e-06, -2.0283e-07, -2.4111e-07, -8.3990e-06, -1.8541e-07,
        -1.1145e-07, -1.3415e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1839e-07, -9.9894e-07, -4.0125e-08, -2.1421e-08, -1.1334e-08,
        -5.8557e-07, -6.7036e-09, -1.2742e-08, -2.1876e-08, -1.3294e-08,
        -1.3804e-07, -3.2210e-08, -2.7424e-08, -1.9910e-08, -3.1462e-08,
        -3.0022e-08, -1.2168e-07, -6.7044e-08, -1.4203e-06, -2.1287e-08,
        -2.2365e-08, -1.2318e-08, -5.4939e-08, -8.2522e-07, -1.0079e-07,
        -1.2449e-09, -2.2685e-08, -9.6263e-09, -3.4306e-08, -9.6106e-08,
        -6.2135e-08, -4.8205e-08, -4.7737e-08, -2.1019e-08, -3.9461e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7246e-07, -1.1106e-06, -4.3477e-08, -3.2398e-08, -2.6974e-08,
        -8.4570e-07, -1.5891e-08, -2.5861e-08, -2.5885e-08, -2.8320e-08,
        -1.9742e-07, -5.8667e-08, -5.5705e-08, -3.8021e-08, -2.9421e-08,
        -6.0108e-08, -2.0979e-07, -7.3083e-08, -5.0204e-07, -2.7417e-08,
        -1.4829e-08, -2.2982e-08, -1.0224e-07, -1.6123e-06, -1.2724e-07,
        -1.4328e-08, -3.0743e-08, -2.7683e-08, -3.2715e-06, -3.0540e-08,
        -1.0593e-07, -6.2738e-07, -1.2318e-07, -1.2290e-07, -6.2058e-08,
        -6.0780e-08, -1.2516e-07, -5.2965e-08, -6.4238e-08,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3841e-07, -1.1258e-06, -2.2008e-08, -1.8245e-08, -1.0239e-08,
        -5.3549e-07, -1.2580e-08, -7.9077e-09, -1.5732e-08, -1.6726e-08,
        -9.0062e-08, -2.8248e-08, -2.7645e-08, -1.4636e-08, -2.3177e-08,
        -3.2458e-08, -1.4965e-07, -3.1278e-08, -5.6441e-07, -9.5277e-09,
        -3.8741e-09, -1.4748e-08, -3.4205e-08, -5.6195e-07, -6.7384e-08,
        -1.4898e-08, -2.3304e-08, -7.4555e-09, -3.7794e-08, -5.5175e-07,
        -3.1525e-08, -3.0752e-08, -5.0870e-08, -1.7307e-08, -2.6045e-08,
        -3.0211e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6529e-06, -7.0027e-07, -5.2704e-08, -2.4420e-08, -3.0055e-08,
        -5.6865e-07, -1.5297e-08, -1.0132e-08, -2.6538e-08, -1.5866e-08,
        -1.7927e-07, -2.0247e-08, -2.3290e-08, -2.4703e-08, -3.0686e-08,
        -6.5968e-08, -1.5838e-07, -7.6187e-08, -7.8571e-07, -1.3886e-08,
        -1.0834e-08, -3.3368e-08, -3.1693e-08, -1.8401e-06, -8.3159e-08,
         1.1017e-09, -6.7414e-09, -1.8586e-08, -6.9853e-08, -1.4693e-07,
        -1.2331e-07, -1.1166e-07, -1.1076e-07, -3.7267e-08, -1.3429e-07,
        -2.9076e-08, -2.4327e-06, -5.4291e-07, -3.7830e-08, -4.2420e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5386e-07, -9.4089e-07, -1.0012e-07, -4.1724e-08, -4.1261e-08,
        -4.3046e-07, -3.0080e-08, -9.7870e-09, -3.1554e-08, -3.4700e-08,
        -2.5184e-07, -3.8641e-08, -7.0311e-08, -5.9549e-08, -5.3353e-08,
        -8.7066e-08, -1.2830e-07, -7.8091e-08, -6.4106e-07, -1.3472e-08,
        -1.7538e-08, -3.1840e-08, -4.5984e-08, -9.0547e-07, -1.7366e-07,
        -1.5143e-08, -7.8080e-09, -2.3840e-08, -6.4045e-06, -4.6157e-08,
        -3.7207e-07, -9.2002e-08, -7.2399e-08, -2.4596e-07, -3.9773e-07,
        -6.7262e-08, -6.2210e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #5000: [tensor([-3.1164e-07, -1.1629e-08, -3.8335e-08, -8.4980e-08, -1.3992e-08,
        -3.4270e-08, -5.0826e-08, -2.8232e-08, -3.8315e-08, -1.6659e-08,
        -1.9566e-08, -8.6197e-08, -1.4278e-08, -3.7709e-07, -2.6692e-08,
        -1.6044e-08, -4.3080e-08, -3.8927e-09, -3.6782e-08, -3.5182e-08,
        -2.1327e-07, -6.5429e-08, -1.9380e-08, -2.2461e-08, -1.4990e-08,
        -4.1229e-08, -4.7096e-08, -8.3437e-08, -3.2072e-08, -2.4125e-08,
        -2.7599e-08, -3.6342e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7670e-07, -9.5950e-09, -4.7150e-08, -8.0833e-08, -5.6133e-09,
        -3.4940e-08, -6.3872e-08, -6.8395e-08, -2.9368e-08, -2.0891e-08,
        -2.6534e-08, -7.0162e-08, -3.1245e-08, -5.5624e-07, -1.8040e-08,
        -1.6991e-08, -5.9082e-08, -1.5576e-08, -4.0337e-08, -1.7814e-08,
        -2.8183e-07, -5.2336e-08, -7.9478e-09, -2.0181e-08, -1.0526e-08,
        -5.4167e-08, -4.2320e-07, -3.2464e-08, -8.4949e-08, -3.1089e-07,
        -6.3829e-08, -1.0751e-07, -2.2877e-08, -2.7753e-08, -2.9677e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7479e-07, -2.8230e-08, -7.1339e-08, -1.6009e-07, -2.9568e-08,
        -5.6952e-08, -1.7685e-07, -7.2924e-08, -5.8529e-08, -4.2729e-08,
        -2.4436e-08, -9.0259e-08, -4.8788e-08, -8.2779e-07, -2.9095e-08,
        -3.2479e-08, -1.0627e-07, -4.8480e-08, -7.2335e-08, -4.0072e-08,
        -3.9517e-07, -1.0257e-07, -1.7568e-08, -2.4868e-08, -1.3886e-08,
        -1.1340e-07, -3.8873e-07, -4.5935e-08, -3.1038e-07, -1.4405e-06,
        -6.4133e-08, -1.4749e-06, -7.6169e-08, -4.7980e-07, -1.2131e-07,
        -6.4240e-08, -6.3178e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4741e-06, -8.9931e-08, -9.8069e-08, -2.8820e-07, -8.0931e-08,
        -8.3830e-08, -2.6511e-07, -1.0816e-07, -1.0032e-07, -5.7543e-08,
        -5.0619e-08, -7.3782e-08, -6.8993e-08, -5.3999e-07, -8.5161e-08,
        -6.3133e-08, -1.8132e-07, -5.1353e-08, -1.5576e-07, -9.9688e-08,
        -4.6107e-07, -8.9671e-08,  5.1754e-08, -6.8175e-08, -2.6054e-05,
        -1.8976e-07, -6.1047e-07, -1.2752e-07, -2.4885e-07, -2.3553e-07,
        -4.1971e-06, -1.8245e-07, -1.4639e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0543e-06, -1.2211e-07, -5.6192e-08, -3.3535e-07, -7.2466e-08,
        -8.9351e-08, -3.2510e-07, -1.1153e-07, -1.4730e-07, -9.8838e-08,
        -6.9066e-08, -1.6067e-07, -6.2163e-08, -6.6675e-07, -9.5868e-08,
        -9.2164e-08, -2.1288e-07, -4.1308e-08, -2.2442e-07, -7.6967e-08,
        -6.0210e-07, -1.5662e-07,  1.9689e-08, -5.2811e-08, -4.8589e-07,
        -1.2683e-06, -2.6811e-05, -5.2769e-08, -2.6670e-07, -1.9363e-07,
        -1.0477e-06, -2.2375e-07, -3.0157e-07, -5.7580e-08, -7.7901e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5173e-06, -1.1370e-07, -1.2861e-07, -4.0410e-07, -8.3494e-08,
        -1.2718e-07, -3.6850e-07, -1.7236e-07, -1.6547e-07, -1.0762e-07,
        -7.9354e-08, -7.7985e-08, -9.6460e-08, -8.2948e-07, -1.1930e-07,
        -1.0077e-07, -2.5948e-07, -6.8219e-08, -1.8643e-07, -1.2681e-07,
        -9.1212e-07, -1.6071e-07,  7.0964e-08, -5.6084e-08, -3.8886e-05,
        -2.1151e-07, -1.0122e-06, -2.3928e-07, -3.1818e-06, -1.5778e-07,
        -2.7722e-07, -9.5795e-08, -2.0173e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0362e-06, -1.4662e-08, -3.8869e-08, -1.2094e-07, -1.0473e-08,
        -5.4816e-08, -1.0767e-07, -3.4595e-08, -5.4275e-08, -2.1959e-08,
        -1.6007e-08, -5.5026e-08, -2.2898e-08, -3.5440e-07, -2.9528e-08,
        -1.9076e-08, -5.4504e-08, -1.9334e-08, -3.4352e-08, -2.6991e-08,
        -3.1332e-07, -4.5894e-08, -2.7485e-08, -2.2274e-08, -1.6622e-08,
        -8.9804e-08, -4.0460e-07, -3.0345e-08, -2.0051e-07, -5.3801e-07,
        -5.8053e-08, -7.3974e-08, -3.7702e-08, -3.5622e-08, -1.8091e-06,
        -2.2276e-08, -5.9766e-08, -7.2636e-08, -3.9894e-08, -3.8962e-08],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0140e-06, -2.4761e-08, -4.3473e-08, -1.6872e-07, -1.4331e-08,
        -4.3121e-08, -1.0043e-07, -7.3104e-08, -4.1355e-08, -2.7048e-08,
        -3.1552e-08, -9.8961e-08, -2.1831e-08, -7.6408e-07, -3.8957e-08,
        -2.7436e-08, -8.0945e-08, -1.7447e-08, -4.0018e-08, -5.1747e-08,
        -3.6406e-07, -1.0301e-07, -5.1433e-08, -1.5741e-08, -1.7447e-08,
        -3.7751e-08, -9.0079e-08, -4.6899e-08, -1.4051e-07, -5.3835e-08,
        -6.3277e-08, -9.8643e-08, -6.9099e-08, -3.6964e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1353e-06, -5.1871e-08, -7.7628e-08, -2.3310e-07, -6.1746e-08,
        -8.9724e-08, -2.2932e-07, -1.2221e-07, -1.0645e-07, -4.0950e-08,
        -5.0792e-08, -9.8325e-08, -3.8772e-08, -4.4578e-07, -6.5506e-08,
        -4.6098e-08, -1.5813e-07, -4.8822e-08, -1.2071e-07, -7.2790e-08,
        -4.8412e-07, -9.4048e-08, -7.2296e-09, -2.8736e-08, -2.0594e-05,
        -9.7962e-08, -1.4943e-07, -1.2389e-07, -2.5255e-07, -1.7069e-07,
        -1.3568e-07, -1.7378e-07, -2.6422e-07, -4.1338e-06, -1.7587e-07,
        -1.4416e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3840e-06, -3.2067e-08, -6.0786e-08, -2.2189e-07, -2.9559e-08,
        -5.1754e-08, -1.5763e-07, -1.1267e-07, -7.3969e-08, -5.1331e-08,
        -2.7394e-08, -5.6800e-08, -3.7219e-08, -7.2787e-07, -5.0477e-08,
        -3.6764e-08, -1.1824e-07, -3.9716e-08, -7.7050e-08, -5.1686e-08,
        -3.9735e-07, -7.0787e-08, -2.4667e-08, -3.6410e-08, -7.8697e-08,
        -9.0582e-07, -1.6862e-06, -8.4656e-08, -7.5455e-08, -1.6975e-07,
        -7.7457e-08, -6.0801e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3168e-06, -2.2219e-08, -6.4839e-08, -1.4556e-07, -3.4247e-08,
        -6.2549e-08, -1.3017e-07, -8.3450e-08, -7.2066e-08, -4.2278e-08,
        -2.9717e-08, -6.1774e-08, -3.2001e-08, -4.2438e-07, -2.9749e-08,
        -1.2367e-08, -1.0149e-07, -3.6989e-08, -6.3239e-08, -4.1272e-08,
        -3.1866e-07, -5.0155e-08,  9.5649e-09, -2.2514e-08, -8.5115e-06,
        -1.0506e-07, -7.7525e-08, -1.3071e-07, -1.4814e-07, -1.3863e-07,
        -9.6669e-08, -9.6962e-07, -2.9921e-08, -4.4473e-08, -6.4420e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8004e-07, -1.9055e-08, -5.6259e-08, -1.0392e-07, -2.2372e-08,
        -4.4196e-08, -8.6909e-08, -4.1377e-08, -2.9852e-08, -1.7713e-08,
        -2.2392e-08, -7.2861e-08, -2.4775e-08, -3.6797e-07, -2.7024e-08,
        -1.8527e-08, -6.9580e-08, -7.4185e-10, -5.6502e-08, -2.2756e-08,
        -3.7733e-07, -8.1052e-08, -1.2088e-08, -7.5746e-09, -2.0457e-08,
        -8.8606e-08, -8.0200e-07, -3.6626e-08, -3.5643e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #5050: [tensor([ 2.7721e-08, -4.5409e-08, -6.7688e-08, -7.7735e-08, -7.3644e-08,
        -5.8599e-08, -7.7194e-08, -1.8290e-06, -7.0800e-08, -6.2671e-09,
        -6.1958e-07, -1.2142e-07, -8.1153e-08, -1.5976e-08, -3.4859e-08,
        -9.1168e-07, -6.3561e-08, -1.6693e-07, -7.1500e-07, -1.1498e-06,
        -7.6681e-08, -1.1670e-06, -2.8648e-08, -2.0531e-07, -1.3154e-08,
        -6.6219e-08, -1.0190e-07, -5.9673e-08, -6.1831e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9350e-07, -6.2449e-07, -3.5455e-08, -7.7811e-08, -6.3297e-08,
        -3.6603e-09, -5.0385e-08, -7.4987e-08, -4.8894e-08, -4.1651e-08,
        -8.6170e-08, -1.7316e-05, -7.8576e-08, -1.2292e-08, -2.2945e-08,
        -2.3731e-06, -1.0395e-07, -1.5476e-07, -1.0182e-06, -1.0584e-07,
        -7.9099e-07, -1.5350e-07, -1.8166e-07, -1.1971e-07, -2.3366e-07,
        -1.0046e-07, -2.2301e-07, -3.8061e-08, -8.4559e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4151e-06, -1.1317e-06, -7.6289e-08, -1.1344e-07, -1.3045e-07,
        -6.4485e-08, -9.2175e-08, -1.0861e-07, -7.4931e-08, -7.8055e-08,
        -1.4107e-07, -6.1465e-06, -1.3471e-07, -5.1503e-08, -4.3056e-08,
        -1.1659e-06, -2.3291e-07, -4.0225e-06, -4.3525e-06, -1.3662e-07,
        -7.5648e-08, -1.4295e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1777e-06, -2.4033e-06, -1.2355e-07, -2.0304e-07, -1.9905e-07,
        -8.8875e-08, -1.3301e-07, -2.1017e-07, -1.1857e-07, -1.1770e-07,
        -1.6317e-07, -1.2425e-05, -1.6303e-07, -9.6251e-09, -5.6512e-08,
        -1.1986e-05, -3.0111e-07, -1.1330e-05, -2.8303e-07, -1.4730e-07,
        -2.3309e-07, -2.2230e-07, -8.6901e-08, -1.2995e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8211e-07, -1.1068e-06, -5.9531e-08, -6.6393e-08, -1.0033e-07,
        -4.5130e-08, -5.5858e-08, -9.1839e-08, -7.1544e-08, -4.6783e-08,
        -1.0793e-07, -5.1159e-06, -9.9934e-08, -1.2215e-08, -3.3147e-08,
        -7.0080e-06, -1.2372e-07, -2.2856e-07, -9.0525e-08, -6.4506e-08,
        -8.0446e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7296e-07, -2.1975e-06, -1.4411e-07, -1.3181e-07, -1.9319e-07,
        -6.5623e-08, -1.3468e-07, -1.5020e-07, -1.4043e-07, -1.3009e-07,
        -1.4170e-07, -8.1949e-06, -1.9046e-07,  2.1852e-08, -3.9362e-08,
        -7.2485e-06, -2.7789e-07, -3.7994e-07, -1.8821e-05, -2.9427e-07,
        -1.9462e-06, -4.7364e-07, -4.7677e-07, -7.5307e-08, -1.6036e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2748e-06, -1.4191e-06, -7.2736e-08, -1.1755e-07, -1.3205e-07,
        -5.6783e-08, -7.7326e-08, -1.2999e-07, -8.3838e-08, -6.1054e-08,
        -9.1063e-08, -2.5572e-06, -1.1393e-07, -1.1801e-08, -3.0056e-08,
        -9.8765e-07, -1.3699e-07, -1.4190e-07, -5.4533e-07, -1.8941e-07,
        -9.9750e-08, -1.8049e-07, -1.0415e-07, -1.5641e-07, -2.1515e-05,
        -1.5766e-07, -8.7806e-08, -1.3276e-07,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5572e-07, -1.6468e-06, -5.9884e-08, -7.9180e-08, -6.2441e-08,
        -4.8433e-08, -6.1788e-08, -8.0363e-08, -5.7739e-08, -4.1734e-08,
        -9.9696e-08, -2.0851e-06, -8.6631e-08, -1.9211e-08, -3.6300e-08,
        -1.3713e-06, -1.0874e-07, -1.5538e-07, -1.5216e-06, -9.0597e-08,
        -1.3159e-07, -7.1739e-06, -1.9115e-08, -1.8035e-07, -2.5901e-07,
        -4.8585e-08, -2.9398e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1164e-06, -8.4594e-07, -4.6065e-08, -6.1393e-08, -7.2674e-08,
        -3.6887e-08, -5.3179e-08, -6.7378e-08, -5.4062e-08, -4.2914e-08,
        -7.0938e-08, -5.3804e-06, -8.2775e-08, -1.9918e-08, -1.2210e-08,
        -4.8555e-06, -9.9222e-08, -1.8012e-06, -1.0148e-07, -6.8996e-08,
        -5.4469e-08, -5.2317e-08, -9.8639e-08, -1.0713e-07, -1.2032e-07,
        -4.5335e-08, -1.1947e-07, -3.4133e-08, -9.3826e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.4926e-07, -1.9129e-06, -8.9957e-08, -9.2036e-08, -1.1206e-07,
        -3.3127e-08, -4.9227e-08, -9.5120e-08, -9.0067e-08, -5.8278e-08,
        -9.5712e-08, -1.0947e-05, -1.0153e-07, -1.6493e-08, -3.8967e-08,
        -7.2693e-06, -1.8243e-07, -8.8491e-08, -7.2524e-08, -1.0399e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6747e-06, -1.0755e-06, -9.4956e-08, -1.1035e-07, -1.4105e-07,
        -1.3449e-08, -6.6081e-08, -1.2112e-07, -1.0820e-07, -6.6577e-08,
        -8.5275e-08, -4.6419e-06, -1.1863e-07, -3.8244e-09, -1.5924e-08,
        -1.2971e-05, -2.0448e-07, -2.3959e-07, -2.9146e-06, -1.0387e-07,
        -1.9257e-07, -5.9340e-06, -1.5931e-07, -6.9316e-08, -3.6418e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4354e-07, -3.6464e-06, -9.1428e-08, -1.1092e-07, -1.6508e-07,
        -3.8839e-08, -1.2830e-07, -1.6822e-07, -1.1097e-07, -8.2975e-08,
        -9.7277e-08, -1.6937e-05, -1.4918e-07, -6.5273e-08, -3.5283e-08,
        -5.9003e-06, -2.8937e-07, -4.0590e-06, -3.0904e-07, -9.5312e-08,
        -2.2520e-07, -8.7021e-06, -1.8604e-07, -1.3270e-07, -1.2018e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #5100: [tensor([-8.1536e-06, -4.2910e-06, -2.7089e-07, -2.2823e-06, -6.4711e-07,
        -5.9811e-07, -7.5341e-07, -6.0038e-07, -2.0271e-07, -7.8504e-08,
        -1.4580e-07, -1.2366e-04, -1.0521e-06, -4.8606e-06, -1.7967e-06,
        -1.1717e-06, -1.1718e-06, -5.2588e-07, -8.0702e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6604e-07, -1.8215e-06, -7.1886e-08, -6.3543e-07, -4.0929e-08,
        -5.5329e-08, -4.5373e-08, -5.9683e-08, -1.2230e-08, -3.8685e-08,
        -2.8882e-08, -3.8316e-06, -1.1221e-07, -2.2837e-06, -9.2170e-08,
        -1.2316e-07, -5.6715e-08, -1.6103e-07, -5.8979e-08, -1.2756e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2091e-07, -8.5163e-07, -3.5843e-08, -6.9399e-07, -2.9755e-08,
        -3.7408e-08, -1.3756e-08, -5.0535e-08, -3.1577e-08, -4.0741e-08,
        -2.1038e-08, -8.4955e-08, -1.5632e-07, -3.8397e-08, -4.0192e-07,
        -1.3244e-07, -1.0433e-07, -9.8145e-08, -4.2737e-08, -5.5740e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2364e-06, -1.4889e-06, -3.9683e-08, -7.5602e-07, -9.4570e-08,
        -1.1232e-07, -1.1713e-07, -7.0325e-08, -3.6452e-08, -1.5500e-08,
        -4.6199e-08, -2.2672e-05, -1.4003e-07, -1.9019e-06, -1.2368e-07,
        -2.7478e-07, -1.0045e-07, -3.1449e-07, -2.6250e-07, -1.2523e-07,
        -6.9675e-08, -1.6794e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3313e-06, -2.4366e-06, -8.1420e-08, -1.0002e-06, -1.0008e-07,
        -2.2473e-07, -9.9676e-08, -1.6136e-07, -8.5927e-08, -1.7309e-08,
        -6.2215e-08, -1.3403e-05, -2.3244e-07, -4.6850e-06, -2.9746e-06,
        -1.5243e-07, -2.1216e-07, -1.8704e-07, -9.6078e-08, -1.5500e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8267e-07, -2.9930e-06, -8.0304e-08, -7.7033e-07, -6.6055e-08,
        -1.0689e-07, -8.1097e-08, -8.2143e-08, -1.8071e-08, -1.2043e-08,
        -5.1516e-08, -3.7186e-06, -1.6248e-07, -2.6410e-06, -1.3389e-07,
        -6.4728e-06, -1.6565e-07, -1.3883e-07, -1.1621e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2053e-06, -2.8792e-06, -1.2736e-07, -1.8119e-06, -1.8048e-07,
        -4.9465e-07, -5.2576e-07, -4.0870e-07, -2.2881e-07, -1.0322e-07,
        -1.6983e-07, -8.8758e-05, -8.3975e-07, -3.1309e-06, -4.6977e-07,
        -6.8664e-07, -2.6907e-06, -1.3006e-06, -4.9466e-07, -5.0048e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5640e-06, -3.0847e-06, -1.8630e-07, -1.8693e-06, -2.1003e-07,
        -4.4256e-07, -3.4649e-07, -4.5375e-07, -9.9000e-08,  1.3756e-08,
        -1.7955e-07, -6.9936e-05, -5.8979e-07, -3.6899e-06, -5.0776e-07,
        -1.3034e-05, -5.9101e-07, -1.9350e-07, -4.6787e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3792e-06, -2.3540e-07, -2.5314e-07, -2.4253e-07, -1.3889e-07,
        -8.2961e-08, -2.8531e-07, -6.6028e-07, -7.7339e-07, -6.7637e-09,
        -6.6533e-08, -6.9086e-06, -1.7208e-07, -1.0555e-07, -3.1762e-07,
         4.6510e-08, -9.2413e-08, -2.5583e-07, -6.1209e-08, -5.9771e-08,
         2.6563e-08, -8.4209e-08, -1.3247e-05, -3.2927e-07, -5.1677e-06,
        -2.3746e-07, -1.0151e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9247e-06, -1.4913e-07, -2.5894e-07, -1.7793e-07, -1.3271e-07,
        -7.9813e-08, -4.5897e-07, -5.6875e-07, -5.3909e-07, -1.5781e-08,
        -6.8901e-08, -7.1108e-06, -1.0589e-07, -1.0057e-07, -2.8715e-07,
        -2.6414e-08, -1.3880e-07, -2.8089e-07, -7.3880e-08, -9.3078e-08,
         2.7739e-08, -3.4069e-08, -1.4377e-05, -2.7736e-07, -7.1069e-06,
        -1.9026e-07, -3.3154e-07, -3.3746e-07, -1.1228e-07, -1.6747e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5956e-07, -9.5276e-08, -1.4629e-07, -1.0904e-07, -8.1944e-08,
        -3.3381e-08, -1.8198e-07, -4.9920e-07, -3.8147e-07, -1.0715e-08,
        -6.1433e-08, -8.2065e-06, -8.9452e-08, -7.8945e-08, -2.0903e-07,
        -2.0625e-08, -4.9583e-08, -1.6009e-07, -7.8246e-08, -6.7757e-08,
         1.2762e-08, -1.6209e-08, -2.9997e-07, -2.3849e-07, -5.7548e-06,
        -1.2724e-07, -1.0141e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2033e-06, -2.8252e-06, -4.8420e-07, -7.3670e-07, -1.9539e-06,
        -1.2125e-07, -3.6698e-07, -4.2446e-07, -3.7911e-07, -3.2578e-06,
        -2.0553e-07, -4.0180e-07, -4.6292e-07, -3.8455e-06, -4.3931e-07,
        -8.8425e-08, -2.4763e-07, -6.2280e-06, -4.9677e-07, -2.3354e-07,
        -1.4537e-07, -4.8175e-07, -7.5682e-05, -3.5554e-07, -9.9600e-07,
        -5.8609e-07, -6.8274e-07, -3.4009e-06, -4.7926e-07, -4.1108e-07],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #5150: [tensor([-1.3261e-06, -5.9864e-08, -2.3178e-08, -7.3805e-08, -1.3810e-08,
        -2.6153e-08, -3.8653e-08, -4.1765e-08, -2.1953e-08, -1.2993e-06,
        -8.9250e-07, -1.0405e-07, -5.2088e-08, -2.1926e-07, -1.2129e-07,
        -9.6386e-08, -1.4260e-07, -2.0895e-07, -7.3378e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9346e-06, -1.1605e-07, -3.8318e-08, -1.2091e-07, -5.1694e-08,
        -3.5535e-08, -7.9981e-08, -9.0463e-08, -8.1554e-08, -7.3053e-07,
        -8.8035e-07, -9.2383e-08, -5.5551e-08, -1.7297e-05, -1.6178e-07,
        -1.6348e-07, -1.6866e-07, -1.3470e-07, -1.8683e-07, -8.6010e-08,
        -1.0866e-07, -7.5456e-08, -9.8932e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0004e-07, -1.1039e-07, -3.0861e-08, -1.0369e-07, -4.0682e-09,
        -5.9465e-08, -2.8296e-08, -5.3456e-08, -4.6850e-08, -1.4600e-06,
        -1.0375e-06, -1.0549e-07, -4.0306e-08, -2.9443e-07, -2.3467e-07,
        -4.2190e-07, -6.7174e-08, -1.1307e-07, -9.7441e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4407e-07, -9.6920e-08, -3.9763e-08, -1.0104e-07, -6.1189e-09,
        -4.8785e-08, -3.1395e-08, -3.4922e-08, -3.2802e-08, -7.0062e-07,
        -4.3961e-07, -8.2594e-08, -4.9870e-08, -1.2318e-06, -4.9893e-08,
        -1.0952e-07, -4.5481e-08, -2.3044e-07, -7.4508e-08, -2.1898e-07,
        -5.8730e-08, -4.8435e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1513e-06, -7.0590e-08, -2.0458e-08, -1.0273e-07, -3.2401e-08,
        -2.3068e-08, -3.6544e-08, -4.7782e-08, -4.7280e-08, -8.8709e-07,
        -7.0357e-07, -7.2786e-08, -5.2719e-08, -3.4456e-07, -3.6853e-07,
        -1.1930e-07, -1.2309e-07, -1.5884e-07, -5.4898e-08, -4.3795e-08,
        -6.2391e-08,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3798e-07, -8.3585e-08, -7.3574e-09, -1.1117e-07, -3.7879e-09,
        -3.0875e-08, -3.4483e-08, -5.6874e-08, -4.4820e-08, -5.2591e-07,
        -6.0654e-07, -8.4663e-08, -3.5922e-08, -1.3677e-06, -6.5160e-08,
        -4.1329e-07, -7.1034e-08, -3.4337e-06, -7.6569e-07, -4.2466e-08,
        -9.5503e-08, -5.0345e-08, -8.9649e-08], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.1728e-07, -8.4596e-08, -4.0940e-08, -7.3859e-08,  2.3645e-09,
        -3.4087e-08, -4.6272e-08, -6.0444e-08, -6.1017e-08, -4.3357e-07,
        -6.4315e-07, -8.9045e-08, -5.2422e-08, -3.6518e-07, -4.0340e-07,
        -6.9599e-06, -4.3030e-08, -2.1712e-07, -9.2129e-07, -7.2077e-08,
        -5.5276e-08, -4.4239e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3386e-07, -6.1462e-08, -2.8797e-08, -9.5178e-08, -6.8832e-09,
        -3.8089e-08, -3.8532e-08, -3.7279e-08, -4.6721e-08, -8.8162e-07,
        -4.6143e-07, -1.1591e-07, -4.2053e-08, -2.1226e-06, -5.0185e-08,
        -3.8130e-07, -9.4289e-08, -6.6482e-08, -6.2309e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8548e-06, -9.9182e-08, -1.0171e-08, -9.5614e-08, -1.1810e-08,
        -4.3215e-08, -4.7439e-08, -8.4508e-08, -4.6666e-08, -1.3603e-06,
        -1.0559e-06, -9.1878e-08, -2.3149e-08, -1.6081e-06, -8.7022e-08,
        -7.9020e-07, -6.2728e-08, -8.0524e-08, -8.4724e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1076e-06, -1.7032e-07, -1.4682e-07, -1.3380e-07, -6.5727e-08,
        -8.8295e-08, -1.8918e-07, -1.8714e-07, -1.6422e-07, -9.3816e-07,
        -1.0231e-06, -2.2484e-07, -1.2324e-07, -2.1812e-06, -2.3179e-07,
        -7.7723e-07, -2.7269e-07, -4.0205e-05, -3.0739e-07, -4.4956e-07,
        -1.4073e-07, -9.9810e-08,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4230e-07, -7.3287e-08, -2.2123e-08, -9.7430e-08, -2.6878e-08,
        -2.3886e-08, -3.6294e-08, -2.5975e-08, -2.4491e-08, -4.1359e-07,
        -6.0310e-07, -9.2019e-08, -4.5921e-08, -1.4161e-06, -6.0054e-08,
        -3.4348e-07, -6.5612e-08, -1.0803e-07, -4.5793e-08, -5.3768e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9871e-07, -7.3377e-08, -2.8205e-08, -1.7960e-07, -1.4147e-08,
        -3.9535e-08, -5.0129e-08, -3.7559e-08, -3.5373e-08, -4.7520e-07,
        -7.6107e-07, -8.4517e-08, -2.5644e-08, -1.9828e-06, -6.7219e-08,
        -2.4441e-07, -1.3243e-07, -1.1271e-07, -7.2646e-08,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #5200: [tensor([-7.8774e-07, -1.6502e-06, -9.6781e-08, -1.1262e-07, -7.8227e-07,
        -1.8940e-08, -5.9559e-08, -1.5777e-06, -6.2252e-08, -1.7510e-08,
        -6.4224e-10, -5.0640e-08, -1.2892e-06, -1.0159e-07, -1.1500e-07,
        -1.5595e-07, -6.4192e-07, -4.2892e-06, -1.1484e-07, -1.2128e-07,
        -2.3715e-07, -9.0496e-07, -4.3719e-08, -4.2812e-08, -5.4462e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8269e-07, -1.3386e-07, -1.0130e-07, -1.2809e-07, -5.1433e-08,
        -6.2386e-08, -6.0552e-07, -1.0026e-07, -4.2834e-08, -4.1558e-08,
        -1.3366e-08, -2.8242e-08, -2.3415e-07, -1.2985e-07, -6.4048e-08,
        -4.7293e-07, -7.2533e-08, -8.3224e-08, -9.1905e-07, -1.1602e-07,
        -1.9593e-07, -6.3035e-06, -1.0698e-07, -3.7245e-08, -8.8922e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5371e-07, -1.5690e-07, -5.6855e-08, -1.0551e-07, -9.2269e-08,
        -4.3422e-08, -5.3646e-07, -7.1315e-08, -6.5227e-08, -6.7934e-08,
         5.4905e-09, -3.3404e-08, -6.4522e-06, -1.4178e-07, -5.4933e-07,
        -1.2348e-07, -6.7477e-08, -3.3808e-07, -8.9853e-08, -1.7848e-07,
        -1.2715e-07, -3.0469e-08, -2.7570e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2165e-07, -1.2753e-07, -8.6270e-08, -1.8152e-07, -9.9621e-08,
        -7.2313e-08, -3.7596e-07, -6.6335e-08, -6.1054e-08, -4.8721e-08,
        -1.0893e-08, -5.3482e-08, -7.0529e-06, -1.7899e-07, -4.2807e-07,
        -1.2040e-07, -2.1074e-07, -2.2482e-07, -1.7429e-07, -3.0900e-07,
        -6.4807e-08, -7.0433e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0638e-07, -6.7856e-08, -1.0293e-07, -1.3742e-07, -1.6683e-07,
        -5.6395e-08, -3.5293e-08, -3.8219e-08, -4.8150e-08, -5.2813e-07,
        -8.2871e-08, -4.3238e-08, -1.5869e-07, -6.8616e-07, -4.1495e-08,
        -1.3785e-08, -3.6773e-08, -3.0201e-06, -1.0441e-07, -1.7831e-07,
        -1.7471e-07, -1.2179e-07, -2.8467e-07, -4.4964e-07, -1.2677e-07,
        -5.2833e-08, -5.9073e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5367e-06, -5.5966e-07, -5.4201e-07, -6.5235e-07, -8.5969e-07,
        -2.5061e-07, -2.1807e-07, -2.7924e-07, -3.9308e-07, -1.3953e-06,
        -5.7139e-07, -1.8593e-07, -7.5735e-07, -1.9617e-06, -1.3287e-07,
        -7.5971e-09, -2.6517e-07, -3.6074e-05, -8.3431e-07, -2.4487e-06,
        -6.0464e-07, -3.8839e-07, -5.6992e-05, -5.7462e-07, -1.1818e-07,
        -4.2952e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3956e-06, -8.2515e-08, -8.6299e-08, -1.2341e-07, -1.5941e-07,
        -5.0836e-08, -2.6553e-08, -5.7492e-08, -6.3976e-08, -5.8974e-07,
        -1.1814e-07, -2.6379e-08, -1.4598e-07, -6.9469e-07, -3.2552e-08,
        -2.4521e-08, -4.9770e-08, -5.8106e-07, -2.2568e-07, -9.1061e-08,
        -1.1928e-07, -1.7515e-07, -1.3906e-06, -1.3304e-06, -1.2084e-07,
        -6.1205e-08, -9.8347e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2763e-06, -5.6422e-08, -9.0633e-08, -1.6207e-07, -9.2349e-08,
        -2.8459e-08, -9.6602e-08, -1.6876e-07, -8.2455e-07, -6.1695e-08,
        -1.4393e-07, -2.0454e-07, -4.6974e-08, -3.8062e-08, -5.2405e-08,
        -8.7100e-08, -1.8440e-07, -1.9038e-07, -3.7581e-07, -2.7923e-07,
        -5.8878e-08, -6.5889e-08, -5.8294e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1036e-06, -9.6249e-08, -1.3363e-07, -1.3256e-07, -6.3700e-08,
        -5.2199e-08, -4.3903e-08, -1.6241e-07, -7.8813e-07, -5.5923e-08,
        -1.5264e-07, -2.2744e-07, -5.0213e-08, -1.3367e-08, -2.5169e-08,
        -9.5645e-06, -2.2571e-07, -1.9772e-07, -9.7586e-07, -2.3734e-07,
        -1.2781e-07, -1.8525e-08, -9.6967e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9847e-07, -5.8300e-08, -6.9980e-08, -8.9834e-08, -5.6869e-08,
        -1.8219e-08, -4.7104e-08, -9.2365e-08, -5.8683e-07, -3.8460e-08,
        -9.5851e-08, -1.3291e-07, -2.7948e-08, -1.2646e-08, -1.5198e-08,
        -8.1681e-08, -1.0856e-07, -1.1962e-07, -2.0995e-07, -1.0016e-06,
        -6.1341e-06, -5.0167e-08, -4.6103e-08, -2.0017e-07, -3.4999e-08,
        -3.1182e-08, -5.3652e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0390e-07, -3.8553e-08, -7.5315e-08, -5.7985e-08, -4.9906e-07,
        -1.8023e-08, -3.3348e-08, -9.9104e-08, -4.8833e-08, -9.1645e-08,
        -4.4100e-08, -6.8593e-08, -3.5650e-07, -4.8255e-07, -6.1866e-08,
        -1.6846e-07, -3.7523e-08, -1.4564e-08, -1.9272e-08, -5.4110e-08,
        -1.1882e-07, -2.6896e-07, -2.2391e-07, -1.2736e-07, -4.5739e-08,
        -1.7168e-07, -2.9856e-07, -1.3896e-08, -1.7591e-08,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8130e-06, -7.6301e-08, -1.0392e-07, -1.2144e-07, -7.3481e-07,
        -3.8234e-08, -6.6219e-08, -1.4587e-07, -8.1093e-08, -8.8109e-08,
        -4.5778e-08, -1.3454e-07, -5.5910e-07, -6.3343e-07, -9.1049e-08,
        -2.4132e-07, -6.8786e-08, -3.6802e-08, -2.0385e-08, -1.6206e-07,
        -3.7880e-08, -5.8865e-06, -8.4170e-08, -9.3566e-08, -1.9987e-07,
        -2.0938e-06, -7.5297e-08, -1.3693e-07, -3.3473e-08, -5.2107e-08],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #5250: [tensor([-4.7099e-06, -8.8941e-07, -6.2122e-07, -9.1916e-07, -5.2625e-07,
        -3.5691e-06, -8.1229e-07,  1.6424e-09, -2.4173e-07, -8.3198e-05,
        -1.0198e-06, -1.3769e-05, -1.1840e-06, -1.3192e-05, -1.4229e-06,
        -5.4550e-07, -8.5238e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1102e-07, -1.0352e-07, -1.9556e-07, -5.3383e-07, -9.7157e-08,
        -7.5808e-07, -1.3502e-06, -9.1912e-08, -4.5241e-08, -7.1098e-08,
        -5.9598e-08, -1.4823e-07, -2.4649e-07, -2.8277e-07, -8.6722e-08,
        -1.6585e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2243e-06, -5.1342e-08, -9.5589e-08, -3.3565e-07, -4.4065e-08,
        -3.3591e-07, -1.1057e-06, -3.8997e-08, -3.0473e-08, -2.8807e-08,
        -3.6810e-08, -1.0514e-07, -2.4272e-07, -3.7545e-08, -6.1561e-08,
        -1.5352e-07, -1.5519e-07, -8.1060e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2186e-06, -7.9965e-08, -1.9629e-07, -5.0232e-07, -7.9291e-08,
        -6.0870e-07, -1.4092e-06, -6.3802e-08, -8.4615e-08, -4.4505e-08,
        -5.1092e-08, -1.3799e-07, -2.9918e-07, -1.6749e-07, -1.1303e-06,
        -1.8602e-06, -1.6507e-07, -1.5665e-07, -1.1771e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4135e-06, -1.0799e-06, -4.0985e-08, -9.5482e-08, -7.9807e-08,
        -4.9804e-07, -1.2007e-08, -5.4214e-07, -8.1894e-07, -6.3406e-08,
        -7.6152e-08, -1.5797e-05, -9.7458e-08, -7.4924e-08,  5.5750e-09,
        -6.1929e-08, -2.2899e-07, -3.2160e-07, -1.8172e-07, -1.9681e-06,
        -1.9621e-06, -1.1139e-07, -8.5474e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4653e-06, -2.3023e-06, -2.5686e-08, -2.0743e-07, -2.3271e-07,
        -1.0024e-06, -5.4844e-08, -7.7512e-07, -1.0624e-06, -9.4983e-08,
        -1.9436e-07, -9.5403e-06, -2.7227e-07, -1.3997e-07, -1.0723e-08,
        -1.0985e-07, -4.2608e-07, -3.6289e-07, -4.0160e-07, -2.3299e-07,
        -3.9987e-07, -3.0526e-07, -4.2057e-05, -3.9559e-07, -1.8866e-07,
        -1.8059e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0939e-07, -2.2588e-06, -3.7309e-08, -1.0188e-07, -7.1834e-08,
        -5.4322e-07, -4.7086e-08, -4.0449e-07, -1.1575e-06, -4.7882e-08,
        -7.7272e-08, -1.7487e-05, -1.2139e-07, -4.9973e-08,  2.4359e-08,
        -4.4415e-08, -2.7159e-07, -2.8846e-07, -3.7265e-07, -1.2210e-07,
        -2.2563e-06, -5.8037e-06, -1.1865e-07, -1.4768e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2560e-06, -9.0960e-08, -5.5687e-08, -1.0333e-06, -1.0487e-06,
        -5.0613e-08, -9.6664e-08, -1.2192e-07, -1.3752e-08, -3.4517e-08,
        -2.9900e-08, -3.3933e-07, -1.7296e-07, -3.3750e-06, -2.0668e-06,
        -1.3574e-07, -6.0400e-08, -1.5257e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6896e-06, -1.3821e-07, -7.9149e-08, -8.8965e-07, -1.4163e-06,
        -6.9054e-08, -1.1599e-07, -1.6976e-07, -4.2197e-08, -7.1907e-08,
        -7.2580e-08, -4.1449e-07, -2.5776e-07, -3.6537e-07, -8.6670e-07,
        -2.1236e-05, -1.6339e-07, -3.0069e-07, -3.4225e-07, -9.0670e-08,
        -1.1186e-07, -1.1289e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5223e-07, -8.2864e-08, -4.2722e-08, -4.4500e-07, -6.2163e-07,
        -7.6519e-09, -5.6094e-08, -5.8253e-08, -2.0253e-08, -2.6996e-08,
        -1.9784e-08, -1.8389e-07, -8.3615e-08, -2.3935e-06, -1.3523e-06,
        -8.8132e-08, -1.1074e-07, -9.7670e-08, -4.1071e-08, -5.1392e-08,
        -5.0258e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1674e-06, -1.3880e-06, -2.5700e-07, -2.9372e-07, -1.1995e-07,
        -1.0011e-07, -1.3416e-07, -1.4161e-06, -2.2576e-07, -1.6449e-07,
        -3.5178e-07, -4.5651e-06, -1.1107e-07, -1.4129e-07, -6.4176e-08,
        -1.9399e-06, -1.1267e-07, -1.8187e-07, -9.3708e-08, -7.8445e-08,
        -1.8236e-08, -3.3707e-08, -1.9128e-09, -3.7994e-06, -5.6384e-07,
        -2.2851e-07, -1.2970e-05, -1.3759e-06, -1.8312e-05, -6.1874e-08,
        -1.8448e-07, -7.2544e-08, -2.0155e-07,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7986e-07, -9.9103e-07, -8.3045e-08, -1.1051e-07, -5.3968e-08,
        -4.1067e-08, -6.5738e-08, -9.9515e-07, -1.2030e-07, -7.9836e-08,
        -2.7781e-07, -6.2258e-06, -4.1148e-08, -9.9111e-08, -7.2965e-08,
        -3.1739e-06, -5.9001e-08, -8.2863e-08, -5.0249e-08, -6.6002e-08,
        -1.4784e-08, -1.5422e-08, -5.2388e-09, -6.8696e-06, -6.3360e-08,
        -1.1595e-07, -2.2430e-07, -1.8805e-06, -5.1250e-07, -1.4318e-07,
        -1.7010e-07, -4.6907e-08, -1.7605e-07, -4.7414e-08, -5.1482e-08],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #5300: [tensor([-7.9260e-07, -1.0014e-06, -6.7368e-08, -7.8914e-07, -5.2159e-08,
        -1.9631e-08, -2.4392e-08, -7.9912e-08, -2.4286e-08, -1.9711e-08,
        -4.7511e-08, -7.7822e-08, -3.0882e-08, -4.5664e-08, -1.0959e-08,
        -3.0189e-08, -4.6775e-07, -1.7995e-07, -8.0750e-07, -1.0809e-07,
        -6.1067e-08, -1.1717e-07, -1.0045e-07, -3.1541e-08, -2.5083e-08,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4723e-06, -1.2361e-06, -1.7323e-07, -1.6725e-07, -8.7997e-08,
        -9.4872e-08, -7.0224e-07, -1.6105e-07, -6.5978e-08, -6.2267e-07,
        -8.8455e-08, -8.5067e-06, -1.2575e-07, -1.5496e-07, -6.4914e-07,
        -1.4260e-07, -1.6559e-07, -1.3222e-07, -1.3329e-06, -2.2308e-08,
        -2.7776e-08, -1.0072e-07, -4.1289e-08,  1.5830e-09,  2.0835e-08,
        -3.2858e-06, -1.4884e-07, -3.3319e-07, -2.1340e-07, -1.0427e-05,
        -1.6976e-07, -1.1410e-07, -5.9346e-06, -2.3515e-07, -3.6974e-07,
        -2.8604e-05, -1.4320e-07, -1.7800e-07, -1.5766e-07, -2.0165e-07,
        -1.4733e-07, -1.2864e-07, -1.7065e-07, -2.0790e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5166e-07, -8.0747e-07, -6.5665e-08, -8.2478e-08, -1.7042e-08,
        -2.9294e-08, -7.9411e-07, -6.3554e-08, -2.8999e-08, -9.1422e-07,
        -4.4680e-08, -2.0094e-06, -3.9316e-08, -1.3709e-07, -7.0222e-07,
        -4.6611e-08, -5.0900e-08, -3.9057e-08, -5.6162e-07, -4.7660e-08,
        -1.5708e-08, -4.5989e-08, -2.7589e-08,  1.1887e-09, -2.2028e-08,
        -5.6888e-08, -2.3007e-07, -9.6881e-07, -3.5200e-07, -2.7883e-08,
        -6.3115e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2550e-06, -4.4197e-07, -6.5740e-08, -6.7903e-08, -8.4895e-09,
        -3.8178e-08, -3.9647e-07, -8.3170e-08, -2.3426e-08, -4.2777e-07,
        -7.0551e-08, -4.6492e-06, -4.4473e-08, -6.9620e-08, -4.3340e-07,
        -5.6544e-08, -5.0739e-08, -5.4771e-08, -4.4637e-07, -2.7018e-08,
        -3.2000e-08, -2.5960e-08, -1.6554e-08,  9.1134e-09, -2.0202e-08,
        -5.0667e-06, -5.6441e-07, -8.0563e-08, -8.0754e-08, -4.9099e-08,
        -1.5039e-07, -1.0515e-07, -6.9154e-08, -3.0227e-08, -9.4128e-08,
        -5.3275e-06, -5.9064e-08, -9.9833e-08, -6.9772e-08, -5.1783e-08,
        -1.1636e-06, -8.5862e-08, -3.2067e-07, -6.9650e-08, -2.7773e-08,
        -5.1945e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3500e-06, -2.6203e-06, -2.0109e-07, -3.9995e-07, -8.3044e-08,
        -7.0965e-08, -1.0072e-07, -2.6421e-06, -2.0192e-07, -1.0515e-06,
        -8.8673e-08, -5.3404e-07, -4.8718e-06, -1.6632e-07, -5.1003e-08,
        -1.4040e-06, -2.2365e-07, -3.2629e-07, -1.4632e-07, -1.7097e-08,
        -2.1471e-07, -1.6358e-07, -1.0434e-07, -6.5102e-08, -6.9404e-08,
         1.2954e-08, -3.6496e-08, -5.7064e-07, -3.4414e-06, -2.5240e-05,
        -3.0217e-07, -4.6957e-07, -2.6716e-07, -1.1002e-07, -1.2190e-07,
        -5.7871e-06, -4.0757e-07, -2.0347e-07, -1.5921e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8480e-06, -1.0258e-06, -1.5518e-07, -2.9313e-07, -7.8248e-08,
        -2.9209e-08, -6.3814e-08, -8.6522e-07, -1.6822e-07, -5.5116e-07,
        -1.0870e-07, -2.4389e-07, -1.5844e-06, -1.0796e-07, -8.1275e-09,
        -1.1081e-06, -1.6537e-07, -1.5207e-07, -1.0745e-07, -2.4194e-08,
        -8.7055e-08, -8.5635e-08, -8.2301e-08, -7.2936e-08, -3.7693e-08,
         4.4243e-09,  5.7407e-09, -1.3184e-05, -4.0821e-07, -3.8130e-07,
        -2.0049e-07, -5.9682e-08, -1.7630e-07, -1.0782e-05, -3.7741e-07,
        -8.4662e-06, -1.6392e-07, -4.1236e-07, -1.6517e-05, -1.8524e-07,
        -6.2143e-08, -5.8876e-06, -2.7070e-07, -2.2886e-07, -1.5924e-07,
        -1.3395e-07, -2.6308e-07, -2.6191e-07, -1.9926e-07, -1.9115e-07,
        -1.2350e-07, -1.2730e-07, -1.2821e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2594e-06, -2.9508e-06, -1.2770e-07, -2.5196e-07, -8.2982e-08,
        -4.5276e-08, -6.0214e-08, -8.9774e-07, -1.2362e-07, -8.0162e-07,
        -7.0517e-08, -3.2357e-07, -3.4345e-06, -9.9735e-08, -5.4457e-08,
        -1.0131e-06, -1.3265e-07, -1.3644e-07, -6.6478e-08, -3.2660e-08,
        -7.7283e-08, -1.2844e-07, -9.7026e-08, -4.2631e-08, -3.9345e-08,
        -1.8589e-08, -1.6903e-08, -1.5622e-06, -2.6249e-07, -1.4347e-06,
        -9.8101e-08, -2.1496e-06, -7.2677e-06, -8.4669e-08, -1.0309e-07,
        -8.0521e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3615e-06, -5.5232e-07, -3.4624e-07, -1.6724e-06, -2.4057e-07,
        -3.1517e-07, -4.7697e-07, -5.1543e-07, -3.0950e-07,  1.1402e-07,
        -1.8360e-07, -6.7776e-05, -7.5149e-07, -4.6110e-06, -5.6292e-07,
        -3.9332e-07, -4.6264e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4408e-06, -1.1021e-06, -3.9723e-07, -2.6253e-06, -8.6911e-07,
        -5.9781e-07, -9.8188e-07, -1.4003e-06, -6.3295e-07, -1.8834e-07,
        -6.4694e-07, -1.8392e-04, -1.9511e-06, -1.1901e-05, -8.0048e-07,
        -9.3486e-07, -1.4699e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0662e-06, -3.1120e-07, -1.9276e-07, -1.4073e-06, -1.9336e-07,
        -2.0753e-07, -2.5596e-07, -3.3272e-07, -1.9518e-07, -1.1735e-08,
        -9.4640e-08, -4.1552e-05, -6.2996e-07, -3.4143e-06, -5.5175e-07,
        -1.2185e-07, -3.0471e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9319e-07, -3.3779e-06, -2.5353e-07, -2.5300e-07, -1.8806e-06,
        -1.9292e-07, -3.9451e-07, -2.5666e-07, -9.9992e-08, -2.3979e-08,
        -1.0004e-07, -3.4733e-07, -2.6597e-06, -1.8212e-05, -4.1335e-07,
        -2.3850e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6257e-06, -3.2812e-06, -1.3791e-07, -2.1639e-07, -1.2964e-06,
        -7.8876e-08, -2.5148e-07, -2.0254e-07, -9.0505e-08, -7.8087e-08,
        -5.4647e-08, -3.1739e-07, -3.9042e-06, -2.4265e-07, -1.0644e-06,
        -2.3466e-07, -1.6446e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
