Iter #50: [tensor([-2.5860e-03,  5.2540e-04, -2.8954e-05,  8.9942e-05,  6.6294e-04,
        -2.4774e-05, -1.5224e-05, -4.7998e-05,  6.8631e-04, -4.0777e-04,
        -4.3351e-05,  3.6300e-05,  6.8108e-05, -6.7957e-05,  6.3383e-05,
        -8.0012e-06,  6.3142e-05, -2.9216e-04,  1.8348e-03,  9.1590e-04,
         1.5870e-03,  7.0791e-04,  3.8321e-04,  7.8428e-04,  5.8241e-04,
         5.7275e-04,  5.7703e-04,  6.8456e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7342e-03,  4.2273e-04, -2.0020e-05, -2.1766e-04, -4.7083e-05,
        -1.3582e-04,  5.3857e-04, -1.8633e-04,  3.6969e-04, -2.2282e-06,
         4.6377e-08,  4.3580e-05,  2.7392e-04,  4.3595e-04, -4.3578e-05,
         1.9685e-04,  4.0385e-04,  6.9977e-05, -1.0774e-04, -3.2014e-05,
        -4.8721e-05,  6.1218e-04, -3.6331e-05, -1.4899e-04, -1.4495e-04,
         4.6564e-04, -1.5629e-04, -6.1725e-05,  1.1950e-04,  5.4694e-04,
         1.6744e-05,  3.7563e-04,  2.8820e-04,  1.3676e-04, -3.3450e-04,
         1.0899e-04,  3.9327e-04, -1.3231e-04, -1.0024e-04, -1.1601e-04,
         4.4221e-04,  4.5694e-04,  5.9133e-04,  4.6885e-04,  5.1238e-04,
         3.8559e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0092e-03,  6.5237e-04,  1.1462e-04, -6.9083e-05, -3.3074e-05,
        -8.5493e-05,  8.0731e-04,  4.8179e-06,  5.6686e-04,  1.6969e-04,
         1.2598e-05,  1.9911e-04,  3.3580e-04,  7.0306e-04,  7.1257e-05,
         1.6841e-04,  7.1398e-04,  2.9850e-04, -2.0794e-04,  1.6492e-04,
         2.0855e-05,  7.0832e-04,  2.0676e-05, -2.7518e-04, -1.1158e-04,
         6.3593e-04,  1.9372e-05, -9.6892e-06,  1.7412e-04,  6.8237e-04,
         5.4783e-05,  3.7840e-04,  4.3759e-04,  9.1331e-05, -3.5084e-04,
         1.7255e-04,  5.0178e-04, -6.3472e-06,  4.4145e-05, -1.0884e-04,
         5.6941e-04,  1.1315e-03,  3.6574e-04,  9.7276e-04,  7.2413e-04,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5875e-03,  5.1389e-04,  1.5531e-05, -2.2270e-05, -7.9038e-05,
        -5.7325e-05,  9.3635e-04, -1.4334e-04,  4.4233e-04,  1.5582e-04,
         2.5591e-06,  1.5026e-04,  2.6509e-04,  6.9537e-04,  1.0604e-04,
         1.6536e-04,  6.3833e-04,  6.5374e-05, -1.3306e-04,  1.8317e-04,
        -1.5687e-05,  8.3610e-04,  5.6362e-05, -1.8817e-04, -2.7812e-04,
         7.3417e-04, -1.1238e-04, -7.6945e-06,  1.0948e-04,  7.7099e-04,
        -3.5269e-06,  3.0510e-04,  4.2210e-04,  1.1111e-04, -4.8274e-04,
         2.0194e-04,  3.3246e-04,  6.6961e-06, -1.0933e-04, -1.4870e-04,
         5.9075e-04,  1.2267e-03,  6.5408e-04,  5.1112e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3299e-03,  7.7645e-05,  5.1724e-05, -3.6710e-04, -1.4878e-04,
         1.6893e-04,  8.8786e-05,  7.1152e-04,  1.4407e-04, -7.3504e-05,
        -1.0824e-04,  1.5573e-04,  1.2822e-04, -1.7187e-04,  2.8326e-05,
        -1.2718e-04, -5.1799e-05, -2.3435e-05,  2.0785e-04,  1.7535e-05,
        -4.5430e-05,  1.2954e-03,  4.9025e-04,  9.9273e-04,  7.8649e-04,
         8.5821e-04,  1.7350e-03,  4.9847e-04,  6.7080e-04,  5.9547e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1903e-03, -6.1750e-05, -4.8575e-05, -3.4605e-04, -2.6236e-05,
         8.3282e-05,  1.4876e-04,  6.2999e-04,  6.6469e-05, -2.6936e-04,
         5.7137e-05,  7.0111e-05,  7.9521e-05, -1.9078e-04, -7.1135e-06,
        -8.5064e-05, -1.3187e-06, -1.0364e-04,  2.5489e-04,  1.5846e-05,
        -2.3375e-04,  7.3366e-04,  5.0913e-04,  5.4254e-04,  9.3815e-04,
         1.7680e-03,  9.0355e-04,  6.2461e-04,  6.1248e-04,  5.5620e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9586e-03, -1.4945e-04,  1.4488e-05, -2.2124e-04, -1.9617e-04,
         7.3954e-05,  1.5819e-04,  6.5593e-04,  1.2039e-04, -9.8070e-05,
        -1.9268e-05,  1.1174e-04, -6.7847e-06, -8.4037e-05,  1.4132e-04,
        -1.6906e-04,  1.3754e-04, -4.9260e-05,  2.2841e-04, -6.5328e-05,
        -2.4669e-04,  1.3249e-03,  3.6042e-04,  5.2801e-04,  1.3665e-03,
         9.9301e-04,  9.1477e-04,  1.1721e-03,  5.2561e-04,  5.2090e-04,
         4.7631e-04,  4.9367e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7438e-03,  9.4087e-04,  1.0093e-04, -1.3464e-04, -6.8926e-05,
         4.9912e-05,  1.8475e-05, -2.1542e-04,  2.7775e-04, -2.4917e-04,
         4.4239e-05,  1.0683e-03, -5.4173e-05, -1.4062e-04,  7.5696e-05,
        -1.5239e-04,  7.7551e-04,  9.6227e-04,  1.5108e-03,  1.2313e-03,
         2.1577e-03,  9.8117e-04,  7.5943e-04,  6.3390e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9978e-03,  8.4292e-04, -4.4394e-05, -2.7087e-04,  7.9958e-05,
         1.0101e-05,  9.4536e-05, -2.1802e-04,  1.8435e-04, -3.1912e-04,
         1.6347e-04,  1.0357e-03,  1.8684e-04, -2.4300e-04,  9.0509e-05,
        -1.3676e-04,  1.0335e-03,  1.3003e-03,  1.6403e-03,  1.0370e-03,
         1.3206e-03,  1.8743e-03,  7.3447e-04,  9.8529e-04,  7.1970e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1781e-04,  8.2724e-04,  1.3946e-04, -6.5948e-05,  1.4106e-04,
         6.1516e-05,  2.8240e-05, -2.3322e-04,  3.7219e-04, -3.5135e-04,
         1.2149e-04,  1.1580e-03,  2.6838e-04, -5.9391e-05, -2.5615e-05,
        -2.4846e-04,  9.6790e-04,  1.3080e-03,  1.7796e-03,  1.3967e-03,
         1.8110e-03,  1.3289e-03,  7.9364e-04,  8.0253e-04,  4.5176e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0125e-04,  7.8564e-04,  4.6600e-06, -1.2704e-04,  8.4960e-04,
         2.0864e-05,  2.8220e-04,  7.5560e-04, -6.4135e-05, -7.1154e-05,
         5.5277e-05,  8.7207e-04, -6.9807e-05,  5.7153e-05, -7.1175e-05,
        -1.7488e-04,  1.8927e-03,  9.0235e-04,  1.8580e-03,  9.3242e-04,
         1.6115e-03,  6.8975e-04,  1.0282e-03,  1.6469e-03,  6.5809e-04,
         8.0712e-04,  1.6510e-03,  6.9648e-04,  7.9659e-04,  4.7854e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5715e-05,  8.6848e-04,  1.7579e-04, -7.7206e-05,  1.1171e-03,
         3.2517e-05,  5.5079e-04,  9.3584e-04, -1.5658e-04, -1.1684e-04,
        -2.7215e-04,  1.0463e-03, -5.4768e-05,  1.0440e-04, -7.5269e-05,
        -2.4430e-04,  2.3932e-03,  1.0761e-03,  3.0208e-03,  9.2179e-04,
         1.0367e-03,  7.9556e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 3.2172e-04, -3.3066e-04, -6.5008e-05,  5.2176e-04, -2.6256e-04,
        -9.9067e-06,  7.9223e-04,  1.6784e-04, -2.5042e-05, -1.1779e-04,
         7.5291e-04, -1.8723e-04,  1.0495e-04,  6.6690e-06, -2.3551e-04,
         8.0289e-04,  1.3552e-03,  2.0569e-03,  1.3947e-03,  1.0687e-03,
         2.9341e-03,  5.6623e-04,  7.5335e-04,  7.5415e-04,  5.8682e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1318e-03, -3.1469e-04,  1.1924e-05,  6.7685e-04, -2.0208e-04,
         3.0970e-04,  1.2014e-03,  1.2142e-04, -9.7608e-05,  1.3848e-05,
         1.1460e-03, -3.2150e-04,  2.6402e-04, -1.3324e-04, -2.9633e-04,
         1.1935e-03,  1.8447e-03,  3.0807e-03,  2.6898e-03,  4.6022e-04,
         5.8513e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2462e-03, -1.9172e-04,  3.1727e-05,  5.1925e-04, -3.3293e-04,
         4.7479e-05,  1.2734e-03,  2.2689e-04,  4.1604e-08,  1.2355e-04,
         9.8753e-04, -3.1513e-04,  2.3490e-04,  2.0586e-05, -4.7301e-04,
         1.2013e-03,  1.2588e-03,  2.4125e-03,  1.4098e-03,  4.2411e-03,
         1.9712e-03,  1.1432e-03,  6.9305e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6194e-03, -2.6987e-04, -2.3708e-04, -2.2977e-04, -2.2332e-04,
         6.4268e-05, -5.7706e-05,  9.0247e-05,  1.0205e-03,  1.5793e-04,
         8.7554e-05,  4.4773e-04, -1.6950e-04, -4.6816e-05, -3.3349e-04,
        -2.5982e-04,  9.8655e-04,  1.0942e-03,  1.1540e-03,  1.1130e-03,
         3.1058e-03,  1.4407e-03,  1.3621e-03,  2.9441e-03,  9.2503e-04,
         1.3710e-03,  3.1767e-03,  1.7611e-03,  8.8346e-04,  4.5005e-04],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2054e-04, -2.8164e-04,  1.2290e-04, -1.0065e-04, -1.7650e-04,
         9.6723e-05, -1.0066e-04, -3.2998e-05,  9.6643e-04,  1.7067e-04,
         2.6247e-05,  4.9747e-04, -1.9736e-04, -2.2643e-04, -4.4294e-05,
        -3.5310e-04,  8.1819e-04,  1.1571e-03,  1.1650e-03,  1.1758e-03,
         3.4183e-03,  1.7208e-03,  4.3694e-04,  1.3234e-03,  7.6996e-04,
         8.0240e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9992e-03, -2.2089e-04, -1.8179e-04, -6.6836e-05, -1.7425e-04,
         1.0614e-04, -3.4977e-05, -1.5023e-04,  7.6197e-04, -3.8278e-05,
        -1.0343e-05,  2.6510e-04, -5.8888e-05, -2.0410e-04, -4.1890e-05,
        -2.2603e-04,  1.1078e-03,  1.0940e-03,  1.0847e-03,  1.2624e-03,
         3.2166e-03,  8.6282e-04,  9.6189e-04,  1.3211e-03,  2.1701e-03,
         8.8806e-04,  1.1503e-03,  8.6137e-04,  2.5268e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2909e-04, -7.2229e-05, -8.4191e-05, -2.4486e-05, -2.9787e-04,
         1.0127e-03,  1.9554e-04,  4.3265e-05,  1.0334e-03,  3.3116e-04,
        -2.9713e-04, -3.3074e-04,  4.0812e-03,  8.8453e-04,  4.6776e-03,
         1.0101e-03,  1.4024e-03,  1.1949e-03,  1.7265e-03,  9.7755e-04,
         1.0144e-03,  1.2762e-03,  3.8816e-04,  5.9761e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1207e-04, -2.3378e-04,  7.1088e-05, -3.4339e-04, -3.9810e-04,
         1.3468e-03,  2.5239e-04, -2.0706e-04,  1.1137e-03,  1.6089e-05,
        -3.7222e-04, -2.8233e-04,  4.8353e-03,  1.1763e-03,  8.8238e-04,
         4.3255e-03,  1.6101e-03,  1.2943e-03,  3.7816e-03,  1.6218e-03,
         4.6913e-04,  5.2847e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4280e-04, -6.9480e-05, -1.8453e-04, -2.8373e-04, -2.2719e-04,
         8.3189e-04,  1.8892e-04, -5.5046e-05,  8.1100e-04,  1.0921e-04,
        -3.6347e-04, -3.3496e-04,  3.9220e-03,  1.3175e-03,  1.3964e-03,
         1.7707e-03,  1.4423e-03,  3.1673e-03,  1.7216e-03,  1.8986e-03,
         2.8693e-03,  1.7499e-03,  6.1333e-04,  6.9593e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6649e-04, -2.2905e-04, -6.9083e-05, -1.3758e-04, -3.0022e-04,
         7.9527e-04,  2.8161e-04,  1.9330e-05,  8.0048e-04, -1.7812e-04,
        -6.1207e-05, -1.1429e-04, -4.1222e-04,  1.3399e-03,  8.7433e-04,
         9.4610e-04,  1.0027e-03,  1.9782e-03,  2.9943e-03,  9.8117e-04,
         6.8098e-04,  4.1299e-03,  9.7826e-04,  9.0555e-04,  7.0745e-04,
         6.6521e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5197e-03, -1.0921e-04, -4.5442e-05, -1.7201e-04, -2.6502e-04,
         6.8129e-04,  1.1538e-04, -9.7055e-06,  5.8773e-04, -8.1379e-05,
        -4.0337e-06, -3.6377e-05, -1.6471e-04,  8.7365e-04,  2.8851e-03,
         1.0040e-03,  7.3401e-04,  6.9309e-04,  8.6422e-04,  2.2894e-03,
         7.1865e-04,  8.9734e-04,  3.9344e-04,  8.3426e-04,  1.6063e-03,
         9.7014e-04,  6.0959e-04,  3.2678e-04,  4.4533e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4246e-04, -4.2299e-05,  1.2741e-06, -1.2221e-04, -2.3814e-04,
         6.1657e-04,  7.4819e-05, -7.4281e-05,  6.3731e-04, -3.6499e-05,
        -7.6048e-05, -1.4176e-04, -1.0420e-04,  9.3407e-04,  7.7272e-04,
         2.0910e-04,  7.3665e-04,  7.2006e-04,  6.3897e-04,  7.3846e-04,
         6.4104e-04,  7.3579e-04,  1.6334e-03,  6.2235e-04,  5.8593e-04,
         4.9464e-04,  3.7363e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 7.5538e-04,  8.9990e-06,  1.7764e-04,  1.5556e-05, -2.2229e-05,
         7.6885e-05, -1.5159e-04,  1.3655e-04,  3.1348e-05,  4.6937e-05,
         2.3768e-04, -5.8990e-05,  8.0243e-05, -9.6174e-05,  6.6769e-05,
         1.0820e-05,  7.0721e-04,  3.1957e-04,  8.2468e-04,  5.8655e-04,
         4.3606e-04,  3.9185e-04,  5.7479e-04,  2.1385e-04,  1.6713e-03,
         8.4720e-04,  1.6819e-03,  9.3277e-03,  1.4043e-04,  6.7155e-04,
         2.5510e-04,  1.4092e-05, -9.2912e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2175e-04, -8.6906e-06,  2.2578e-05, -8.3513e-05,  1.0472e-04,
         2.2745e-04, -1.7938e-06, -4.4777e-05,  1.1596e-05,  1.1868e-04,
        -4.2589e-05, -9.5216e-06,  3.6363e-05,  1.2961e-05, -1.3005e-05,
         1.1245e-04, -6.2077e-05,  2.5010e-05,  3.9711e-05,  8.6693e-04,
         4.8765e-04,  1.2004e-04,  4.5377e-04,  7.8669e-03,  4.2920e-04,
         3.0912e-04,  2.1017e-04,  2.1669e-04,  7.5515e-04,  2.7054e-04,
         2.5145e-04,  2.0624e-04,  3.9736e-04,  4.3022e-04,  8.3783e-05,
        -9.9378e-06, -2.2051e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7734e-04,  1.3565e-05,  8.0440e-05, -1.5275e-04,  1.3017e-04,
         2.8437e-04, -1.0822e-05, -4.9201e-05,  3.3670e-06,  1.3136e-04,
         7.1122e-06, -9.8781e-06, -4.6625e-06,  8.0446e-05,  8.5299e-05,
         1.8159e-04, -1.0423e-04, -3.2899e-06,  1.9504e-05,  4.1267e-04,
         4.1577e-04,  1.8528e-04,  5.7985e-04,  1.0261e-02,  4.3067e-04,
         2.8323e-04,  3.3817e-04,  3.2525e-04,  5.9081e-04,  2.3308e-04,
         3.4899e-04,  2.6553e-04,  5.9164e-04,  5.2591e-04,  1.3782e-04,
         1.6456e-04, -1.2066e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6462e-04,  4.9772e-05,  2.9179e-05, -1.4456e-04,  8.3576e-05,
         2.4218e-04,  1.9633e-05, -3.6584e-05,  3.3333e-05,  1.0954e-04,
        -1.0268e-06,  3.1560e-05,  1.5070e-05,  8.6361e-05,  5.8513e-05,
         1.0548e-04, -8.4020e-05,  2.6065e-05,  3.1957e-05,  4.1805e-04,
         1.6753e-04,  8.9981e-05,  5.0746e-04,  5.5773e-03,  5.6844e-04,
         2.1406e-04,  2.3554e-04,  8.2001e-04,  1.3959e-04,  7.7010e-04,
         2.5595e-04,  1.7815e-04,  2.1566e-04,  4.3674e-04,  4.6348e-04,
         1.2701e-04,  9.1537e-05, -1.5489e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5192e-04, -5.4243e-05,  3.2645e-05, -5.8445e-06,  1.0570e-04,
        -5.6411e-05,  7.2984e-05, -1.0464e-04, -2.4923e-05, -2.5281e-04,
         8.6178e-06,  2.1422e-06, -1.0982e-04, -5.0786e-06,  1.8273e-04,
        -1.1318e-05, -1.9780e-05,  7.5723e-03,  4.4996e-04,  4.0813e-03,
         8.7744e-04,  1.7133e-03,  2.0454e-03,  8.2071e-04,  5.9649e-04,
         1.0136e-02,  5.4258e-04, -1.3145e-04, -1.8817e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2197e-03, -4.7165e-05, -3.6857e-05,  8.2199e-05,  1.6786e-04,
        -8.1199e-05,  8.1834e-05, -1.7358e-04,  1.2718e-04, -2.9545e-04,
        -1.2131e-05,  4.8734e-05, -1.9763e-04,  8.8229e-05,  1.4497e-04,
        -1.9212e-05,  1.1151e-04,  3.5860e-04,  1.2318e-02,  1.8276e-04,
         8.4180e-05,  4.2570e-04,  1.1228e-03,  1.3649e-02,  1.0980e-03,
         4.8350e-04, -4.3270e-05,  3.6982e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3795e-03, -6.6045e-07, -4.8630e-05,  1.5410e-04,  2.3901e-04,
        -1.4032e-04,  1.8680e-04, -2.5436e-04,  1.2459e-04, -4.2787e-04,
        -4.7178e-05,  1.6344e-04, -7.2741e-05,  3.0265e-04,  2.6687e-04,
         3.5239e-05,  1.5408e-05,  1.0871e-02,  9.1064e-04,  5.4353e-03,
         1.9642e-04,  4.4938e-04,  2.1370e-04,  1.5380e-05,  1.9080e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6357e-08, -1.7583e-04,  6.3828e-04,  8.9458e-05,  2.4816e-04,
         4.1631e-04,  4.0499e-06,  2.4271e-04,  2.4355e-04, -8.8426e-05,
         2.6191e-04,  7.6626e-04,  9.8054e-04,  5.1407e-03,  1.8507e-02,
         1.0468e-03, -2.3252e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8865e-03, -1.3430e-04,  3.8571e-04,  1.4081e-04,  1.3297e-04,
         2.2661e-04,  3.1359e-05,  6.9045e-05,  1.5166e-04,  1.4651e-05,
         1.3900e-04,  8.5803e-04,  7.8364e-04,  2.0958e-03,  1.2185e-03,
         8.3287e-04, -5.2145e-06,  4.3322e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1376e-03, -1.4145e-04,  3.7085e-04, -5.4697e-05,  2.1175e-04,
         2.5307e-04,  8.1623e-05,  1.1901e-04,  7.1765e-05, -1.0687e-04,
        -9.1642e-05,  1.0520e-03,  5.3600e-04,  5.3379e-03,  2.1537e-02,
         7.8303e-04, -2.9885e-04, -6.0028e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1471e-03,  2.7105e-04,  1.5708e-04, -4.7777e-05,  3.1167e-04,
        -7.5575e-05,  1.0942e-05,  5.1116e-04,  4.0040e-05,  7.4748e-05,
        -4.1572e-05,  7.2892e-05,  1.3927e-02,  4.6725e-04,  1.0246e-03,
         6.7896e-04,  3.2900e-03,  8.5852e-04,  3.2747e-03,  1.4839e-02,
        -1.5892e-04, -1.5632e-04, -7.6815e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3414e-03,  2.5626e-04,  1.3077e-04, -3.7971e-05,  3.5108e-04,
        -2.0955e-04, -1.2228e-04,  4.9450e-04,  5.4989e-05,  4.7323e-05,
        -4.7909e-05,  4.2664e-05,  1.7477e-02,  6.8927e-04,  1.3022e-03,
         5.8415e-04,  5.4721e-03,  7.8712e-04,  2.1636e-06,  5.5354e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.0106e-03, -1.2454e-04,  2.0074e-05,  1.3545e-04, -1.4665e-04,
        -9.6357e-05,  1.3703e-04,  7.4808e-05,  3.1284e-04, -7.8408e-05,
         1.0678e-05,  3.9755e-04, -1.5573e-05,  1.0595e-04,  5.1760e-05,
         2.9834e-02,  8.2509e-06, -1.0906e-04, -2.2165e-04, -1.9558e-05,
         4.7895e-02,  2.8325e-05, -1.5819e-05,  3.7972e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0164e-03, -1.4641e-04, -4.6666e-05,  1.1177e-04, -7.3231e-05,
        -8.6807e-05,  5.5497e-05, -2.4859e-05,  2.8793e-04, -2.6811e-05,
         3.3497e-05,  1.5630e-04, -8.8635e-05, -1.3802e-05,  4.1860e-05,
         3.7895e-02, -1.2399e-04, -1.5560e-04,  9.8233e-06, -1.6894e-04,
        -2.5554e-06, -2.3129e-04,  1.7790e-04,  3.0288e-02,  1.1142e-04,
        -1.9568e-04, -1.4647e-05, -2.8772e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8477e-03, -1.5780e-04,  1.2044e-05,  9.3682e-05, -7.8205e-05,
        -9.1666e-05,  9.5032e-05, -6.9318e-05,  3.4303e-04,  3.4776e-05,
         3.5602e-05,  4.5468e-04, -9.5602e-05,  5.1389e-05,  1.4773e-05,
         3.0121e-04,  3.0491e-02,  3.2964e-05, -1.3034e-05,  1.8616e-04,
         4.7879e-02,  2.2001e-04,  6.4266e-05, -1.9603e-04,  3.1678e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5346e-04,  2.8310e-04,  8.2236e-05,  1.9141e-04, -6.2453e-05,
         1.3408e-05,  1.0975e-04,  2.2619e-04,  7.1478e-04, -2.6617e-05,
        -7.9122e-06,  8.0537e-04, -2.9329e-05,  2.1095e-05,  1.5444e-06,
         7.3479e-04,  3.7542e-02,  9.9877e-05,  1.7382e-04,  5.4489e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2256e-03,  6.0321e-06,  8.4180e-05,  1.4726e-04, -3.8513e-05,
         9.4349e-06,  1.1105e-04,  6.5646e-05,  4.3068e-04, -6.2409e-05,
         6.3310e-05,  6.3551e-04, -5.0370e-06,  2.6493e-05,  8.0302e-06,
         8.3149e-05, -3.2892e-05,  3.9304e-02,  2.7052e-04,  2.2973e-04,
        -2.0407e-04, -6.1812e-05,  4.4260e-05, -1.6273e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6727e-04, -6.1354e-05,  5.4444e-04, -7.2514e-05,  1.8328e-04,
        -8.9420e-05, -8.5314e-05,  2.9975e-05,  9.8895e-05,  4.2020e-05,
         3.8043e-02, -3.0655e-05,  1.6978e-02, -2.5319e-04,  8.7845e-04,
        -1.8725e-05, -1.6814e-04,  3.5400e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3414e-04, -1.6236e-04,  2.0643e-04, -5.0130e-05,  4.4351e-05,
        -3.1061e-04, -8.8806e-05,  9.9274e-05,  7.5557e-05,  4.0594e-05,
         2.7208e-04,  7.9885e-03,  5.4778e-02,  4.1969e-05, -6.4998e-05,
         4.1452e-04, -1.6899e-04, -1.1673e-05,  2.4561e-02,  4.9511e-05,
        -1.1113e-04, -1.4488e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1433e-05, -4.0009e-05,  4.1988e-04,  9.7982e-06,  1.4921e-04,
        -9.6601e-05, -1.1556e-04,  3.4928e-05,  2.5216e-06,  1.5390e-04,
         3.4298e-02,  1.5871e-04,  3.1374e-02, -1.9405e-04, -1.2223e-04,
         3.6469e-05,  2.9389e-05, -1.1275e-04,  6.6998e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4081e-05, -1.7775e-04,  4.1880e-04, -1.0851e-04,  1.0729e-04,
        -5.7777e-04, -2.0554e-04,  2.6091e-04,  7.6053e-05,  8.3205e-05,
         6.1579e-02, -1.7558e-05,  5.2001e-02, -5.3980e-04, -4.8354e-05,
        -1.2992e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6232e-04, -1.2618e-04,  3.7498e-04, -1.6440e-05,  9.2604e-05,
        -1.7787e-04, -8.1744e-05,  7.4030e-05,  1.6672e-04,  9.3369e-06,
        -2.5560e-04,  9.7829e-03, -7.0974e-05,  1.1997e-04,  6.7121e-02,
        -1.4304e-04, -1.3951e-04,  4.5280e-05, -1.3916e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3310e-05, -5.9568e-05,  3.7765e-04,  1.8428e-05,  4.8552e-05,
        -9.1387e-05, -6.9041e-05,  1.9277e-04,  1.4012e-04,  1.3267e-04,
         3.4986e-04,  2.7362e-02, -1.3647e-05, -1.5724e-04,  2.5887e-04,
         4.0006e-02, -6.9813e-05,  2.2487e-04, -1.6645e-04, -1.5248e-05,
         1.5333e-05, -5.6645e-05, -6.5750e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7720e-04,  1.2053e-04, -1.8325e-04, -2.9020e-05, -2.2173e-04,
         3.6391e-04,  3.0496e-05, -1.7748e-04, -1.3402e-04, -1.4288e-04,
         1.1200e-05, -1.4325e-02,  7.6464e-05,  6.4513e-05, -5.8774e-02,
        -1.5299e-04,  8.7319e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 2.3075e-04,  7.3856e-05,  4.4922e-05,  7.4666e-06, -2.3018e-05,
         6.8855e-05,  2.8244e-05,  4.4307e-05,  6.4721e-02, -1.7156e-04,
         3.6091e-04, -2.7468e-04,  3.2149e-04,  2.7424e-04,  3.5244e-04,
         1.7771e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6924e-04,  1.0900e-04,  1.0186e-04, -1.9038e-04,  2.0351e-04,
        -1.9957e-04, -8.2027e-05, -3.1684e-04, -1.1531e-01,  1.3216e-04,
         2.6691e-04,  5.6035e-05, -4.3083e-04, -1.8442e-04,  7.7780e-05,
         4.1200e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5666e-05,  1.0359e-04, -1.1770e-04,  3.9378e-05,  2.3717e-05,
         2.6290e-04, -7.0879e-05, -1.9243e-05,  4.7097e-05, -8.1357e-05,
         9.8257e-05, -1.9291e-05,  2.8578e-05,  5.9444e-05,  1.7207e-04,
         1.2669e-04,  6.8718e-02,  9.2771e-05, -5.9780e-05, -7.8715e-05,
         5.8715e-05,  1.4079e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8397e-03,  1.1606e-04,  1.9744e-04,  1.4309e-05, -1.9903e-05,
        -2.4021e-04,  1.4384e-04, -2.9512e-05,  6.8206e-05,  1.4099e-04,
        -1.5510e-04, -9.3124e-05, -1.3361e-05,  4.5400e-05, -7.0050e-02,
         2.7284e-04, -2.1345e-04,  2.5487e-04, -4.7671e-02,  3.0653e-04,
         1.0367e-04, -1.6181e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4791e-04,  3.7579e-05, -3.4881e-05, -6.3402e-05, -8.9610e-06,
         3.3608e-04,  2.9232e-05, -1.7736e-05, -4.4907e-05, -7.5420e-05,
         3.0368e-04, -8.3251e-05, -5.0361e-05, -4.6322e-05,  1.0516e-04,
         1.3126e-04,  5.4576e-02,  9.2698e-05,  1.9418e-04, -1.6193e-05,
        -9.6476e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7605e-04,  8.2578e-05,  4.1038e-05,  9.6398e-05, -8.2673e-05,
         1.3649e-05,  1.0725e-05,  5.3750e-05,  7.8087e-05, -6.4488e-05,
        -2.9519e-05,  1.5903e-02, -2.1135e-04,  2.5303e-02,  1.0378e-07,
         2.6020e-02,  1.0407e-05, -7.2038e-05,  5.9167e-05,  2.6999e-04,
         8.6142e-06, -5.6155e-05, -5.0257e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6001e-05, -1.2456e-05,  1.1716e-04,  1.1573e-04, -1.9871e-06,
        -6.7610e-05,  1.6532e-04, -4.2507e-05,  4.2912e-05,  1.3700e-05,
         1.0282e-06,  8.4301e-03, -2.8514e-04,  4.9123e-02, -3.0481e-05,
        -1.2003e-04, -3.2205e-05,  3.6302e-02, -1.1524e-04, -1.6595e-04,
        -1.8056e-05,  2.8982e-04, -6.5471e-05, -4.0159e-05,  1.4434e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2123e-05, -1.4765e-04,  4.0957e-05,  1.2094e-05, -1.5273e-05,
        -3.6456e-05,  3.7980e-05, -1.5323e-05,  6.4634e-05, -4.5228e-06,
        -1.9412e-06,  3.2596e-02,  1.1742e-04, -4.4965e-05, -3.6067e-05,
         2.6168e-05,  1.0869e-05,  7.0612e-02,  2.5528e-04, -1.4005e-04,
        -1.1622e-04, -1.5430e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2680e-04,  2.3239e-05,  3.6162e-05, -1.4163e-05,  7.2068e-06,
         1.8528e-05,  6.5865e-05,  8.5251e-05, -5.1698e-05,  6.6774e-05,
         1.3075e-04,  4.6979e-05, -3.2394e-05, -3.1262e-05, -2.8684e-05,
         1.7266e-06,  3.3034e-05,  2.8432e-05,  3.6237e-05, -1.7131e-05,
        -2.3204e-04, -7.9311e-03, -4.4123e-02,  1.0408e-04, -8.3359e-03,
        -2.6936e-02,  8.3900e-05,  8.7365e-05,  1.2630e-04,  7.9967e-05,
        -9.0816e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7065e-04, -7.3929e-06, -1.0772e-04, -1.0864e-05, -8.3251e-05,
        -4.7477e-05, -6.0105e-05, -1.2325e-04,  5.9495e-05, -1.1369e-04,
        -7.1552e-05, -9.1382e-05,  4.6112e-05, -4.5557e-07,  7.6383e-05,
         1.1152e-05, -3.6442e-05, -8.1615e-05, -1.3445e-05, -1.2630e-07,
        -1.3576e-04,  3.0547e-02, -1.5126e-05,  5.2586e-02,  3.3340e-04,
         1.0258e-04,  1.9664e-04, -1.5020e-04,  1.9506e-02,  2.6385e-04,
         9.0817e-05,  1.1039e-04, -1.1843e-04, -4.9665e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0428e-04,  3.2654e-05,  1.0890e-04,  4.3581e-05, -3.0306e-05,
         8.6327e-05,  3.2917e-05,  2.0439e-06, -1.9359e-06,  7.8912e-05,
         9.2826e-05,  9.8874e-05, -6.7485e-05, -4.0818e-05, -4.0764e-05,
         2.5289e-05,  4.5751e-05,  7.9106e-05, -6.5640e-05, -1.4282e-05,
         4.9839e-06,  3.7255e-05, -1.6304e-02,  7.0976e-05, -3.5239e-02,
         9.7889e-05, -2.2636e-05,  9.3008e-06,  1.0112e-05, -4.7638e-02,
         1.5488e-04,  3.8662e-05, -3.6177e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4260e-04, -5.3391e-06, -9.8391e-05, -4.0565e-05,  3.5753e-05,
        -2.1727e-04,  1.6704e-05, -1.7015e-05,  8.9435e-05,  2.4353e-06,
         3.5363e-02, -3.0917e-04,  5.6484e-02, -2.2272e-05, -3.6911e-04,
         8.8844e-05,  8.9481e-05,  3.7231e-05,  1.0186e-04,  7.0582e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 3.1824e-05,  7.2027e-05,  1.0006e-04, -7.4989e-06,  1.0890e-05,
         4.4234e-05,  5.9833e-05,  3.2474e-05, -1.0808e-05,  4.6983e-06,
         1.1159e-05, -4.4174e-05,  7.3389e-05,  4.6445e-05, -3.4727e-05,
        -1.8217e-05,  2.2010e-05, -3.6630e-05, -4.9275e-05, -4.4308e-05,
         7.5761e-05,  7.5360e-02, -1.3645e-04, -1.1582e-05,  8.6916e-06,
        -2.7717e-05,  1.9388e-02, -3.6322e-08, -6.2087e-05, -4.4641e-05,
         4.5690e-05,  6.2407e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0045e-04,  1.0288e-04,  9.9150e-06,  3.8962e-05,  7.0804e-06,
         6.3068e-05,  3.3222e-05,  1.7123e-05, -1.3263e-05,  3.6903e-05,
         3.3626e-05,  6.0909e-05,  2.0348e-04,  6.1275e-06,  3.2997e-05,
        -1.0606e-05,  2.7446e-05, -1.1380e-05, -4.5679e-05, -5.8294e-06,
        -1.0746e-05,  1.4254e-04,  5.9275e-04,  3.5362e-02,  3.4933e-05,
         9.0817e-05,  1.4152e-04,  1.0966e-04, -1.9603e-06,  6.1972e-05,
        -6.1743e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1899e-04, -2.9321e-05, -1.7151e-04,  5.5477e-06, -7.6727e-05,
         4.0477e-05, -3.6084e-05,  3.8510e-06,  2.7626e-05, -2.3253e-05,
         3.9903e-05, -1.9839e-05,  3.0021e-05,  5.0226e-05,  6.2449e-05,
        -6.7436e-05,  7.8086e-06,  6.8638e-05,  5.4115e-05, -1.5181e-05,
         5.8652e-05, -2.9589e-02,  1.0064e-04, -2.8750e-02,  3.0024e-05,
         3.1125e-04, -4.7550e-02, -1.8248e-04,  2.4078e-04,  8.5507e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6208e-04,  5.9303e-06,  1.3099e-04,  8.3001e-05,  1.4852e-05,
         1.0578e-04,  3.5655e-05, -7.7498e-05, -7.6442e-06,  5.6094e-05,
        -6.6883e-05,  3.4292e-05,  8.4132e-05, -4.9965e-05, -1.0020e-05,
        -6.1592e-05,  2.1872e-05,  3.0183e-05, -6.6069e-05, -8.2588e-05,
        -1.7100e-05,  8.8931e-02, -2.3227e-04,  3.1217e-02, -6.4001e-06,
         7.6093e-05, -8.5060e-05, -2.4627e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4548e-04,  6.8959e-05, -2.6342e-06,  2.2646e-05,  5.8506e-06,
         3.7383e-05, -8.4066e-06,  4.2668e-06,  3.8204e-05,  3.4361e-05,
         2.1160e-06,  2.4220e-05,  1.3644e-04, -3.5882e-06,  3.4256e-05,
        -3.4394e-05, -7.3954e-06,  1.8711e-05, -1.5742e-05, -4.8366e-05,
         6.2652e-06,  2.6949e-02,  4.9354e-05,  6.6800e-05,  8.0273e-06,
         2.3040e-02,  8.9009e-05,  4.2394e-04, -4.9917e-05,  1.7074e-05,
        -6.2615e-05, -1.7584e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5977e-05,  9.1960e-06,  2.4726e-05,  3.4659e-05, -5.6803e-05,
        -3.8274e-05,  4.8346e-05,  4.1443e-05,  4.9721e-05,  3.8695e-05,
        -1.4153e-05,  3.5501e-05,  2.8183e-05,  4.4229e-05, -1.1192e-05,
         1.1428e-05,  5.6317e-07,  1.6759e-05, -1.5300e-05,  9.1120e-06,
        -1.1505e-05,  5.0448e-05,  3.6620e-05,  6.0631e-05, -4.3335e-05,
         3.3272e-05,  1.7451e-05, -3.6559e-05, -1.5009e-05, -4.5522e-05,
        -2.3686e-02, -7.1447e-05, -1.2104e-02,  7.3204e-05, -6.3747e-02,
         4.6698e-05, -9.2603e-05, -1.8426e-04, -3.7494e-05, -7.8720e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2638e-04,  3.9001e-05,  2.0125e-05, -1.1077e-05,  1.0273e-04,
         3.8726e-05, -1.2494e-05, -1.9910e-05, -2.1536e-05,  2.2010e-05,
         2.9400e-05,  8.9513e-06, -1.0931e-05, -6.9573e-05,  4.4983e-05,
         6.5208e-05,  5.3183e-05, -1.4686e-05,  3.8070e-05, -4.2081e-07,
         3.0143e-05,  5.5663e-05,  6.0825e-06, -3.1012e-05,  2.6255e-05,
         3.6208e-05,  6.3114e-05,  3.1778e-05,  6.7622e-06,  2.7265e-06,
        -4.0658e-05,  3.1691e-04,  3.4158e-02,  1.3771e-04,  5.0742e-04,
         2.6943e-04,  2.2670e-05,  3.2047e-02,  2.6327e-05, -4.7830e-05,
         3.6803e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3444e-04,  3.2899e-05, -3.4837e-05, -3.4202e-05,  1.0106e-04,
         2.3994e-05,  1.5659e-05,  4.1667e-06, -3.4710e-05, -3.0718e-05,
         2.8508e-05, -1.9371e-05,  4.4293e-06, -4.5219e-05,  1.1528e-05,
         4.6908e-06,  1.0211e-04, -3.8798e-05,  6.0701e-06,  1.5263e-05,
         2.9025e-05,  1.3112e-05,  2.8532e-06, -5.9737e-05,  4.2260e-05,
         1.4147e-05,  6.7806e-05, -1.0715e-06,  7.0097e-06, -1.5153e-05,
         6.3729e-02,  9.3913e-05,  7.2232e-06,  1.8446e-02,  7.3439e-05,
         1.0968e-04, -2.5744e-06,  7.2845e-05, -5.8004e-06,  1.8818e-05,
         2.4302e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4403e-04,  7.2418e-05, -4.8173e-06, -5.6322e-06,  1.2172e-04,
         7.3866e-05,  5.8080e-05, -2.2320e-05,  1.0337e-05,  2.2661e-05,
         4.0142e-05,  8.4117e-06, -1.8821e-05, -3.8200e-05,  3.1195e-05,
         1.9761e-05,  4.9213e-05, -7.6854e-06,  2.9801e-05,  9.1222e-06,
         3.3865e-05,  4.8159e-05, -4.1240e-05, -2.2514e-05,  2.7284e-05,
         1.5790e-06,  1.0958e-05,  1.5857e-05, -5.6264e-06,  3.3009e-06,
         4.7306e-02, -2.0978e-06,  9.6907e-05,  1.4326e-02,  8.4692e-05,
         2.1485e-04,  6.4163e-05,  3.6655e-05,  3.8115e-05,  7.5028e-05,
         4.9955e-05, -3.2948e-05,  5.7119e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2057e-04,  9.4084e-05,  3.2652e-05,  1.1654e-05,  5.6418e-05,
         1.0230e-04,  3.3466e-05, -3.5593e-05, -2.5246e-05,  4.1019e-06,
         3.9134e-05,  3.2798e-05,  1.3991e-05, -5.9680e-05,  2.4366e-05,
         4.3856e-05,  4.0108e-05,  3.8446e-06,  5.6800e-05,  8.0407e-06,
        -4.4383e-08,  2.9688e-05,  2.7337e-05, -1.0286e-04,  2.2186e-05,
         1.2307e-05,  3.8645e-05,  2.0648e-05, -6.2198e-06,  1.6964e-06,
         2.9279e-02,  4.0670e-05,  5.2868e-05,  2.8792e-02, -5.9579e-05,
         3.5652e-02,  5.6866e-05,  3.2973e-05, -4.5440e-05,  1.1726e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5203e-04,  2.7328e-05,  1.1473e-05,  2.2664e-05,  3.4826e-05,
         4.3638e-05, -8.3070e-06, -7.0392e-06, -6.3584e-05,  3.2329e-05,
         1.9122e-05,  3.3759e-05, -3.9228e-05, -6.6207e-05,  4.5189e-06,
         3.5276e-05,  6.6559e-05, -1.2415e-05,  2.1312e-05,  3.8420e-05,
         2.8147e-05,  5.9904e-05,  2.3567e-05, -3.5328e-05,  1.1269e-04,
         5.0972e-05,  7.0782e-05,  3.5551e-05, -2.8332e-05, -1.1129e-05,
         2.4724e-02,  6.8403e-05,  4.9033e-05,  4.6132e-05, -1.1029e-05,
         6.0782e-02,  9.5427e-05, -4.1510e-05,  7.6868e-05, -1.1122e-04,
         4.1806e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6560e-04,  2.8734e-04, -1.6055e-04, -4.3604e-05,  4.2422e-04,
         1.9908e-07,  5.7908e-05, -6.1555e-06,  1.8049e-06,  4.6069e-05,
         1.5974e-04,  5.3477e-05,  6.5356e-05, -4.8438e-06,  7.6033e-05,
         6.0756e-05,  1.3403e-04,  1.0047e-04,  6.0965e-05,  4.6956e-05,
         1.0455e-04,  3.2359e-04, -6.3419e-05,  6.6384e-05, -1.1698e-05,
         1.0339e-04,  3.5285e-04,  8.8054e-05, -3.2386e-05, -1.7840e-05,
         4.9708e-04,  2.3231e-03,  6.7089e-04,  3.2381e-04, -2.2565e-04,
         1.6138e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #350: [tensor([-4.1715e-04,  4.3632e-05, -9.6093e-05, -2.1724e-05, -5.5643e-05,
        -1.4688e-04, -3.0862e-05, -3.6698e-05, -2.5471e-05,  1.3272e-06,
         3.2910e-05, -8.1575e-05, -1.4665e-06,  1.0544e-05, -6.9470e-05,
        -4.8604e-05,  1.4062e-06, -3.0582e-05,  4.0374e-02,  2.6426e-05,
         2.5792e-04,  1.7073e-05,  1.0088e-01, -3.4974e-05,  1.0575e-04,
         1.0168e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9954e-04, -8.2763e-05,  1.3585e-05,  4.5507e-05,  1.3179e-04,
         9.8349e-05,  3.8554e-06,  1.5011e-04,  1.5737e-04, -7.4481e-06,
         5.2767e-05, -9.9428e-05,  7.2702e-05, -2.4076e-05,  1.0130e-01,
         3.1748e-05, -1.1387e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1716e-04,  4.7273e-06,  1.6550e-05,  5.2946e-06, -3.1449e-05,
        -8.4687e-06,  3.1899e-05,  7.1014e-05, -1.5871e-04, -8.6935e-06,
         5.7482e-06,  3.9743e-05,  4.3712e-05,  8.1928e-05,  7.2956e-02,
         3.9064e-05,  5.6750e-04,  5.8596e-02,  9.3247e-05,  1.3056e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1744e-04, -8.3440e-06,  7.8746e-05, -3.8765e-05, -9.5211e-05,
         3.8335e-05,  3.2881e-05,  1.0543e-05,  1.0400e-04,  7.5534e-05,
         3.6243e-05,  6.1681e-05,  1.7240e-05,  1.9748e-04, -3.1945e-02,
         7.5111e-05, -2.5294e-04, -8.4061e-02, -1.2625e-05, -4.5003e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9333e-05, -1.4299e-05,  3.0460e-05, -2.6558e-05,  6.2518e-05,
         8.0843e-05, -4.9460e-05, -5.9489e-05, -7.1142e-05, -7.7651e-05,
        -4.5689e-05,  1.2346e-04,  7.3941e-06, -1.1411e-06, -1.1324e-01,
         6.2435e-05, -4.5885e-05, -3.3059e-04,  7.4073e-05, -2.2717e-04,
        -1.4038e-05,  8.2200e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5064e-05,  2.3477e-05,  2.1617e-05,  1.2695e-04,  7.9569e-05,
         8.0799e-05, -5.2113e-05,  1.0505e-05, -5.8796e-05,  7.0708e-05,
        -9.2550e-05,  9.9928e-05, -4.7204e-06,  6.0936e-05, -1.2603e-01,
        -4.9822e-05, -1.4900e-04,  8.8355e-05,  1.5642e-04, -2.4338e-05,
        -7.8433e-05,  1.1539e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1574e-05, -1.1596e-04, -4.3308e-05, -9.2579e-05,  1.2396e-05,
        -7.5512e-05, -1.0005e-04, -9.4605e-05, -8.5900e-05,  1.1282e-04,
        -6.8634e-05, -4.8420e-05, -1.3511e-06, -1.0588e-04, -8.6927e-02,
        -6.1106e-05, -2.8393e-04, -2.1335e-03, -7.1285e-04, -2.9750e-05,
        -7.4991e-05,  1.2799e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2641e-04, -4.2450e-05, -1.2114e-05, -5.1649e-05, -3.8653e-05,
         1.5529e-05,  1.7828e-05,  2.0227e-05, -1.7068e-05, -1.3858e-05,
         1.8274e-05,  7.0491e-06,  5.1810e-05, -1.3492e-04, -2.7435e-02,
        -2.0572e-04, -4.5578e-05, -1.5415e-04, -2.0538e-04, -1.0959e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0795e-03, -1.0419e-04, -6.4305e-05,  7.2838e-05, -4.9890e-05,
        -7.2441e-06, -8.1377e-05,  2.1062e-05,  1.5478e-04, -4.8892e-05,
         4.3926e-05, -1.8032e-05, -1.0001e-02, -1.6155e-04, -9.7889e-02,
        -5.2886e-05,  9.7003e-05, -3.1738e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0257e-04,  2.6096e-05,  4.4973e-05, -6.2988e-06,  3.2932e-05,
         1.0345e-04, -5.2196e-05,  3.1624e-05, -1.5966e-05, -4.6309e-06,
         6.3352e-06, -7.4819e-05, -5.3316e-02, -8.5842e-05, -1.8588e-02,
        -3.5260e-05,  3.4766e-05, -1.8679e-04, -5.7049e-02, -1.6077e-04,
         1.2178e-04, -5.9572e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2687e-04,  4.7925e-05,  5.9187e-05, -2.5731e-05,  5.3851e-06,
        -6.4804e-06, -5.3284e-05, -2.1517e-05, -8.0267e-06,  6.0295e-05,
        -2.5812e-05, -8.9062e-06,  2.4159e-06, -8.5142e-02,  2.1708e-05,
        -1.0545e-04, -6.6611e-05, -3.7134e-06, -2.2701e-04, -3.7811e-02,
        -5.3706e-05,  6.9075e-05,  1.5166e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5112e-04, -9.7643e-05, -8.4281e-05,  3.4801e-05, -2.8113e-06,
         5.9371e-06, -4.0099e-05,  6.2916e-05,  2.8306e-06,  1.0228e-04,
        -1.0493e-04,  8.9241e-05, -4.3456e-05, -8.7505e-02, -1.1404e-04,
        -2.7018e-04, -6.4958e-05, -2.4775e-04, -3.9594e-02,  9.6995e-05,
         1.1204e-04, -8.5672e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 5.7811e-04,  2.2160e-06, -3.6317e-05, -3.9236e-06, -1.3983e-05,
         1.5038e-05,  2.5676e-06, -1.6960e-05,  1.7299e-06,  1.9791e-05,
         1.6593e-05,  1.6892e-05, -3.2995e-05,  5.5716e-06,  1.0510e-05,
        -2.6160e-05,  8.9948e-05, -7.2971e-07,  1.0364e-05, -5.8263e-06,
        -1.7124e-05, -2.1670e-05,  2.1082e-06,  2.6091e-05,  5.0825e-06,
         5.7281e-05,  2.9668e-05,  3.2226e-05,  2.4582e-05, -2.0192e-06,
        -4.7970e-05,  4.7710e-05,  1.1390e-04, -2.0545e-05,  4.7849e-02,
        -4.3961e-05,  1.5201e-04,  7.1143e-02,  2.1295e-05, -3.6414e-05,
         2.3645e-05,  8.5775e-06,  1.1570e-04,  9.9143e-05, -3.6110e-05,
        -1.2237e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7676e-04,  2.2100e-05,  2.3909e-05,  6.9120e-06,  2.2274e-05,
        -2.2819e-05, -1.1319e-06, -3.6437e-05,  3.9929e-05, -6.5754e-06,
        -1.0989e-05,  2.5432e-05,  3.0233e-05,  1.6415e-05,  2.8892e-05,
         2.9577e-05,  3.0376e-05,  4.0631e-07,  6.6634e-06, -1.2902e-05,
         2.5461e-05,  1.6118e-05,  5.2098e-05, -1.5792e-05,  1.3761e-05,
        -2.7738e-05,  2.1978e-05,  2.0786e-05, -7.1870e-06,  1.0919e-05,
        -1.7913e-05,  1.0837e-05, -5.7096e-05, -4.3006e-05, -1.1424e-02,
        -9.7166e-06, -5.0882e-04, -3.1142e-02, -6.3016e-05,  4.5648e-05,
         2.0034e-05, -5.6140e-05, -4.8261e-02, -6.6993e-05, -6.1011e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6623e-04,  9.6482e-06,  2.2562e-05,  1.4612e-05, -3.5193e-05,
        -6.1998e-05, -5.2629e-05, -1.3181e-05,  7.8242e-06,  3.8671e-05,
         1.3906e-05, -3.5465e-05,  3.1963e-05, -1.1836e-05, -2.8815e-05,
        -4.9556e-06, -8.3099e-06, -2.0603e-05, -2.0095e-05,  1.1636e-05,
         2.7862e-05, -1.9521e-05, -5.7171e-05, -1.2312e-05, -3.5376e-05,
        -5.5322e-05, -1.6211e-05, -1.6753e-05, -5.8047e-05, -4.0689e-05,
         2.0926e-05,  2.4227e-05, -9.7781e-05, -4.9732e-06, -2.5099e-05,
         7.6808e-05, -9.1029e-02,  2.4688e-04, -1.7862e-05, -3.4759e-02,
        -5.7963e-05,  7.4959e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9232e-04, -7.1065e-05,  9.2238e-06, -2.7003e-05, -5.6236e-05,
        -5.8628e-06, -3.0024e-05, -2.4203e-05,  2.9135e-05, -5.6624e-06,
        -3.8626e-05, -7.8922e-05,  1.2071e-05, -2.1676e-05, -8.8279e-05,
        -2.7758e-05, -9.4407e-06, -2.1871e-05,  2.5747e-05, -3.6274e-05,
         8.9624e-06,  1.2277e-05, -1.0895e-04,  1.0710e-05, -2.9514e-05,
        -1.2573e-04,  3.4639e-05, -1.6629e-05, -8.6112e-05,  1.0670e-05,
         6.6486e-07,  6.3970e-05, -3.6242e-05,  1.5516e-04, -1.2624e-05,
        -1.8014e-04, -4.0984e-02, -9.2422e-05, -1.6481e-05, -2.4731e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6427e-04,  3.0833e-06,  1.3338e-05, -3.0588e-05,  4.1264e-05,
         2.8837e-05,  1.3225e-05,  1.4379e-06, -7.4578e-06, -4.7825e-05,
         3.6227e-06, -6.1627e-06, -5.4712e-06,  3.5519e-06,  6.0566e-06,
        -2.2697e-05, -3.3506e-05, -1.7450e-06, -1.0820e-05, -5.3736e-06,
        -1.8330e-05, -3.3201e-05,  1.8026e-06,  2.4397e-05,  4.3524e-05,
         2.7837e-05, -5.6861e-05,  1.0083e-05,  6.0124e-06, -3.8910e-05,
        -6.4543e-06, -2.2082e-05, -3.1180e-05,  4.7616e-05,  6.0362e-05,
         6.0027e-05,  8.2819e-02,  1.3983e-04,  3.2829e-04,  3.4027e-02,
        -4.2490e-05,  3.2300e-05, -1.0288e-05,  1.7382e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1222e-04,  2.6381e-05,  2.3720e-05,  2.8372e-05, -1.6014e-05,
         8.6535e-06,  3.4216e-05,  7.4596e-06, -4.9175e-06,  5.8739e-05,
         2.2957e-05, -2.2251e-05,  4.0737e-05,  4.8849e-05,  1.9640e-05,
         6.7639e-05,  4.0225e-05,  1.2799e-05,  2.6781e-05,  3.0814e-05,
         8.1989e-05,  2.1447e-05, -3.3193e-05, -3.3253e-05, -9.0558e-06,
        -1.5324e-05,  1.9331e-05,  2.7308e-05,  1.9390e-05,  6.6741e-05,
         2.1664e-05,  3.8926e-05, -1.4589e-04,  4.2030e-05, -4.4097e-02,
         1.4563e-05, -8.9853e-02,  9.1293e-06, -1.4145e-04,  4.8904e-05,
        -8.8876e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1854e-04, -5.4968e-06,  3.0905e-05, -5.0784e-06, -1.0774e-06,
         2.4722e-06,  6.9580e-05, -2.1089e-06,  4.6144e-05,  1.7212e-05,
         1.5449e-06,  1.6983e-05,  9.9650e-06, -8.5024e-06,  3.6243e-05,
         7.2244e-05, -6.6395e-06, -4.1268e-06, -8.0065e-08,  2.2942e-05,
         8.4271e-06, -3.0681e-05, -9.3857e-07,  2.8128e-05,  3.9196e-05,
         2.4266e-05,  6.8327e-05, -3.3312e-05,  6.3319e-05,  5.6888e-05,
        -7.2082e-06,  9.3579e-06, -5.3508e-06,  9.5547e-05, -9.2734e-02,
        -1.3701e-05,  9.9286e-05, -8.5120e-05,  4.4886e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6322e-04, -7.6075e-05,  5.5545e-06, -6.3951e-05, -1.1077e-05,
         7.1881e-07,  2.8160e-05,  7.2052e-06, -1.5826e-05, -1.7238e-05,
        -7.3753e-05, -2.6484e-05, -1.4283e-04,  3.3227e-05, -2.4459e-05,
        -8.2596e-06, -2.5339e-05,  9.1652e-05, -9.4161e-05, -4.5463e-05,
         1.5451e-04, -1.6715e-04, -1.2291e-01,  1.6505e-04, -1.6175e-04,
        -2.5312e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5258e-05,  1.0729e-04,  4.9917e-06,  3.3226e-05,  9.5528e-05,
         2.1035e-05,  2.8073e-05,  2.0052e-04, -2.7687e-06,  2.4585e-05,
        -7.0452e-06,  5.8517e-05, -2.3745e-05,  2.9137e-05,  2.3840e-04,
         7.6407e-05, -1.2586e-05,  1.1052e-06, -2.1252e-05, -4.2390e-05,
         6.0541e-05,  1.9146e-04,  7.8260e-04,  3.2167e-04, -4.9833e-05,
        -7.7770e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0888e-05,  1.3650e-04,  1.7434e-05,  3.0184e-05,  1.4993e-04,
         6.5407e-05, -6.9165e-06,  1.3946e-04, -1.7451e-05, -1.6876e-05,
        -2.7744e-06,  6.5067e-05, -2.1860e-05,  1.3873e-05,  2.5184e-04,
         4.9988e-05, -1.1330e-05, -1.2523e-05, -2.7534e-05,  1.4990e-04,
         6.6108e-04,  1.5519e-04, -1.7774e-04, -4.2947e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3031e-04, -2.3432e-05,  7.9279e-05,  6.1399e-05,  5.5254e-06,
        -6.8167e-05, -1.6505e-06,  2.4934e-05, -6.3856e-05,  1.7439e-05,
        -4.7023e-05, -1.2665e-05,  9.2123e-05, -2.9173e-05, -1.9772e-05,
         5.0770e-05,  1.0538e-05,  5.0524e-04,  6.5245e-02, -5.8216e-05,
         2.9143e-02,  6.8542e-04,  1.0187e-03,  8.1301e-03,  1.0510e-04,
        -6.9264e-05, -2.3601e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8300e-04, -2.6014e-06, -4.7212e-05, -3.5803e-05,  3.5785e-05,
         4.9043e-05,  6.9653e-05, -3.1602e-06, -1.9595e-05,  4.7663e-06,
        -2.9475e-05,  2.1377e-05, -6.9135e-02, -3.4902e-05,  9.4909e-05,
         3.2761e-05, -7.1129e-03,  3.0922e-05,  1.8284e-04,  3.1220e-05,
         3.0527e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([-3.6122e-04, -9.4692e-05,  3.3980e-05,  5.2859e-06, -4.0689e-05,
        -3.8118e-05, -3.4230e-07, -4.7869e-05,  2.1101e-05, -8.3463e-05,
        -5.2369e-02,  9.8279e-05,  8.8964e-06, -3.2614e-06, -7.0225e-02,
         1.6540e-05, -6.5139e-03,  1.0188e-04,  1.4534e-04,  1.1408e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6259e-04, -1.4365e-05,  2.2833e-05,  4.1293e-05,  7.1125e-06,
         3.6451e-06,  2.5233e-05, -2.2864e-05,  1.5200e-06, -4.3165e-05,
         2.3566e-05, -2.2248e-05, -5.5638e-05,  2.3061e-05, -4.0491e-05,
         1.3914e-05,  1.7939e-05, -5.9626e-06, -5.5405e-05,  4.4809e-05,
        -3.7159e-05,  2.7037e-05, -2.1599e-05,  6.8332e-06, -3.1530e-05,
        -3.7601e-02, -4.0978e-05, -1.3394e-04,  4.4722e-05, -8.5781e-02,
        -9.8719e-05, -7.3497e-05,  1.0093e-05,  1.1429e-05,  4.5345e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8275e-04,  6.2060e-06, -1.0901e-05, -2.0668e-05,  4.4874e-06,
        -4.1428e-05,  2.4276e-05, -1.1593e-05,  3.3792e-05, -1.0297e-04,
        -5.9560e-05, -2.2741e-05, -4.9509e-05, -7.4468e-05, -3.7197e-05,
        -5.4115e-05, -1.6538e-05,  2.4488e-05, -1.2884e-05,  5.7610e-05,
        -2.6497e-05,  1.7326e-05, -1.1978e-05, -2.5691e-05, -4.3075e-05,
        -4.2712e-02,  1.4822e-04, -8.0042e-02, -1.3153e-04,  5.6938e-05,
        -5.6088e-05, -6.0883e-05, -1.4227e-04,  7.1172e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9707e-04,  1.1201e-05,  3.4140e-05,  3.7812e-05,  2.7066e-05,
        -2.5856e-05, -5.8784e-06,  3.5091e-06, -5.0876e-05, -5.1381e-05,
         1.0434e-05,  1.4063e-05, -5.9480e-05, -2.2602e-07, -1.9058e-05,
        -4.7197e-05,  2.0251e-05,  5.8435e-05,  1.4135e-05, -2.1673e-05,
        -3.6979e-05,  3.4274e-05, -6.1964e-05,  3.1020e-05,  1.1713e-05,
        -2.6574e-02,  4.0818e-05, -5.5654e-02,  3.3868e-05, -5.3506e-02,
         3.4244e-05,  4.7790e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4120e-04, -8.5464e-06, -1.9952e-06,  5.2343e-05,  2.8829e-05,
         2.8393e-05,  2.3098e-05,  4.0603e-05,  1.7202e-05,  1.2411e-04,
        -4.3704e-06, -1.0529e-05, -1.7119e-05, -1.8724e-06,  2.8844e-05,
         6.0877e-06,  4.5911e-05,  2.7844e-06, -2.2998e-05, -1.5458e-05,
         1.1425e-05,  6.1003e-05, -4.4227e-06, -1.5778e-05,  1.4767e-05,
         2.2419e-02, -2.7913e-05,  1.8652e-04,  5.5707e-02, -4.4661e-05,
        -8.3905e-05, -4.8356e-05,  2.1214e-04,  4.0636e-02, -6.4857e-05,
        -4.3165e-05,  5.6776e-02,  3.1804e-05, -1.3449e-05, -1.8143e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9693e-04, -5.8514e-06,  1.8528e-06,  2.3265e-05,  2.6469e-05,
        -4.9681e-05, -1.0593e-05, -3.3605e-05, -2.3663e-05,  3.7483e-07,
        -6.3265e-05,  1.4771e-05,  9.5132e-06,  1.8467e-05, -1.7156e-05,
         4.6720e-05, -2.1940e-05,  1.1696e-07, -1.6860e-06,  1.0639e-05,
         2.4248e-05,  2.5773e-06,  5.9611e-05, -1.8142e-05, -8.2766e-06,
        -5.1905e-02, -1.7403e-05, -2.3408e-02, -5.0588e-05, -7.4271e-03,
        -7.8183e-03, -3.7014e-05, -7.2397e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1889e-04,  8.7033e-05, -9.1391e-07, -4.4131e-05,  2.0970e-05,
         2.5029e-06, -6.3620e-05,  4.5748e-05,  2.7531e-05,  5.0937e-06,
        -2.9594e-05,  6.5005e-05,  2.1414e-06, -1.1638e-05, -4.7146e-05,
        -5.0774e-06, -3.0367e-06, -6.0034e-05,  7.0027e-05, -3.4478e-05,
         3.3746e-05, -1.0664e-05, -1.1244e-04, -2.7873e-05,  6.2033e-06,
         3.5961e-03,  3.8396e-05,  1.3072e-01, -2.5831e-04,  1.6847e-04,
         1.6313e-04,  1.1526e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2286e-04,  4.4783e-05, -1.5892e-05, -2.7391e-05, -2.7089e-05,
         3.2283e-05,  1.2267e-05,  4.2330e-05,  1.1636e-05,  7.7493e-05,
         9.0395e-05,  1.0749e-05, -7.6488e-07, -1.2962e-05,  7.7001e-05,
         5.9313e-05, -3.8823e-06, -1.4072e-05,  5.2438e-05, -1.7518e-06,
         1.9552e-05, -1.3517e-06,  4.0651e-05, -3.2008e-05, -3.7253e-06,
         7.5818e-02, -5.3619e-05,  2.8001e-02,  1.9469e-04,  9.6501e-05,
         7.9861e-02,  2.1273e-04, -5.9631e-05,  2.5017e-05,  6.6372e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3762e-05, -3.8837e-05,  8.6270e-06, -1.1428e-05,  3.0901e-05,
        -5.3941e-06, -5.1209e-05, -1.2003e-05, -5.7598e-05, -1.2903e-04,
        -8.8204e-05, -5.9940e-05, -8.5521e-06, -3.0786e-06, -4.4280e-06,
        -7.3457e-05, -2.3045e-05, -7.9904e-05, -4.8211e-06, -2.0578e-05,
        -3.4770e-05, -8.0637e-05, -3.7650e-05,  1.0650e-05,  2.3026e-05,
        -8.3408e-02,  2.9898e-05, -2.2668e-04, -1.1876e-05, -5.9907e-02,
        -1.4903e-04,  3.3519e-05, -6.7352e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7590e-04,  5.6486e-05,  3.4872e-05,  4.4929e-05,  8.5362e-07,
        -7.6938e-05,  2.4028e-05, -6.7616e-06, -1.0816e-04,  8.7761e-05,
         1.1491e-04,  7.0390e-06, -4.4327e-05, -8.3729e-06,  9.7854e-05,
         1.3825e-04,  1.1271e-04, -2.2708e-05,  6.0511e-05, -4.5395e-06,
        -3.1781e-07, -6.6931e-05, -3.2360e-05, -2.5442e-05,  1.7350e-05,
         1.5469e-01, -8.0699e-06, -1.1560e-05, -8.6339e-05,  1.7336e-04,
         1.4832e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7361e-04, -2.5287e-05, -3.5485e-06, -6.8869e-07, -3.1163e-05,
         1.8163e-06,  1.8870e-05,  2.1302e-05,  3.2179e-05,  2.8211e-05,
         3.8790e-05,  1.7169e-05, -8.9120e-06,  7.8808e-06,  3.2267e-06,
        -6.0677e-06, -1.9299e-05, -2.6186e-05, -1.9022e-05,  3.2804e-06,
        -2.4664e-06,  9.6614e-07, -2.1240e-05, -2.9874e-05, -1.0617e-05,
         4.6786e-02,  5.6819e-05,  1.8046e-04,  1.4706e-05, -1.7354e-05,
         2.3081e-05,  9.3832e-05,  5.1087e-04,  1.2021e-02,  5.2197e-06,
        -5.1236e-05, -2.7816e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0014e-04, -6.0151e-05,  1.9440e-05, -6.1200e-05, -7.0656e-05,
        -3.3849e-05, -2.7814e-05, -2.3723e-05, -2.7362e-05, -1.1219e-04,
        -6.2428e-05,  9.8684e-06, -7.8382e-05, -3.5007e-05, -6.4513e-05,
        -7.8972e-05,  9.7321e-07, -2.8258e-05, -8.0912e-05,  5.7415e-06,
        -2.0475e-05,  3.7316e-05, -5.9490e-05, -2.2868e-05, -2.1500e-05,
        -1.0916e-01,  1.7491e-05, -6.0135e-04,  5.2950e-07, -5.5011e-05,
        -2.1732e-04, -6.8989e-05, -2.8302e-02, -1.8166e-04,  6.4850e-05,
        -3.9434e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.5313e-04, -4.8157e-05,  2.3363e-05,  7.6782e-05,  2.9488e-06,
         6.9348e-05, -3.9302e-05, -2.4145e-05, -6.7367e-05,  6.4332e-05,
         6.6358e-05,  7.0013e-05, -6.3952e-05,  7.1925e-05, -1.2982e-04,
        -4.8830e-05,  1.9829e-05, -2.1289e-04,  1.1455e-01,  1.6899e-04,
         1.1167e-05,  6.3514e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4962e-04,  4.3055e-05, -1.6304e-05,  4.7953e-05,  3.2791e-05,
         2.0293e-05, -6.3874e-05, -8.8693e-06,  1.0275e-05,  5.4474e-05,
         6.0951e-06, -4.9590e-05, -4.5584e-05,  8.4630e-05, -4.0630e-05,
        -9.1697e-05, -4.5607e-05,  1.0923e-04, -4.0560e-05, -7.3618e-06,
        -6.2352e-02,  3.2263e-05, -6.5264e-05, -8.6899e-02, -4.8748e-05,
         7.6245e-05,  6.5825e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8693e-04, -6.8424e-05, -1.7737e-06, -3.4228e-05,  1.0493e-05,
        -9.6250e-05, -6.2738e-05, -1.3248e-05, -2.8503e-05, -5.5816e-05,
        -5.0057e-05,  2.1772e-05,  2.6900e-06, -2.4837e-02, -5.1239e-05,
        -4.9903e-05,  1.3221e-04, -2.6207e-02, -1.0866e-04, -4.4814e-05,
        -2.2605e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2769e-05, -6.7291e-05, -1.0450e-04,  8.0917e-05,  5.8494e-05,
        -3.9551e-05,  3.9935e-05,  3.4030e-05,  2.3475e-05, -4.4565e-05,
        -9.3633e-05,  9.5338e-06, -7.4159e-05, -6.0403e-02, -4.6861e-06,
         3.2656e-05,  1.9399e-04, -6.7468e-02, -3.6096e-05, -1.5591e-04,
        -2.5055e-06, -1.3534e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3413e-04,  1.0276e-04, -2.4913e-05, -3.4103e-05,  9.3263e-05,
        -9.6770e-05,  6.9269e-06,  1.9937e-05, -4.6010e-05,  7.6886e-05,
        -1.4087e-05,  1.4177e-05,  3.4381e-06,  4.4043e-02, -4.0014e-05,
        -4.6980e-05,  4.3735e-05,  1.0841e-01,  5.0675e-04, -8.7756e-05,
         6.5014e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9389e-04,  9.6997e-05, -4.5912e-05, -4.6823e-05, -4.2040e-05,
         1.4423e-06, -1.0303e-04, -7.4180e-05,  6.2743e-06,  1.7297e-05,
         7.1121e-06, -1.0665e-04,  8.8702e-05,  3.5130e-05,  2.7299e-05,
         1.2222e-01,  5.2954e-05, -3.1321e-05,  6.0284e-05, -9.7535e-05,
         5.4543e-04,  3.8002e-05, -9.1634e-06, -1.0641e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3679e-04,  1.7642e-06, -1.6233e-05,  1.4748e-05,  2.4226e-05,
        -1.4284e-05, -4.3319e-06, -9.7367e-06,  1.5463e-05,  4.0541e-05,
        -5.7361e-05,  1.9104e-05, -6.0197e-05, -1.0346e-06,  2.4642e-05,
        -4.3340e-02,  3.3056e-05,  3.0159e-05, -1.9656e-06, -4.1905e-04,
        -4.7309e-02, -2.2826e-05,  1.0119e-05,  7.9409e-05, -7.9808e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7343e-05, -1.3924e-05,  7.8064e-06, -1.4084e-05,  5.2798e-05,
        -5.4535e-05, -2.3729e-05, -6.7983e-06,  5.5991e-05,  8.8739e-06,
        -1.5200e-05, -7.4689e-05,  2.6382e-05,  2.4050e-05,  3.2973e-05,
         6.4682e-02, -4.4925e-05,  5.9316e-05,  3.8528e-04, -9.8005e-06,
         1.4363e-03, -1.9350e-05,  1.6958e-05, -6.6732e-05, -6.3134e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4039e-04, -9.6794e-06,  9.9704e-06, -9.3359e-05,  4.6193e-07,
         5.4273e-05, -2.1953e-05,  4.7216e-05,  8.9216e-06,  2.3522e-05,
         3.7807e-05,  5.8183e-05,  6.3045e-05, -5.1741e-05,  1.2422e-05,
         6.0312e-02,  1.1029e-05,  6.7388e-07, -5.5284e-05, -6.8787e-05,
        -8.7985e-05,  7.2905e-04,  6.6101e-05,  5.0139e-04,  9.1222e-05,
         4.5109e-04, -1.7613e-05,  4.6369e-06,  8.0096e-02, -2.4969e-06,
         1.2503e-05, -8.4483e-05, -7.3413e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5687e-04, -3.6981e-05, -1.0338e-05,  1.6062e-05, -4.1524e-06,
        -7.1947e-06,  1.7892e-05,  2.6894e-05,  5.5262e-05, -1.2544e-05,
         1.0330e-05,  4.1486e-05, -2.7559e-06,  5.6374e-05, -1.5087e-05,
        -5.8117e-02, -1.7698e-04, -1.0050e-04, -3.1745e-04, -3.5465e-05,
         1.3307e-04, -5.8021e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5438e-04, -1.2025e-05, -2.2315e-04,  1.5883e-04, -1.2874e-04,
        -1.0952e-04, -1.0079e-04, -7.8892e-05, -7.4116e-05, -2.0418e-05,
        -5.1022e-05, -6.8757e-05,  4.8830e-06,  9.3181e-06, -5.1855e-06,
        -1.3680e-01,  2.8715e-05, -4.4099e-04, -4.3358e-04,  9.9767e-05,
        -2.2659e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8343e-04, -2.2633e-05, -9.7363e-06, -7.8901e-06, -3.7842e-05,
         1.3028e-05,  1.5759e-05,  6.2091e-06,  4.2225e-05,  5.4791e-05,
        -3.4276e-05, -2.6161e-05, -3.5424e-05, -7.7190e-06,  1.0490e-05,
        -3.0324e-05, -2.2823e-02, -4.4425e-05, -4.2334e-02, -1.1273e-04,
        -3.2339e-04, -2.6569e-02,  8.6450e-05,  4.3669e-05, -1.2219e-02,
        -1.2842e-04,  5.4911e-05,  4.3796e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-6.5132e-04,  1.8135e-06, -8.6203e-05, -1.3949e-05, -4.7035e-05,
        -8.1439e-06, -2.3544e-05,  1.5042e-05, -7.8630e-06, -1.5125e-05,
         1.9413e-06, -1.0649e-04, -6.6701e-07,  1.3117e-05, -8.6143e-05,
        -6.1481e-05,  8.0486e-06,  4.6902e-02,  2.3236e-04,  7.5248e-02,
         1.0858e-04,  7.3900e-04,  4.3913e-05, -7.7847e-05,  3.4897e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4587e-04, -1.1427e-05, -1.1103e-05,  7.3984e-06,  3.6343e-05,
        -7.5277e-06,  4.3668e-05,  1.9882e-06, -1.6874e-05, -1.5758e-05,
        -1.6257e-05,  3.8315e-05,  3.0001e-05, -8.7531e-06,  1.2013e-05,
        -1.1114e-06, -2.5155e-05, -8.7882e-02,  5.4089e-05, -2.7268e-02,
        -1.4791e-05,  2.6246e-05, -4.7910e-04, -6.2097e-05, -3.7856e-04,
        -1.1605e-02, -2.5418e-05, -7.0924e-05,  1.5259e-05,  6.3626e-05,
         1.3827e-05,  4.2849e-05, -1.8694e-02,  2.7476e-05, -5.3854e-04,
        -1.4637e-04,  2.8874e-05,  4.6861e-05, -4.1133e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5843e-05, -1.3135e-05, -2.7712e-05, -4.7730e-05, -7.9833e-05,
        -4.0238e-05, -1.4719e-05,  3.1015e-05,  8.1345e-05, -2.7248e-05,
        -1.0095e-05, -3.8960e-05, -3.3366e-05, -1.2709e-05,  1.7070e-05,
        -5.1225e-05, -1.1759e-02,  3.9637e-05, -3.7270e-04, -2.9799e-02,
         1.7430e-05,  5.7796e-05, -6.1263e-02,  7.3919e-05, -6.1703e-05,
        -8.8108e-05,  1.4006e-05, -2.1744e-02,  6.0110e-05, -9.1020e-06,
         3.8774e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3159e-04,  1.3752e-05,  1.4117e-05,  1.7556e-05, -5.2606e-05,
        -1.5604e-05,  2.0421e-05,  3.7560e-05,  5.5262e-05,  6.4570e-05,
        -2.4658e-05, -5.6125e-05, -2.0593e-05,  4.3242e-05,  3.8950e-05,
        -9.3683e-02,  2.4060e-06, -9.2262e-03, -1.8715e-04,  2.7024e-05,
        -9.8068e-05, -1.3617e-04, -4.0309e-02,  8.2964e-05, -1.6420e-03,
        -8.1530e-05,  8.8034e-06, -5.3013e-05,  7.9936e-05,  1.3643e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3500e-04, -2.5692e-05,  2.4804e-05,  5.2235e-05, -6.6217e-05,
        -7.3740e-05, -1.2547e-05, -3.1758e-05, -1.7328e-05, -3.9642e-05,
        -7.9968e-06, -3.2749e-05, -9.6113e-06, -2.2239e-07,  4.2447e-05,
        -7.9570e-02,  5.5057e-05,  1.1537e-04,  6.4184e-05, -4.0863e-04,
        -4.2500e-02, -8.9804e-05,  7.1355e-05,  7.2660e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4545e-05, -5.5371e-05, -1.0663e-04,  1.1103e-05,  7.9667e-05,
        -2.3918e-05, -4.5188e-05, -2.5567e-06,  6.1388e-05,  3.2657e-05,
        -8.0502e-06, -2.8260e-06, -5.4903e-05,  1.5280e-04,  2.8202e-05,
        -1.2449e-05, -1.1997e-01, -1.0006e-04, -2.9305e-02, -5.9372e-05,
        -2.5753e-04, -2.0782e-05,  1.2160e-04,  3.5821e-05, -5.4137e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1523e-04, -7.2019e-05,  2.6873e-05,  5.3618e-05, -1.4447e-05,
        -5.9827e-05, -1.3033e-05, -7.6904e-05,  1.8680e-05, -5.2394e-05,
         4.0692e-05, -6.4884e-05, -2.0935e-05, -5.2429e-05,  1.1880e-05,
        -4.3452e-06, -1.4259e-02,  8.1951e-06, -6.8431e-02,  1.7892e-05,
         1.0480e-04,  4.6936e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3072e-05, -3.9829e-05,  9.4338e-06,  4.8142e-05,  3.4098e-05,
        -1.9611e-05,  4.4171e-06, -3.9792e-05, -4.7641e-06,  3.9496e-05,
        -4.1411e-05,  1.4882e-05, -2.2277e-05,  5.6325e-05, -3.8024e-06,
         2.6363e-05, -2.9357e-02,  1.6895e-05, -2.2719e-02, -3.0702e-05,
        -4.2494e-03, -4.7036e-02, -2.1955e-05,  2.9812e-05, -3.3021e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2724e-04, -9.1779e-05, -5.1045e-05, -1.1473e-04,  4.1928e-05,
        -7.9552e-05,  5.3181e-05, -1.1744e-05, -7.8234e-05, -1.5824e-04,
        -1.6388e-04,  8.1873e-05,  1.2122e-05, -6.1012e-05,  1.5476e-05,
        -3.8360e-05,  1.1784e-01, -2.6329e-04,  9.0524e-02, -1.8674e-04,
         1.1984e-04, -1.0180e-04,  1.8294e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9940e-04,  4.4768e-05,  4.4302e-05, -2.8810e-05, -8.3562e-06,
         1.0617e-05,  1.9213e-05,  2.1552e-05,  2.4950e-06, -1.6294e-05,
         1.0422e-05, -6.4346e-05,  6.0706e-05,  3.0730e-05, -1.8471e-05,
         5.2206e-05, -6.8353e-02,  7.5430e-05, -3.4075e-02, -1.0061e-04,
        -1.0434e-02,  1.5189e-05, -1.4985e-04, -4.6826e-03, -6.3911e-05,
         4.0206e-05,  5.5193e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2788e-04, -4.8840e-05, -1.0845e-04,  2.3471e-06, -1.5656e-05,
        -6.0506e-05,  2.2486e-05, -4.6962e-05, -5.5624e-05, -2.7828e-05,
        -6.9836e-05, -1.3250e-05, -1.8162e-05, -2.1362e-05, -1.8020e-05,
         3.2498e-05,  1.2189e-03, -6.2667e-05,  1.0360e-01,  2.9422e-06,
         3.1163e-02,  3.9304e-05,  3.3336e-06, -1.5955e-04, -9.0891e-05,
         9.1626e-05,  2.4379e-05, -1.1038e-05, -5.2014e-06,  2.0500e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8779e-04, -1.0057e-04, -6.1779e-05, -4.6255e-05, -1.6022e-04,
         8.2928e-05, -2.0108e-05, -3.7242e-05, -2.0635e-04, -9.9349e-05,
        -4.0749e-05,  1.6349e-04, -1.9287e-04,  6.9056e-05, -1.7413e-05,
        -1.3933e-04, -7.0360e-05,  9.2664e-05,  1.6965e-01,  2.9934e-05,
        -1.8498e-05, -1.0355e-04,  1.0703e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #600: [tensor([-5.4949e-04, -7.5486e-05, -3.0965e-05, -2.2711e-05, -8.2293e-06,
        -2.4208e-05,  1.6319e-07, -1.3152e-05, -5.7449e-05, -3.6272e-05,
         6.0769e-06, -4.9595e-05, -2.0393e-05,  2.2635e-06, -3.9966e-02,
        -6.1039e-05, -3.0355e-05,  4.3420e-05, -4.2418e-02, -3.9057e-02,
        -1.5027e-04,  1.1127e-04,  6.1470e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2744e-04, -2.6998e-05, -2.0503e-04,  6.0055e-05, -1.0886e-04,
        -9.9027e-05, -5.9190e-05,  1.6051e-04,  1.5006e-04, -7.2231e-05,
         1.6056e-01,  1.8123e-04,  8.3926e-05,  2.4846e-05,  5.1077e-06,
         9.3221e-05,  1.4941e-04, -2.6035e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2638e-04,  1.1609e-05,  6.8555e-06, -7.9660e-05, -2.3573e-04,
        -1.9491e-05,  3.3906e-06, -2.2923e-05,  6.5728e-05, -7.2606e-05,
         1.7003e-01, -3.6682e-05,  3.3217e-05,  4.1674e-04, -6.6212e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9877e-05,  7.1242e-05,  1.0013e-04, -3.3473e-05,  1.1872e-05,
         4.4772e-05, -8.9626e-05, -1.0742e-04, -5.3840e-05,  5.9481e-05,
        -2.9628e-02,  4.6571e-05, -2.4810e-04,  2.0065e-04,  1.8620e-04,
        -1.4738e-01, -9.2232e-05, -2.4037e-04,  3.0155e-04,  6.2603e-05,
         1.0366e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3006e-04, -4.5410e-05,  2.7275e-05,  4.4175e-05, -3.5131e-05,
         1.4160e-05, -1.3208e-05,  1.4102e-05, -1.7560e-05,  1.8772e-06,
        -3.8574e-05, -4.7611e-02,  9.8385e-05,  4.6820e-05, -2.2189e-03,
        -6.4169e-02,  9.6956e-05, -1.1468e-04, -2.4750e-06,  5.0848e-05,
        -4.6593e-05, -8.3335e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0096e-04, -5.9727e-05, -8.1421e-05, -9.9215e-05,  3.5890e-05,
        -2.5542e-05, -6.3805e-05,  1.0076e-04, -3.5684e-05, -1.2125e-05,
         2.5440e-05, -6.7113e-02,  1.2379e-04,  1.6589e-04, -6.8866e-03,
        -7.9113e-02,  7.8318e-06, -6.6739e-05,  8.1091e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3577e-04, -4.6312e-05, -5.0886e-06,  6.0869e-05, -8.6942e-05,
        -1.0323e-04, -3.8946e-05, -1.6629e-04,  2.9599e-05,  9.8289e-05,
        -8.4704e-05, -1.2387e-01,  1.9917e-04, -4.0167e-03,  2.9603e-04,
        -9.8742e-05,  5.2516e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8503e-04,  8.2282e-06, -8.3220e-05, -1.0710e-04,  3.6228e-05,
        -7.7906e-05, -5.4625e-05,  2.5307e-05,  8.1962e-05,  4.2149e-05,
        -3.7831e-05,  6.9920e-07,  2.4937e-05, -3.3693e-05,  2.2657e-05,
        -1.9377e-05,  4.3131e-05,  1.3985e-05,  1.3674e-01, -2.6878e-05,
         3.0930e-02, -7.6878e-05,  7.2140e-04,  2.7118e-05, -1.3521e-04,
        -6.2488e-05,  3.7605e-06, -7.0675e-05, -2.6807e-05,  3.6526e-05,
         6.2533e-05,  1.9354e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7830e-05,  4.3500e-05, -1.2115e-04, -9.3753e-06,  3.1116e-05,
         3.3504e-05,  4.8426e-06, -6.8299e-06, -3.4226e-05, -7.1777e-05,
         1.3877e-04,  4.0567e-05,  8.2336e-05,  4.0058e-05,  7.4262e-07,
        -1.7052e-04, -9.2420e-06,  4.2804e-06,  1.0796e-01, -1.0480e-04,
        -1.7040e-04, -1.8464e-05, -2.0589e-04, -1.2979e-04,  1.8352e-03,
         1.2134e-01, -8.2738e-05,  4.8584e-06, -1.8306e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2036e-04, -7.2644e-06,  5.1371e-07,  2.9032e-05, -5.3276e-06,
         2.8305e-05, -2.3159e-05, -6.1651e-06, -7.6666e-05,  9.8321e-05,
        -1.5967e-05, -1.4348e-05,  1.4585e-05,  7.5629e-06, -5.8756e-05,
        -2.8361e-06,  1.7963e-05,  1.8883e-05, -1.2973e-04, -1.0617e-01,
        -2.1795e-02,  5.0340e-05, -8.9540e-05,  1.1381e-06, -1.7433e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0710e-04, -5.6592e-05, -1.2345e-04, -2.9274e-05,  9.2776e-05,
         5.2163e-05, -5.3498e-05, -1.7387e-05,  5.5557e-05,  3.2311e-05,
        -7.4921e-05,  4.8222e-05,  1.3849e-05,  7.2702e-05, -3.3490e-05,
         9.5815e-03, -1.6476e-04,  1.1223e-01, -5.4949e-05, -4.2908e-05,
         2.3295e-05, -4.9285e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6287e-05,  7.1994e-06,  3.5228e-05,  7.5369e-05,  2.5845e-05,
        -4.3515e-05,  2.4892e-05,  2.8406e-05, -7.6158e-05, -7.5565e-05,
        -8.7583e-06,  8.6986e-06, -1.0476e-04, -1.9550e-05, -2.3286e-05,
        -6.9753e-02,  1.8965e-05, -8.2042e-02,  4.1742e-06,  1.9352e-05,
        -1.1613e-04,  8.2587e-05,  4.0539e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #650: [tensor([ 3.7473e-04, -8.1693e-05,  5.5689e-05, -4.5187e-05, -4.3243e-05,
        -1.5986e-04,  3.0484e-05, -8.1233e-05,  2.5119e-05, -4.6700e-05,
        -2.6229e-05, -2.5782e-04,  4.1862e-05, -1.0169e-04,  4.5233e-05,
         3.8904e-05, -1.4279e-04, -9.7426e-06, -3.7735e-05, -1.5060e-04,
         1.1564e-04, -2.9475e-05,  1.0221e-04,  3.0252e-05, -1.5456e-04,
         2.7644e-01, -1.9948e-05,  6.4563e-05, -2.5949e-04, -1.5666e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3325e-04,  3.4204e-05, -6.9206e-05, -1.7615e-05,  8.0907e-05,
         3.7251e-05,  9.2015e-05, -4.2805e-06,  1.1700e-04,  1.9036e-05,
        -2.4097e-05, -1.0499e-04,  1.3646e-04, -1.3666e-05,  1.6009e-05,
        -3.5051e-05,  4.4743e-05,  5.0408e-05,  1.2139e-04,  2.4516e-05,
        -1.1829e-04, -4.6923e-05, -1.8424e-05,  1.1300e-05, -5.0343e-05,
         2.4494e-01, -1.0547e-04, -1.5044e-04, -8.5208e-05, -2.2297e-04,
         6.8403e-05,  4.9578e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3701e-05,  3.4297e-05, -1.2910e-07,  9.6935e-05,  9.3136e-05,
         4.0428e-06, -2.3830e-05,  1.4223e-04,  1.2238e-05,  1.6990e-04,
         4.1816e-05, -3.1117e-05,  1.5586e-04, -2.8250e-05,  2.0548e-04,
         1.2361e-05,  4.2263e-06,  5.1910e-05,  2.5670e-05,  1.5504e-05,
         1.7464e-04, -1.2427e-05, -3.2789e-05, -1.3366e-05,  1.8222e-05,
        -3.8916e-04, -1.7437e-04,  1.0539e-04, -1.1814e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4453e-05,  2.6103e-06, -1.6774e-05, -1.9399e-06,  3.5776e-07,
        -4.9718e-05, -1.6592e-06, -1.4505e-05, -2.9052e-06, -2.7462e-05,
        -2.4106e-05, -3.8464e-06,  9.2287e-07,  8.4269e-06,  4.8982e-05,
         4.3716e-05, -3.7041e-05, -4.1688e-05, -2.6008e-05, -2.5509e-05,
         2.8301e-05,  1.4615e-05, -1.4315e-05, -3.5751e-05,  1.7883e-05,
        -7.7564e-02, -2.7760e-05, -5.0267e-02, -2.8984e-06,  4.7548e-06,
        -4.0070e-05, -2.6648e-05, -1.7131e-02,  5.1093e-05, -6.7467e-05,
         2.5240e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2028e-04,  3.0904e-05,  3.8264e-05,  2.2617e-05,  3.0458e-06,
         1.3974e-05, -5.9784e-06,  2.1867e-06,  3.0602e-06,  1.0256e-06,
         7.7857e-06,  1.8098e-05,  1.6288e-06,  4.5755e-05, -9.7787e-07,
         5.4716e-06, -1.1655e-05, -2.7342e-05, -4.6059e-06,  5.4828e-06,
         2.4696e-05,  2.8736e-05, -3.5259e-05,  2.2348e-06, -1.2517e-05,
        -3.8508e-02,  1.9208e-05, -8.8715e-05, -9.2874e-03,  8.5090e-07,
        -2.2290e-02, -5.0237e-03, -2.2504e-05,  4.9250e-05, -3.0298e-02,
        -3.1084e-05, -2.7820e-02,  4.5001e-05,  1.0099e-04, -5.3546e-05,
        -2.7961e-05, -1.8924e-05, -1.9027e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3121e-04, -4.2734e-05,  3.7383e-05,  6.9521e-05,  3.9051e-05,
        -7.9422e-05, -3.8298e-05, -3.5099e-05, -1.7068e-05,  2.2412e-06,
        -2.0593e-05, -5.5833e-02,  1.0852e-05, -2.5509e-02, -1.1828e-04,
         1.1533e-04, -1.8962e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4455e-04, -5.5080e-07,  9.0101e-05,  1.1996e-04,  4.4506e-05,
         6.3767e-05,  5.9658e-05,  6.2894e-06, -6.0833e-06,  9.7534e-06,
         7.5016e-06, -1.5748e-02, -1.5722e-05, -1.2744e-01,  1.8418e-05,
        -3.4780e-05, -5.5184e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3540e-05,  1.8332e-05,  6.8168e-05, -1.9516e-05,  2.2460e-05,
         1.3847e-04,  3.3133e-05, -1.0451e-04, -3.6480e-05, -6.2974e-05,
        -5.7869e-05, -2.2335e-02,  1.3237e-04, -1.2009e-01, -5.2341e-05,
        -1.1275e-03,  4.4999e-05,  1.5331e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2751e-06, -1.0857e-04,  4.3313e-05, -2.5806e-05,  7.8112e-05,
        -2.2126e-04, -4.8454e-05, -1.6688e-04,  2.0853e-05,  1.2455e-01,
         1.2918e-04,  2.3652e-05,  1.4852e-04, -1.4148e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3831e-05, -6.3270e-06,  2.5151e-05,  4.1282e-05, -1.5187e-04,
         5.9501e-05,  6.5124e-05, -1.9426e-07, -1.5601e-01,  1.6693e-04,
        -3.8397e-03,  5.5788e-06, -1.8143e-03, -1.2096e-02,  2.0176e-05,
        -8.5555e-05, -1.2267e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1181e-04, -2.1400e-05,  1.0138e-04,  1.6457e-05,  1.3943e-05,
        -2.6528e-06,  2.6461e-05, -1.2472e-05,  1.7590e-04,  9.0241e-02,
        -5.5008e-05,  9.2954e-04,  2.7337e-03,  1.2590e-04, -5.3632e-05,
         3.7576e-05, -6.0460e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3133e-05,  4.1687e-05, -3.1149e-05, -5.9610e-05,  2.6460e-05,
         2.6435e-06, -2.1629e-05, -2.6758e-05, -1.2546e-04, -3.3046e-02,
        -1.6018e-04, -2.5735e-02, -1.8349e-04, -5.3790e-07, -6.7576e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #700: [tensor([ 2.8114e-04,  1.3067e-05,  1.1750e-05,  2.0061e-05, -1.7161e-05,
        -1.5891e-05,  4.3117e-05, -1.2775e-05, -4.8230e-05,  1.8785e-05,
         7.7703e-05,  1.4428e-05, -2.6606e-06, -1.0325e-01,  1.8571e-05,
        -4.5277e-03, -2.6118e-05, -5.4368e-02,  1.9120e-04,  3.5649e-05,
        -6.5616e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4289e-04,  2.0460e-05,  1.4272e-04, -3.7716e-05, -3.7045e-05,
         1.1585e-04,  3.3057e-05,  3.3978e-05, -7.6034e-05, -6.1498e-05,
         6.8132e-05,  7.5477e-05,  8.1353e-02,  3.4813e-05,  4.5173e-03,
         6.6304e-05,  6.5338e-04,  2.2728e-04,  3.3364e-04,  1.6722e-05,
        -5.2577e-05,  1.1231e-01, -2.3367e-04, -4.1794e-05,  8.8517e-05,
         1.1266e-06,  5.8306e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1110e-04,  1.8650e-05, -1.3257e-05,  1.2393e-05, -1.9303e-05,
        -4.9564e-05, -8.0868e-06, -3.3243e-05,  1.5440e-05, -7.2816e-05,
        -2.5099e-05,  8.5144e-06, -4.5823e-02, -9.0483e-06, -4.8651e-02,
        -1.8375e-05,  3.0824e-05,  8.4213e-06,  4.4710e-05,  2.0803e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6599e-04, -2.4106e-05,  6.3837e-05, -4.4720e-05, -1.9366e-04,
        -4.6653e-05,  1.7943e-04, -1.2374e-04,  2.2147e-04, -9.7569e-05,
        -1.9559e-04,  4.2829e-05, -2.2627e-01,  2.4786e-04,  1.2046e-04,
        -1.9262e-03, -7.2582e-05, -4.1668e-06,  1.7734e-04,  1.3494e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3589e-04, -5.3391e-06,  1.3222e-06, -5.1400e-05, -1.9266e-05,
        -3.8257e-05, -2.5842e-05,  4.7768e-05, -7.3284e-05,  2.9739e-05,
         4.9110e-05,  5.0255e-05,  3.8138e-05,  4.6182e-05,  4.3144e-05,
         2.6115e-05, -6.3169e-05,  7.4469e-06, -1.6760e-01, -5.1609e-07,
        -1.0472e-02, -1.4603e-05,  2.7739e-04, -1.5176e-05, -1.0045e-04,
         1.7420e-05,  3.8475e-05,  4.3935e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9593e-04,  1.9960e-05, -3.6510e-05, -4.7162e-05,  3.8151e-05,
        -4.1103e-05, -1.7670e-05,  2.1629e-05, -1.0174e-05,  4.5702e-06,
        -6.6059e-07,  3.9516e-05, -4.5497e-05, -6.9733e-05,  4.5410e-06,
         2.6840e-05,  5.4061e-05, -9.1553e-06,  1.2392e-01, -1.6508e-05,
         9.7864e-03,  8.5260e-05,  3.2626e-02, -6.8022e-05,  9.3443e-05,
        -3.8456e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7969e-04,  5.0860e-05,  4.3211e-06, -3.3555e-05, -6.1752e-05,
         1.8001e-05, -3.3152e-05,  2.1129e-05, -3.7547e-05, -4.1412e-05,
        -3.2576e-05,  7.5583e-05,  2.9374e-05,  4.5025e-06, -8.6750e-05,
         6.9447e-06,  4.9726e-06,  1.7677e-05,  9.7450e-05, -8.0232e-05,
        -1.6177e-04,  2.3727e-03, -1.1358e-04,  1.7154e-01,  4.9200e-05,
         1.6502e-02,  1.1083e-02, -2.3355e-05,  6.2407e-06,  4.1681e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2706e-04,  1.3447e-05, -6.1803e-06,  2.3333e-05, -3.3973e-06,
         4.2980e-06, -7.5922e-06, -2.0308e-05,  1.9624e-05,  1.2517e-05,
         4.6708e-07,  3.4524e-06,  5.6269e-06,  2.4532e-05,  4.3177e-05,
        -5.7259e-06, -1.1528e-06, -1.4710e-06, -5.4301e-05, -6.4160e-05,
        -1.0691e-05,  1.2262e-05,  4.5311e-04,  6.2143e-03,  7.6384e-05,
         3.0363e-04, -1.8870e-05,  1.0738e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9875e-05,  4.5547e-05, -3.2743e-05, -1.8575e-06, -1.1205e-05,
        -5.7085e-06, -5.2062e-05, -6.1919e-07,  1.6503e-05, -2.5608e-05,
        -2.4561e-05, -2.0256e-05, -2.4965e-05, -3.1328e-06, -5.0951e-05,
        -2.7794e-06,  1.0032e-05, -1.0414e-05,  4.1290e-05, -3.9015e-05,
         7.5141e-02,  8.0402e-05,  1.5426e-02,  2.9525e-02, -9.5014e-05,
         7.5368e-05, -1.4080e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8224e-05, -2.0383e-05, -2.4825e-05,  5.2805e-05,  6.6281e-05,
        -2.5525e-05,  6.1961e-06, -3.9079e-05, -2.3069e-05,  3.3108e-05,
        -4.1227e-06, -5.3503e-05, -2.1896e-05, -1.7936e-05, -2.4504e-05,
         2.9901e-08,  1.9515e-05,  1.8650e-05,  3.8685e-05,  3.1479e-05,
        -5.6709e-02, -2.3060e-05, -1.0421e-02, -1.0006e-01, -5.9188e-05,
         2.4317e-05,  8.3759e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9414e-05, -1.9165e-05, -2.5239e-05, -1.2454e-05, -8.3395e-05,
        -1.5211e-05, -4.2726e-05, -1.6470e-05,  4.2187e-05,  2.9288e-05,
         3.4087e-05, -2.3870e-05, -1.2072e-06, -9.6688e-06,  3.9474e-05,
         4.6573e-05, -1.4909e-05, -4.0425e-05, -3.5841e-02, -2.2018e-05,
        -4.5731e-03, -3.6133e-02, -7.6013e-02, -9.6832e-05, -6.7474e-07,
        -8.0321e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8711e-04,  4.4604e-06,  2.9357e-06, -3.1301e-05,  7.4353e-06,
        -7.1129e-05,  3.3742e-05,  8.9182e-06, -2.6557e-05,  1.0013e-05,
         2.5977e-05, -3.7026e-05,  3.6522e-05,  4.5382e-05,  2.5478e-05,
         6.8720e-05, -3.5504e-05,  7.9959e-05, -3.5156e-02,  6.8292e-06,
        -6.5501e-02,  1.1075e-04, -7.6626e-02, -4.5012e-05,  1.1551e-04,
         1.1801e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #750: [tensor([ 2.9116e-04,  1.0778e-04,  2.2947e-05,  1.2606e-05,  1.1834e-04,
         1.6462e-04,  9.0492e-05, -1.1628e-04, -1.1204e-02,  4.0261e-05,
        -3.5065e-04,  1.4281e-04, -1.0558e-01, -1.1267e-03, -1.0885e-04,
         1.4204e-04,  4.1907e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6761e-04, -5.9340e-06, -2.8354e-05, -1.7706e-04, -1.3747e-04,
        -1.1146e-05,  1.4489e-05,  9.1843e-04, -3.1305e-05,  4.6110e-02,
         3.3012e-02,  1.0029e-01, -1.8527e-04,  2.2633e-05, -9.2189e-05,
        -2.7093e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3254e-04, -1.2539e-04, -3.7699e-06, -1.2311e-04, -1.8670e-04,
        -4.3853e-05,  3.1338e-06,  1.8617e-04,  7.1388e-05, -4.2735e-05,
         1.1439e-01,  2.7733e-04,  1.4139e-04, -3.4019e-04,  7.9398e-05,
         7.0055e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5797e-04, -5.7261e-07, -3.2511e-06, -1.8627e-05, -2.7627e-05,
        -4.6332e-06,  2.8212e-07,  4.8124e-06,  3.1505e-05, -1.4650e-05,
        -8.1745e-06,  1.7914e-05, -9.9672e-06,  1.8657e-05,  7.7145e-06,
        -2.0340e-05,  1.1243e-05, -9.6359e-06,  2.2315e-05, -1.4016e-05,
         3.4303e-05,  4.7481e-05, -2.4389e-05,  2.5365e-05, -3.7181e-02,
         3.3177e-05, -7.1069e-02, -2.2030e-05, -2.7377e-06, -1.2098e-02,
         4.5393e-05, -5.0976e-05, -3.8950e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2017e-04,  2.7282e-05, -4.0104e-06,  2.2641e-05,  3.1442e-05,
         5.8429e-05, -4.4022e-05,  1.3803e-05, -4.0874e-05,  7.1129e-05,
         6.2244e-05,  7.0338e-05,  3.7754e-05,  5.0675e-05,  5.5281e-05,
         1.1996e-04, -9.1496e-06,  4.6687e-05, -5.1104e-05,  5.5077e-05,
        -1.7450e-05,  5.4675e-05,  5.0712e-05,  2.4897e-05,  1.6677e-01,
         7.5331e-05, -6.0426e-03,  8.3928e-05, -1.6212e-04,  8.0809e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6802e-04, -6.9218e-05, -1.2838e-04, -1.6912e-04, -1.2125e-04,
         2.5102e-06, -6.8519e-05, -4.4651e-05, -1.2792e-04, -9.6862e-05,
        -7.0746e-05, -3.0793e-05, -1.6348e-04, -4.1835e-05, -9.2559e-05,
        -2.2734e-05, -6.8630e-05, -6.1998e-05, -6.3499e-05, -1.1418e-04,
        -8.3523e-05, -5.1877e-05, -7.1486e-05, -1.5196e-04,  7.0890e-03,
         5.4324e-05,  1.8783e-01, -1.4729e-04,  1.1330e-04, -1.0938e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0749e-04,  5.4166e-06,  5.4121e-05,  2.3279e-05,  2.0426e-05,
         3.3149e-05,  9.3358e-06, -4.2949e-06,  3.1922e-05, -7.1188e-07,
        -1.0870e-05, -1.2717e-06, -2.1368e-06, -2.9502e-05,  2.7727e-05,
         1.5629e-05, -5.7036e-06, -4.5061e-06,  1.7126e-05,  6.4247e-05,
        -5.3671e-05, -6.0981e-06, -1.2585e-05, -8.7409e-02,  9.0331e-05,
        -2.3891e-04, -7.4673e-03, -1.7850e-05, -3.2730e-03, -1.6763e-05,
         4.3860e-05, -2.6274e-04,  1.7164e-05,  3.0863e-06,  4.6081e-05,
        -4.3222e-05, -2.0771e-02, -9.4565e-06,  1.2547e-04, -2.6062e-02,
        -5.8110e-05,  4.5001e-06,  2.6547e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4101e-04,  1.6138e-05,  2.5465e-05, -1.7353e-06,  3.0875e-05,
         3.6408e-05, -6.3427e-06,  8.5399e-05, -4.6862e-05,  4.5396e-05,
         5.3432e-05, -4.5771e-05,  1.1517e-05,  7.8224e-05,  2.6128e-05,
         6.1254e-05,  4.8658e-05, -2.8590e-05,  1.0738e-04,  3.7169e-06,
         6.2400e-06,  4.7566e-06,  1.9053e-05, -1.0814e-01, -1.2034e-05,
         1.6942e-06, -5.6198e-03,  5.6415e-05,  4.1042e-06, -7.1705e-02,
         2.5668e-05,  1.9709e-05, -4.3578e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3755e-04,  5.2627e-06,  2.1778e-05, -4.8835e-05, -3.1788e-05,
        -8.6184e-06, -1.7316e-05,  1.9704e-05, -2.2563e-05, -7.6468e-06,
         2.1028e-07, -4.1086e-05, -1.7523e-05, -9.2640e-05,  3.9514e-05,
        -5.1495e-06,  3.1096e-05, -5.2982e-05, -2.7684e-05, -3.4906e-05,
         2.8418e-05, -4.2870e-05,  4.1392e-05,  8.3613e-02, -1.4695e-05,
         6.1608e-04,  9.8501e-02, -1.6445e-05,  1.4872e-02,  8.2094e-03,
        -1.4961e-05,  1.6646e-06,  9.5343e-07,  8.3682e-04,  4.7243e-05,
        -2.8074e-05,  6.5887e-05,  1.2353e-06, -9.0624e-04,  3.4035e-05,
         5.3855e-06,  2.9670e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3552e-05, -1.3821e-05,  5.8296e-05,  3.4594e-05, -3.5237e-05,
        -7.2616e-06, -5.8512e-05,  1.1451e-04, -5.5441e-05,  4.0341e-05,
         5.6685e-05,  4.5895e-06,  5.4215e-05,  4.8673e-05,  6.1720e-05,
        -7.0204e-02,  1.1767e-05, -2.7439e-04, -5.0783e-05,  3.8520e-05,
        -8.6979e-02,  7.9760e-05, -1.1881e-03, -7.2715e-04, -1.3138e-04,
        -1.2371e-05,  5.6130e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6987e-06,  4.2008e-06, -4.8114e-05,  6.4118e-06, -3.7201e-05,
         6.4891e-05,  9.5985e-06,  7.0166e-06,  1.3188e-05,  8.2309e-05,
        -8.2314e-05,  1.3039e-05,  5.6387e-05,  3.4165e-05,  4.0466e-05,
        -5.2974e-02,  9.1631e-05, -4.9402e-02, -1.9678e-05, -4.6039e-05,
        -8.0096e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6667e-05,  1.6433e-05,  5.6424e-05,  3.0328e-05, -2.4071e-05,
        -2.7139e-05, -7.6856e-05, -3.2689e-05,  1.2575e-05,  7.1778e-05,
        -2.9056e-05,  5.0779e-05, -3.0713e-05,  6.0658e-05,  6.2407e-05,
        -1.2001e-01,  3.9760e-05, -1.5425e-02, -1.1821e-04, -3.2909e-02,
        -9.0707e-05, -1.0493e-04,  6.8132e-06,  6.4397e-05, -1.6982e-05,
        -6.8844e-05,  6.3317e-05,  8.9893e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #800: [tensor([ 4.5439e-05,  4.6674e-05, -2.2704e-06,  1.3111e-06,  1.3004e-05,
         3.9066e-05, -1.1678e-05, -8.8234e-06,  6.0935e-05,  5.6978e-05,
         1.7057e-05,  2.3116e-05,  3.1323e-05, -1.1414e-05,  5.4887e-06,
         3.1788e-05, -1.5545e-05,  1.3127e-05,  9.9194e-06,  1.1402e-05,
        -1.5149e-05,  1.7334e-05,  5.2874e-04,  3.1693e-04, -2.2874e-05,
        -1.7511e-05,  1.1616e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7538e-04, -2.2173e-05,  7.6035e-05, -4.8973e-05,  3.2075e-06,
        -2.9789e-05,  1.2964e-05, -2.6150e-05,  6.3375e-05,  3.6016e-05,
         2.9634e-05, -7.5360e-05,  4.6299e-05, -2.6176e-05,  2.3705e-05,
         4.3062e-05,  3.0096e-05, -5.9108e-05, -1.0599e-05,  1.0300e-05,
         1.6635e-05, -1.2308e-06, -2.3296e-05, -3.5436e-03, -4.9539e-02,
         3.1302e-05, -2.0313e-06, -1.2926e-04,  3.2290e-05, -5.4466e-04,
        -1.2705e-01,  4.7108e-05, -1.3980e-05, -8.1226e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5021e-04,  9.8146e-05, -3.6469e-06,  3.1927e-05,  9.7685e-06,
         1.2643e-04,  7.4590e-05,  2.5776e-05, -1.5518e-05,  8.2503e-05,
         1.3227e-05,  3.0009e-05,  1.3178e-05,  1.0372e-05, -5.0064e-05,
         2.6866e-05, -2.5884e-05,  7.5903e-05,  8.0836e-06,  6.8138e-05,
        -1.2454e-05, -3.1798e-05,  1.5349e-01,  4.8358e-05,  4.6540e-03,
        -1.0993e-04,  2.9413e-05,  1.3384e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8402e-04,  2.0056e-05, -6.9292e-05,  3.9578e-06,  4.9162e-05,
        -8.3787e-07, -1.2664e-05, -5.8917e-05, -3.2878e-05, -8.4223e-05,
        -1.8940e-05, -4.6083e-05,  5.9697e-05, -8.0266e-05, -2.0435e-05,
        -3.3425e-05, -3.1523e-05,  3.3462e-05, -2.9318e-05, -1.5565e-04,
         3.5665e-05, -4.4938e-05,  2.5274e-03, -8.5746e-05, -3.6049e-05,
         6.7674e-05,  1.7388e-01,  1.1542e-04,  7.1697e-05,  7.9784e-04,
        -3.7422e-05, -9.0835e-05, -1.9258e-05, -1.0464e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8500e-05, -2.9557e-05,  6.5809e-05,  1.4691e-05, -1.7481e-06,
         7.0456e-06,  6.2751e-05, -1.4767e-05,  3.8162e-06, -3.4361e-05,
         1.7065e-05,  9.5309e-06, -1.3130e-05,  2.0720e-05, -7.9480e-06,
        -4.2745e-05,  3.6715e-05, -2.2113e-05, -2.2597e-05,  2.2332e-05,
         1.3612e-05,  1.4688e-05,  2.0242e-03, -1.1155e-04, -8.8651e-05,
        -1.8425e-05,  1.5182e-01, -1.3785e-04,  1.4005e-03,  1.4569e-05,
         3.2409e-05, -6.5316e-05,  7.0610e-02, -1.7694e-05,  6.4893e-05,
         2.9797e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3745e-04, -3.4810e-05,  3.6601e-05, -3.7479e-05,  2.3859e-05,
         4.4169e-05,  2.5681e-05, -3.7188e-05, -3.6861e-05, -7.7196e-05,
        -3.9488e-05, -5.0575e-05,  6.4431e-05,  3.1892e-05, -4.2849e-05,
        -5.0909e-05, -3.2726e-05,  1.9081e-05,  5.3890e-06,  6.9015e-05,
         5.4678e-05, -2.2061e-06,  8.1134e-02, -5.1993e-05,  1.0406e-05,
         1.4974e-04, -3.3058e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6422e-05, -2.1770e-05,  9.1589e-06,  2.8903e-05, -2.0434e-05,
         4.0233e-06,  5.4026e-05, -2.0893e-06, -5.6157e-05, -3.9304e-05,
         1.0023e-07, -1.4686e-05,  3.9870e-05, -1.1848e-05, -1.6993e-06,
         1.8586e-05,  2.6941e-05, -1.8546e-05,  3.7103e-05, -4.0594e-05,
         3.1953e-05, -3.1210e-05,  1.0385e-01, -6.9814e-05, -6.9537e-06,
         9.3909e-04,  3.6950e-02,  1.1751e-05,  1.9469e-04, -5.2570e-05,
         6.6083e-05,  4.5883e-05,  1.8663e-02,  3.3032e-05,  2.8986e-05,
         8.7904e-06, -4.8445e-05,  6.8865e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8279e-04, -4.1048e-05, -2.5926e-06,  5.3775e-05, -3.8051e-05,
        -4.2862e-05,  8.0192e-06,  3.6011e-05, -9.4588e-05, -5.5212e-05,
        -3.3912e-05, -5.7692e-06, -2.5084e-05, -5.7561e-05, -6.7016e-05,
         9.7783e-05,  7.2680e-06, -2.8812e-05,  1.4738e-05,  4.9469e-06,
         6.3213e-05, -4.5444e-06,  1.4585e-01, -3.9866e-05, -7.4473e-08,
        -2.5311e-05,  4.8223e-02,  3.2039e-05,  2.0423e-04,  4.6578e-05,
         3.0540e-07,  5.8032e-05,  1.7537e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5842e-04,  1.1410e-05,  5.9159e-05,  4.8719e-05, -1.1874e-05,
        -6.9100e-06,  6.2822e-06,  3.5679e-05,  4.7739e-05, -2.7438e-05,
         2.7488e-05, -2.5426e-05, -1.0928e-05, -9.0636e-06,  1.0370e-05,
        -2.5165e-05, -2.1648e-06,  2.7985e-05, -2.2832e-05, -5.7269e-05,
         2.8599e-05, -8.0642e-06, -3.6543e-02,  1.0565e-05,  7.2504e-05,
        -2.3641e-03, -2.9447e-05,  1.8454e-05, -9.6127e-02,  1.0458e-04,
         1.8645e-05, -1.4399e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4535e-04,  9.8229e-06,  6.0676e-06,  3.7045e-05, -2.3306e-05,
        -3.2846e-06, -4.7542e-05,  1.5942e-05,  1.2206e-05,  4.7099e-05,
        -3.1270e-05,  2.0698e-05, -2.3622e-05, -3.2514e-05,  1.4952e-05,
        -8.1393e-06,  3.3153e-06,  9.8314e-06,  2.7465e-05, -8.4927e-05,
        -1.0930e-05,  4.8462e-05, -8.9829e-02,  3.5439e-05,  6.3933e-05,
         1.2110e-04, -6.5524e-02, -1.3041e-05,  2.9780e-05, -1.8767e-03,
        -1.6891e-02,  4.4339e-05,  5.2513e-05,  4.5642e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3922e-04, -6.1701e-05,  1.2978e-05,  1.4979e-05, -5.9427e-05,
        -9.4394e-05, -7.7832e-05, -1.5024e-05,  3.1287e-05,  5.1317e-05,
        -5.0280e-06, -2.4685e-05, -1.7460e-05, -1.5645e-05, -7.0114e-06,
        -4.3606e-05, -2.9715e-05, -9.3139e-05, -8.5688e-06, -5.1498e-05,
        -2.9975e-05,  5.6256e-05, -1.2665e-01,  7.1811e-05,  5.2575e-05,
        -3.4359e-04,  4.4134e-03,  6.6463e-05, -8.8874e-05, -4.3295e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5068e-04,  9.0432e-06, -4.3414e-05,  3.7863e-05,  2.6788e-05,
        -8.5112e-06, -3.6140e-06,  4.8488e-05,  2.8220e-06, -3.6365e-05,
        -2.5823e-05, -3.1024e-05,  2.3710e-05,  2.2747e-05, -4.9541e-06,
         3.0607e-05, -2.0418e-05,  4.2718e-05, -7.2345e-05, -2.4444e-05,
        -7.3887e-06, -1.4208e-05,  5.5655e-06,  3.8045e-02,  2.2297e-02,
        -1.0060e-05, -6.0934e-05, -2.6880e-05,  2.7323e-04,  9.5895e-02,
         2.3662e-05, -1.1647e-04,  6.9216e-05,  3.7678e-05, -3.8278e-06,
         9.3632e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #850: [tensor([ 3.1748e-04,  4.9803e-05,  1.5747e-04, -1.1662e-04,  6.7600e-05,
         4.0698e-05, -9.5512e-05,  1.3753e-04, -1.2064e-04,  3.2229e-05,
         2.8918e-05,  1.4585e-04,  1.1648e-04,  1.0254e-04, -8.6712e-05,
        -1.5271e-01, -1.2361e-04, -4.2649e-05,  1.3123e-04, -4.7806e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6874e-04, -6.2302e-05, -2.9257e-06, -1.4212e-05, -3.0397e-05,
         1.3540e-05, -8.0506e-05,  1.5263e-05, -4.2300e-06, -1.3578e-05,
        -2.0003e-05, -3.7504e-05,  2.6962e-06,  1.4809e-05,  8.5296e-06,
        -3.9430e-02, -5.0189e-05,  1.0386e-05, -5.7625e-05,  5.2134e-05,
         1.9875e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3140e-04,  7.9708e-05, -1.9886e-04, -1.7745e-05,  1.4563e-04,
         6.4515e-05, -1.6162e-04,  2.3327e-06, -1.0397e-06,  3.4915e-05,
         7.0134e-05,  1.1989e-04, -3.7622e-05,  7.6004e-05,  1.3562e-04,
         1.5141e-04,  1.7163e-01,  1.0937e-04,  1.1220e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8387e-04,  2.0746e-05, -4.4692e-06, -6.2649e-06, -4.1516e-06,
        -6.7349e-05,  1.0958e-04, -5.0689e-05, -3.7275e-05, -1.0694e-04,
         2.3127e-05, -1.2249e-04,  2.4203e-05, -7.3822e-05, -1.3766e-01,
        -1.9346e-06,  2.3203e-03,  8.5055e-05,  5.8384e-05, -1.3062e-05,
         9.7066e-05, -2.7641e-05,  1.8505e-04,  1.0132e-04, -3.1461e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0407e-04,  4.4920e-05, -5.2415e-05, -1.5276e-04, -1.1184e-05,
         9.1790e-05, -1.4443e-04,  1.8844e-05, -4.6034e-05, -1.3250e-04,
        -9.2656e-05,  5.0186e-05, -1.2626e-04, -1.1125e-04,  2.0742e-01,
         4.6720e-06,  1.0739e-02,  6.6200e-05,  2.7938e-04, -4.5449e-05,
        -1.2778e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9080e-04,  2.7003e-05,  3.5311e-05,  2.1647e-05, -2.7091e-05,
         2.5357e-05,  6.1688e-06,  3.5119e-05,  1.1936e-05, -2.4441e-05,
         2.5185e-05,  8.6138e-06,  4.9820e-06,  4.5472e-05, -3.0190e-05,
         1.7560e-05,  8.8999e-06, -3.0045e-06, -1.7671e-05, -1.6609e-05,
        -1.5687e-05,  1.0755e-05, -5.6009e-02, -1.1741e-05, -8.0781e-05,
         1.4003e-05, -7.7440e-02, -3.7028e-06,  4.0781e-05,  4.5516e-05,
         9.7592e-05, -5.7523e-05, -3.7445e-05, -2.4375e-05, -9.0704e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2870e-04, -3.1021e-06,  4.5865e-06,  3.9191e-07, -7.1564e-06,
         8.3295e-07, -9.2203e-06,  1.2113e-06,  1.1518e-05,  7.1333e-06,
        -2.3038e-06, -7.8681e-06,  2.3394e-07, -9.0563e-06,  5.9976e-06,
         1.9647e-05, -2.9209e-06,  6.0580e-06, -7.2385e-06,  7.5060e-06,
         3.0362e-06, -9.1870e-06, -1.5557e-04, -5.2442e-06, -6.7167e-05,
         2.1674e-04, -9.4689e-06,  2.4961e-04, -2.8010e-04, -1.3966e-04,
        -1.4386e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.8203e-05,  2.8540e-05,  2.3534e-05,  1.0932e-05,  3.3641e-06,
         4.9468e-05, -1.3970e-05,  2.1018e-05,  1.3571e-05,  7.5005e-06,
         2.1538e-06,  2.1809e-05,  1.5500e-05, -1.7305e-05,  8.2212e-07,
         2.0995e-05,  3.5963e-05,  2.9501e-05,  4.6008e-06,  2.6750e-05,
         1.9478e-05, -2.2054e-05, -1.6021e-04,  1.6998e-05, -1.0776e-05,
         6.6317e-05,  1.0687e-05, -7.2065e-02,  4.8924e-06,  2.8821e-05,
        -3.6736e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8859e-05,  1.8833e-05,  3.8597e-06,  1.4488e-06,  2.3513e-05,
        -1.7302e-06,  1.8429e-05,  2.3907e-05,  4.3248e-06,  4.6809e-05,
        -3.3139e-06,  8.1199e-06,  7.4662e-06, -2.1143e-05,  1.0204e-05,
        -8.3698e-06,  3.3999e-05, -2.0615e-06,  5.5775e-05,  2.6067e-05,
         1.9171e-05,  2.4856e-05,  1.1155e-05, -1.3461e-05,  7.9378e-05,
         1.2306e-05,  4.3181e-05,  2.2101e-06,  2.1115e-05,  5.4927e-07,
        -1.4165e-05, -1.2259e-06, -1.3711e-06,  5.2797e-04, -3.6579e-04,
         2.6286e-04,  2.8974e-03, -1.5383e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4721e-05, -1.3505e-06, -1.2619e-05,  7.1681e-06,  3.9735e-06,
        -8.6183e-06,  8.0392e-06,  1.0953e-05,  8.5107e-06,  2.6090e-05,
        -1.7256e-08,  7.6570e-06, -8.2629e-06, -1.0829e-05,  3.4841e-06,
        -2.2335e-06,  7.5237e-06, -3.1661e-06,  8.2367e-06,  1.5146e-05,
         7.3719e-06,  1.2045e-05,  6.5026e-06,  9.2422e-06,  2.3624e-05,
         3.1554e-08,  1.7209e-05, -1.1319e-06,  6.4530e-06,  3.5380e-06,
        -1.5880e-05, -4.1610e-06, -3.4337e-06,  1.5053e-03, -2.4111e-04,
         9.9509e-04,  3.0712e-04,  4.1982e-04, -2.6688e-05, -2.8974e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8751e-05,  3.5801e-05, -8.1795e-06,  1.2739e-05,  3.9394e-05,
        -1.9799e-06,  5.3301e-06,  2.5308e-05,  1.1428e-05,  4.7077e-05,
        -2.5098e-06,  8.7884e-06,  6.5439e-06, -7.8106e-06,  1.3265e-05,
         1.7321e-05,  4.2228e-05, -4.0958e-06,  3.4250e-05,  1.2636e-05,
         2.2708e-05,  3.4936e-05,  1.2333e-05,  3.9699e-06,  7.4387e-05,
        -7.8089e-06,  2.9380e-05, -2.9806e-07,  9.0582e-06,  4.2412e-07,
         2.3392e-06,  6.7141e-06,  8.5290e-06, -2.1226e-04, -5.1420e-04,
         4.9509e-04,  5.1801e-04, -3.0187e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0220e-04,  5.3988e-06,  3.0795e-07,  3.4727e-05,  1.1820e-05,
         5.7457e-05,  1.8658e-05,  2.0701e-05,  5.4728e-05,  3.4602e-05,
        -4.5885e-02, -1.0393e-04, -3.2916e-02,  6.8642e-05,  8.0383e-05,
         7.6657e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #900: [tensor([-1.6898e-04, -1.6048e-05,  2.6848e-05,  4.4149e-05,  7.7727e-05,
        -1.3757e-06, -3.1822e-05,  5.2637e-05,  4.9452e-02,  5.7348e-05,
         7.4074e-05,  1.0665e-06,  3.2918e-02, -6.7763e-05,  7.5272e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1792e-04,  1.0651e-04, -1.0621e-05, -2.1010e-04, -1.0912e-04,
         1.8020e-05,  4.3416e-05, -2.9721e-05, -1.4912e-04,  3.5554e-05,
        -8.2140e-05, -1.5357e-04,  6.1112e-05, -1.9119e-04, -1.9541e-04,
        -1.0638e-04, -2.1520e-05,  1.7983e-01,  4.1380e-05,  4.5775e-03,
         1.8675e-04, -4.4656e-05, -4.3515e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2217e-04,  1.6265e-05,  2.2502e-05, -3.8199e-05,  4.4052e-05,
         9.0209e-06, -3.6179e-05,  4.9124e-05,  9.9152e-05,  3.3051e-06,
        -1.3082e-05, -1.1064e-04,  2.5687e-05,  1.0481e-05, -3.6214e-05,
        -2.2130e-05,  2.2590e-05,  9.2410e-02, -4.2470e-05,  4.8719e-02,
         2.4189e-05,  4.7006e-02, -1.8448e-04, -5.1060e-05,  3.2248e-05,
         4.2733e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3182e-06,  2.0176e-05, -1.6488e-05, -2.3954e-05,  4.9419e-06,
         4.7002e-06,  7.8709e-06,  2.4765e-05,  1.2461e-05,  4.2712e-06,
         1.1071e-05,  2.6417e-05, -3.0788e-07,  1.3489e-06,  7.7965e-06,
         1.5519e-05, -9.7368e-07,  2.8782e-02, -3.9057e-05,  5.7548e-02,
        -2.4449e-05,  1.3954e-02,  1.4947e-05,  8.2824e-03, -4.5018e-05,
        -3.6804e-05,  1.6077e-06,  4.6162e-05, -6.5832e-06,  4.5197e-06,
        -1.7794e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9146e-05, -9.7711e-06, -1.4777e-05, -4.9269e-05, -2.7817e-05,
        -2.1594e-05, -4.5568e-05,  3.7719e-05,  7.5086e-05, -1.5769e-05,
        -2.2813e-05,  1.0581e-04, -9.8199e-06,  2.4400e-05, -3.0733e-05,
        -1.0409e-04,  2.0409e-05,  2.2417e-03,  7.6409e-02,  1.0921e-04,
         3.9896e-05, -1.4743e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9478e-05, -1.1862e-05,  3.8747e-07,  1.7588e-05,  2.1660e-05,
         1.0362e-05, -1.5195e-05, -9.2524e-06,  1.3467e-05,  6.2691e-06,
        -4.1697e-06, -1.3536e-05,  1.1865e-05,  2.6600e-05, -1.4942e-02,
         9.4919e-06,  1.0080e-03, -3.2821e-05, -4.0027e-05,  8.2949e-06,
         3.0757e-04, -1.6130e-02,  1.9842e-06,  7.6951e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0207e-04, -1.6150e-05,  1.8285e-05, -3.9366e-05, -3.7569e-05,
         1.1052e-05,  2.6498e-05,  5.4086e-05,  4.3742e-05,  1.1539e-05,
        -8.3949e-05,  2.4083e-05,  4.4032e-06,  4.6770e-05,  3.0228e-02,
         1.0440e-01, -2.0426e-05, -7.6965e-05, -5.0654e-06,  2.5082e-05,
        -8.3971e-05, -8.4213e-05, -5.6452e-05, -9.2973e-05,  1.8714e-02,
        -5.7929e-05, -7.2261e-05, -9.3469e-06,  7.3724e-05,  2.1276e-05,
        -2.3801e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4485e-04, -3.4391e-05, -6.3411e-05, -3.3110e-06,  5.3390e-05,
         9.8668e-05, -1.0178e-04, -6.7292e-05, -3.4318e-05, -3.0471e-05,
        -2.6091e-05, -2.1025e-04, -3.9740e-05, -5.2903e-05,  1.2052e-01,
        -4.3335e-06,  1.8123e-02, -1.2299e-04, -1.1579e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0441e-04,  1.0004e-04,  5.2483e-05, -1.7463e-04, -1.0467e-04,
         9.7656e-05,  2.1999e-05,  8.2319e-05, -6.5411e-05, -1.6756e-04,
        -1.3381e-04, -1.7348e-05,  9.7658e-05, -1.9010e-05,  1.9234e-01,
         8.0065e-05,  2.0155e-02,  1.0784e-04, -2.2942e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0977e-04, -6.8377e-05,  2.3672e-05,  1.1495e-04,  6.9384e-05,
        -3.5652e-05,  1.2091e-05, -4.3552e-05,  5.4815e-05, -1.6110e-05,
         8.8590e-06,  1.1888e-04, -4.7949e-05, -2.3017e-05, -8.7435e-02,
         6.3708e-07, -7.2015e-02, -5.6561e-05,  1.1496e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6016e-05,  8.6164e-05, -5.1390e-05,  1.7776e-06,  5.1528e-05,
         7.3099e-06,  5.0304e-05, -3.5967e-05, -2.7084e-05,  7.3253e-05,
        -8.3845e-05, -8.9424e-05,  4.7115e-05, -1.7056e-05,  3.9996e-05,
        -2.3399e-05, -4.7416e-05, -3.8371e-05, -1.3379e-04, -3.7298e-05,
         1.4416e-01, -3.4756e-05,  3.1867e-02, -1.0394e-06,  1.2183e-04,
        -1.6171e-04, -4.0406e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3445e-04, -3.5674e-07, -7.5120e-05,  4.1836e-06,  2.3622e-05,
        -5.7918e-05, -2.6237e-06, -4.8723e-05, -3.8014e-05, -3.7663e-05,
         4.8162e-05, -1.9362e-05, -6.2895e-05, -1.2955e-05, -8.3154e-05,
        -5.7979e-05, -4.0360e-06, -6.5569e-05,  2.7232e-05,  2.6979e-05,
         4.7124e-05,  9.0151e-02,  1.9108e-02, -4.6961e-05,  1.3782e-03,
         3.2328e-05, -4.7516e-05,  6.3774e-06,  6.5378e-06,  6.1854e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #950: [tensor([-1.5748e-04, -2.6880e-05,  3.3792e-06,  1.7023e-05,  6.3650e-06,
        -2.0020e-05, -2.4057e-05, -3.8213e-05, -1.6604e-06, -1.2836e-05,
        -9.3187e-06, -4.1514e-05,  2.3758e-05, -1.5112e-05,  1.9085e-05,
         1.0979e-05, -4.7468e-05,  9.6722e-06, -9.2872e-06, -2.3797e-05,
         6.6361e-06, -2.1956e-05, -4.9893e-05, -1.2514e-04,  1.2468e-02,
         2.1846e-05,  6.9575e-06,  1.7977e-04,  1.5893e-02, -3.3846e-05,
         1.7524e-03, -8.3010e-06,  7.0874e-02, -2.1179e-05, -3.4832e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7466e-05,  1.7156e-05,  1.4749e-05,  7.4820e-06, -2.1073e-05,
        -4.8515e-06,  6.8908e-06, -1.2438e-05, -1.0058e-05, -4.8285e-06,
         3.2090e-05, -8.0984e-06, -1.2072e-05, -2.7024e-05, -1.8865e-05,
         1.0183e-05, -2.0190e-05, -4.3107e-06,  5.4030e-06,  6.8935e-06,
        -2.4244e-06,  3.9331e-05, -9.7381e-06, -1.2297e-05,  1.7731e-05,
         1.0532e-02,  3.9572e-05,  6.0639e-02,  2.0127e-05,  7.7218e-04,
         7.5904e-06,  1.0928e-03, -7.6480e-06,  1.8526e-03, -3.7044e-05,
        -5.9828e-05,  3.0492e-02,  1.4735e-05,  4.2766e-05,  4.5027e-06,
        -1.5157e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4074e-04,  3.2430e-05,  2.0878e-05, -9.5735e-06,  6.1837e-06,
        -7.7387e-07,  4.8053e-05, -4.5881e-05,  2.4941e-05,  4.0710e-05,
         5.0663e-05,  3.7400e-05,  1.5140e-05,  2.2641e-05,  3.3590e-05,
         1.6434e-05, -3.9357e-05, -6.5253e-02,  8.9655e-05, -2.0985e-02,
        -6.6424e-05, -3.5029e-02, -1.9500e-05, -1.7630e-04, -1.0754e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7338e-04,  1.4365e-05, -3.4377e-05,  1.7840e-05,  6.2527e-05,
        -6.1623e-06, -2.7113e-05,  3.4437e-05,  1.8342e-05, -5.0302e-05,
         7.4932e-05, -1.0204e-05,  2.7534e-05,  9.9627e-05, -4.1173e-05,
         4.3800e-05, -2.2994e-05, -1.0509e-01,  1.0015e-04, -4.5638e-02,
        -8.1742e-05, -3.3662e-04, -6.9562e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6172e-05, -1.8901e-05, -1.1369e-05, -8.3539e-06, -1.2664e-05,
        -1.5054e-05,  4.0369e-05,  7.6835e-06,  5.1893e-06, -9.7080e-06,
        -3.0746e-06,  7.0251e-06,  5.3975e-06,  5.6965e-06,  6.6706e-06,
         6.4236e-07,  2.9904e-06,  8.3785e-05,  4.7454e-05,  3.7972e-03,
        -2.6686e-04,  1.6733e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1004e-05, -4.3160e-05, -3.1283e-05, -9.6110e-06, -1.1365e-05,
        -1.2832e-05, -2.7205e-05,  5.5375e-06,  8.5899e-05, -1.3670e-05,
         2.0643e-05,  6.1199e-05, -2.1686e-05,  2.2687e-05,  3.5194e-06,
        -2.5358e-05,  3.5586e-05,  4.3869e-05,  4.2689e-05,  2.0759e-05,
        -2.9419e-05, -7.4996e-06,  2.6308e-05, -6.6610e-06, -2.2590e-05,
        -1.4640e-05,  4.9727e-05,  8.5765e-06,  2.8768e-05,  2.5105e-05,
         4.9222e-05, -2.0897e-05, -1.2129e-05, -2.5222e-06,  7.5643e-05,
         4.6032e-05,  3.9841e-05,  3.3247e-05,  1.7453e-05, -1.1143e-05,
         4.0787e-05,  1.5798e-05,  1.0762e-05, -4.2079e-05, -1.7535e-05,
         1.0677e-04,  1.4496e-01, -4.3675e-05, -7.7959e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4315e-06,  1.8929e-05, -8.1525e-07,  2.4115e-05, -9.0222e-06,
         5.1363e-07, -7.8107e-06, -1.6935e-05, -1.4465e-05,  7.4700e-06,
        -6.5690e-06, -2.6438e-05,  7.9064e-06, -3.4527e-06, -3.5703e-06,
         1.5996e-05, -3.4844e-05, -5.4330e-06, -7.8926e-06, -1.1826e-05,
        -2.0733e-05, -2.7751e-07, -1.3181e-06, -3.6911e-05, -1.9034e-05,
         4.8959e-05,  3.5055e-05, -3.7577e-05,  8.6891e-06, -4.5646e-05,
        -4.2711e-05,  3.1701e-07,  3.4289e-06, -1.3292e-05, -4.2772e-05,
        -2.2148e-05,  2.3467e-06, -2.6744e-05, -4.0557e-06, -4.5663e-07,
        -3.3005e-05, -8.8983e-06,  3.0468e-05,  6.3153e-07,  1.0626e-05,
        -6.3337e-05, -6.6167e-02, -7.9675e-05, -2.1847e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7732e-04, -1.1897e-05, -9.2032e-05, -4.2350e-06, -3.3849e-05,
        -5.8552e-05, -4.6473e-05,  3.0090e-05, -3.9309e-06, -3.2757e-05,
        -5.9569e-05,  2.8082e-05, -1.9332e-05, -2.7621e-05,  4.6694e-05,
        -4.3971e-05,  5.1496e-07,  9.4744e-05, -2.6226e-05,  3.4355e-05,
         6.7235e-05, -1.1730e-05,  6.4907e-05, -3.0389e-05,  3.7375e-05,
        -1.1176e-04, -1.7915e-05,  7.2321e-05,  4.8804e-06,  3.6088e-05,
         7.0649e-05,  1.4426e-05,  2.1175e-05,  4.6537e-05,  7.5956e-05,
         5.0245e-05, -4.8618e-05,  4.7204e-06,  3.0567e-05, -1.4076e-06,
         3.7774e-05, -1.2020e-05, -6.6189e-05,  6.9699e-06, -2.4628e-05,
        -3.4165e-05,  1.1535e-04,  1.6884e-01,  4.9747e-05,  4.9485e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6808e-04, -2.4207e-05,  3.8463e-05,  1.7664e-05, -4.4722e-05,
        -1.0428e-04,  2.5513e-05,  7.4111e-05,  6.2670e-05, -4.8171e-05,
         1.4951e-05,  8.6559e-05,  7.5675e-05, -2.6748e-05,  2.9494e-05,
         6.4805e-02, -9.8958e-05,  2.7422e-02, -2.3689e-05,  3.2676e-05,
         1.1302e-04,  8.8786e-02,  5.1407e-06,  1.5621e-05,  4.1009e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0502e-04, -4.4144e-05,  4.1763e-06,  2.2195e-05,  2.7105e-06,
        -7.4934e-05, -6.5323e-05,  3.8360e-05, -1.1405e-04, -2.9236e-05,
        -2.2960e-05,  3.1696e-05,  5.0729e-05,  1.3585e-05,  2.5807e-06,
         5.3524e-02, -4.3975e-05,  4.6157e-02,  1.3492e-04,  1.4758e-02,
         1.4777e-04,  7.8389e-02, -2.6805e-05,  6.1138e-05, -1.0593e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9545e-05,  1.5931e-06, -3.1483e-06,  9.1751e-06,  3.2488e-05,
        -2.7454e-06,  3.8218e-05, -9.2964e-06,  1.3373e-05, -4.4464e-05,
         5.6392e-05, -1.3931e-05, -3.8327e-05, -3.8636e-06,  4.2372e-05,
         7.3050e-02, -9.5130e-06,  5.3978e-05,  4.9058e-05,  1.0519e-02,
        -1.8905e-05, -5.6345e-06, -1.1409e-03, -6.8400e-05,  4.3981e-04,
         7.4738e-02,  4.8735e-05,  6.6844e-05,  2.7246e-05,  3.3873e-05,
        -4.0653e-06, -6.9482e-06,  3.6837e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3871e-06,  5.3127e-06, -5.4521e-06,  1.8655e-05,  1.0497e-05,
        -1.3710e-06,  1.1758e-06, -2.9914e-07,  1.9250e-05,  2.5195e-06,
         6.2846e-05, -1.5241e-05,  3.6255e-06,  7.1715e-06,  3.7856e-06,
         5.6647e-06, -4.6418e-06, -2.0649e-05,  1.4968e-05,  8.8921e-04,
        -7.2921e-05, -1.1102e-04, -2.0302e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1000: [tensor([ 3.1925e-04, -1.3201e-04, -3.5274e-05,  3.2071e-05, -1.1537e-04,
         7.7166e-05, -4.3694e-05, -9.7433e-05,  2.2452e-05, -3.5014e-05,
        -1.0565e-04,  2.9813e-05,  3.2818e-05, -1.3236e-04, -3.0419e-05,
         2.1954e-05,  5.8272e-05, -3.9678e-02,  3.8367e-05, -2.2508e-02,
         1.8116e-04, -7.3858e-03, -2.2641e-04,  7.6393e-05,  1.4206e-04],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5227e-04, -1.0500e-04, -7.0391e-05, -9.2682e-05, -2.5503e-06,
         8.4795e-06,  1.8315e-05,  1.8361e-06, -1.7375e-05, -2.4911e-05,
        -5.2467e-05,  2.2167e-05, -1.0374e-04, -8.6146e-05, -3.3132e-05,
         9.1123e-06, -1.9348e-05, -7.4223e-05,  1.6143e-01,  8.6348e-05,
         8.5081e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1722e-05, -2.2214e-06, -4.3317e-05,  2.3071e-06, -1.1536e-05,
         1.5269e-05,  1.7318e-05,  2.5175e-05, -3.0508e-06, -4.3144e-06,
         1.1512e-05,  8.5550e-06, -1.4078e-05, -1.9088e-06,  2.7956e-05,
         2.1622e-06,  1.9363e-05, -4.9341e-02, -5.8042e-05,  4.5940e-06,
         1.5729e-04,  6.6616e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5179e-05,  3.8542e-05, -1.7475e-06, -8.6669e-06, -1.3393e-05,
         4.9404e-05,  9.3131e-06,  2.1628e-07,  1.5196e-05,  1.2328e-05,
        -4.3405e-02,  2.4854e-05,  1.5808e-03, -5.1969e-05, -2.8720e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2844e-05, -1.5992e-05, -1.8594e-05,  6.7189e-05, -7.7879e-05,
         3.3861e-04,  5.0859e-05, -2.4023e-05,  1.4275e-04, -1.5953e-04,
         1.5449e-01, -9.4465e-05,  1.8794e-03, -5.5689e-06, -2.1866e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4755e-05, -1.3791e-05, -4.3812e-05, -1.1815e-04, -1.0636e-04,
        -5.8161e-05,  5.1199e-05,  3.3730e-05, -8.2071e-05, -3.3811e-05,
        -1.4181e-01, -3.1101e-05, -9.3862e-03,  4.7377e-05,  7.4820e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1383e-05, -8.4976e-05, -4.3712e-05,  3.7226e-05, -1.3267e-05,
         1.7838e-05,  2.1293e-05, -1.0632e-05, -3.0182e-06, -5.8974e-06,
        -1.9750e-05,  4.2994e-06, -8.2275e-06,  1.1488e-01,  1.2349e-04,
         1.3957e-02,  8.8887e-06,  3.0967e-05, -3.3388e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3637e-05, -6.3891e-05, -5.8608e-06, -2.4272e-05,  7.9400e-05,
        -2.9004e-05, -7.8639e-06, -5.9427e-08, -2.6137e-05, -7.8475e-05,
         6.6546e-06,  4.8391e-05, -1.5401e-05, -1.0363e-01, -7.1447e-06,
         3.0478e-05,  1.3029e-04, -2.1772e-05,  1.0687e-05, -1.8934e-05,
         1.4147e-04,  5.8802e-05,  1.1389e-04,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7502e-05,  9.4436e-05, -3.5541e-05, -4.5797e-05, -9.8640e-05,
        -2.3615e-05,  5.1562e-05,  3.9669e-05, -2.6364e-05,  8.1441e-05,
        -1.2262e-04, -2.9746e-05,  2.4743e-05,  1.5412e-01, -5.6137e-05,
         9.0513e-03,  1.5967e-04,  2.9912e-02, -6.3955e-06, -9.1227e-05,
        -1.5969e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9273e-05,  1.9663e-04, -3.6738e-05, -1.9494e-05,  9.2494e-05,
         2.5700e-05,  9.0009e-05, -2.5909e-05, -6.7698e-05,  5.0925e-05,
        -8.5686e-05, -4.0062e-05,  8.7142e-05,  3.5831e-05,  3.9200e-05,
         4.1831e-05,  5.6170e-05,  8.6856e-06,  1.4525e-01, -4.3523e-05,
         6.8202e-02, -4.2673e-05,  1.2523e-05,  6.7224e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0094e-04,  1.3508e-05,  4.5695e-05,  5.1796e-06,  3.6772e-05,
        -2.3603e-06, -6.0655e-05, -8.3531e-07, -1.1766e-05,  1.9616e-05,
        -7.9000e-05, -7.4150e-05,  7.1655e-05, -5.3732e-06, -3.7745e-05,
         5.6413e-05, -5.6086e-05, -6.8923e-05,  1.0369e-01, -8.7878e-05,
        -4.4127e-03, -7.3710e-05, -1.1433e-04, -1.5349e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2526e-05,  1.0792e-04,  6.0960e-05,  4.5410e-06,  1.1485e-04,
        -4.1975e-05, -7.0717e-05, -2.6400e-06, -4.3832e-05, -2.0331e-05,
        -3.3039e-05, -2.2806e-05,  2.8647e-05,  9.4498e-05, -4.4864e-06,
         6.0937e-05,  7.6216e-05, -6.0559e-05,  1.9662e-01,  1.2097e-05,
         2.0757e-02, -7.2536e-05, -2.2943e-04, -5.3704e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1050: [tensor([-1.9600e-04, -6.0177e-05, -2.5833e-05, -1.0971e-05,  2.2659e-05,
        -5.9602e-06, -3.7728e-06, -9.0831e-06, -1.4065e-05, -5.4261e-05,
        -2.1289e-05, -2.2800e-05, -5.7349e-05,  3.8059e-07, -2.7549e-05,
        -2.7692e-06, -2.6753e-05,  1.9380e-02, -7.0145e-05,  5.1966e-02,
         4.1471e-06,  1.7859e-02, -1.1036e-04, -3.7496e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2915e-04, -6.1358e-06, -6.8954e-05, -4.6933e-05, -1.8602e-05,
         1.7569e-05, -3.0921e-05, -1.2843e-05,  2.3331e-05, -1.8701e-05,
         1.3260e-05, -3.8430e-05, -3.3967e-05,  1.9406e-06, -5.1309e-05,
        -5.3968e-05, -2.3406e-05,  7.2596e-02, -2.3565e-05,  1.1877e-04,
        -1.3499e-05,  7.9157e-02, -8.3260e-06, -3.5046e-05,  7.7768e-04,
         7.0332e-05, -3.5887e-05, -7.5264e-05, -6.2673e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9628e-04,  8.8849e-06, -7.4741e-05, -1.7834e-05, -5.9145e-05,
        -1.2739e-06,  8.4936e-05, -4.9652e-05, -5.7461e-06,  9.5875e-06,
        -1.5325e-06, -7.0777e-05, -1.0360e-06,  8.7130e-05, -3.1304e-05,
        -3.4827e-05, -5.7148e-06,  6.4598e-02, -3.4796e-05, -5.5982e-05,
        -9.2702e-05,  8.6026e-02,  3.2172e-03, -3.8278e-05, -7.8731e-05,
         4.9847e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.3313e-05, -8.8814e-05,  1.5255e-04,  1.5923e-05, -5.2091e-06,
         1.4640e-04, -4.4976e-05,  2.5847e-05,  5.7409e-05, -3.8255e-05,
        -4.8853e-06,  8.3287e-05, -2.0842e-05, -3.7894e-05, -3.3493e-04,
        -2.2831e-07, -6.4062e-05,  1.3664e-01, -9.2173e-05,  3.8408e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4212e-04, -7.7162e-06,  3.1178e-05, -1.1118e-04, -5.2758e-05,
        -3.5600e-05,  1.4070e-06, -2.0108e-05,  4.4952e-05, -9.4032e-05,
         4.4795e-05, -5.8330e-05,  9.0903e-06, -6.4621e-05,  5.3398e-05,
        -1.3042e-04, -7.0939e-03, -1.1173e-01,  7.5571e-06, -1.0795e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7166e-05,  3.7030e-07, -2.0122e-05,  2.3949e-05,  1.4997e-05,
         1.8782e-05, -7.6611e-06,  1.5050e-05, -2.5857e-05, -8.1803e-06,
        -2.4689e-05, -4.8605e-06,  1.0911e-05,  5.2558e-06,  4.8581e-04,
         4.4380e-04,  1.6506e-04,  3.4585e-03,  1.4324e-03,  2.9166e-04,
         2.8090e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3811e-05,  5.2117e-05, -8.4681e-06,  3.4717e-05, -1.2135e-04,
         1.5365e-05,  1.8471e-05,  2.7730e-05, -2.2716e-05, -2.7143e-05,
         2.5254e-05, -6.4474e-05,  4.4792e-05, -4.3077e-05,  5.6869e-02,
         1.0917e-04, -1.0370e-04,  1.0329e-04,  2.9851e-05, -3.6545e-05,
         1.3337e-01,  4.8800e-05, -3.8158e-06,  1.7935e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1756e-04,  2.1116e-05,  4.1943e-05,  5.7931e-05,  2.2254e-05,
         1.4571e-04,  4.6271e-05, -3.4303e-05, -7.9370e-06,  1.0895e-05,
        -8.0889e-05, -3.2267e-05, -2.5229e-05, -4.5647e-05,  1.7452e-01,
         3.8639e-05, -3.4234e-06, -1.4705e-04, -9.8885e-05,  1.1400e-05,
         1.0355e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4958e-04,  9.2998e-06,  5.0279e-05,  1.2283e-05,  1.1101e-05,
         5.2634e-05,  9.5881e-07, -2.1094e-05, -2.6145e-05, -1.3704e-05,
        -2.2548e-05, -4.9470e-05, -5.1589e-05,  1.3582e-05,  1.1731e-01,
        -8.2606e-06, -2.5280e-05,  1.1194e-01,  6.3925e-05, -4.3870e-05,
         1.5640e-06,  6.3439e-07, -9.8471e-05, -5.5983e-06, -8.3639e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1328e-04,  2.4511e-05, -7.1781e-05,  1.1126e-04,  5.6333e-05,
         8.1726e-05,  3.0861e-05,  1.9737e-04,  6.8601e-05, -1.0961e-04,
         4.9107e-05,  4.9389e-05,  1.0279e-04,  6.5147e-05, -3.6366e-05,
         1.5939e-06,  1.7647e-01, -1.0104e-05,  1.0429e-05,  1.1357e-04,
         5.1814e-05,  6.7639e-05,  1.0316e-05, -7.5046e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0490e-04,  7.2903e-05,  2.0543e-05,  3.8917e-05,  5.1152e-05,
         1.3855e-05,  7.2295e-05, -1.1792e-04,  2.3439e-05,  6.5186e-05,
        -1.8313e-05, -9.7735e-05,  4.8751e-05, -1.6752e-05, -5.6151e-05,
         4.7603e-05,  1.6238e-01,  6.1836e-05, -8.7784e-04, -4.4200e-05,
         4.0436e-05,  3.0453e-05, -7.3067e-05, -1.5385e-04,  1.8381e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7627e-04,  5.8425e-05, -4.7757e-07, -2.1461e-05,  2.7787e-05,
         8.1026e-05,  9.7900e-05,  8.8733e-05,  1.3663e-05,  7.2720e-05,
         4.3806e-05, -5.0574e-05, -4.6530e-05,  8.7962e-05,  5.8496e-06,
        -4.1864e-05,  1.7382e-01, -2.5970e-05, -4.9348e-04, -7.4120e-05,
        -4.3490e-05,  1.8454e-05, -1.2296e-05, -3.0255e-04, -3.3188e-05,
        -7.2921e-05, -2.8168e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1100: [tensor([-6.5207e-05, -1.7942e-05, -4.0403e-07,  7.0327e-05, -4.0403e-05,
        -4.0278e-05,  1.2842e-04,  1.9835e-05,  1.1348e-04, -3.7566e-06,
        -3.0663e-05, -4.7617e-05, -3.8304e-05,  2.2772e-05, -2.9009e-05,
        -9.1346e-05,  2.9337e-05, -5.2574e-05,  1.6331e-01,  3.7569e-05,
        -8.1102e-05, -1.7728e-04, -6.3717e-05, -1.3408e-04, -8.3063e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3602e-04, -3.1304e-05, -3.8587e-05,  2.6573e-06,  2.0518e-05,
        -1.9834e-05, -1.3133e-05,  4.2547e-05,  5.9296e-05,  8.4585e-05,
         6.3036e-05,  8.5296e-05, -1.9402e-05,  1.4295e-06,  3.5997e-05,
         1.5014e-05, -4.9337e-02,  4.7928e-05, -9.9019e-05, -1.2021e-05,
        -7.3095e-02,  1.2216e-04, -8.8985e-07,  2.1615e-04, -2.4500e-05,
         5.8231e-05,  3.3516e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1759e-04,  5.7649e-06, -3.1448e-06, -2.1160e-05, -5.1316e-06,
        -4.0667e-05, -2.2299e-05, -1.2690e-05, -1.8377e-05,  1.3904e-05,
         9.3310e-06,  4.0706e-05,  3.7459e-06,  4.2245e-05,  4.7092e-06,
         1.7443e-05, -5.7123e-05,  1.1697e-02,  6.3627e-05, -6.3044e-05,
        -3.8598e-05, -4.5629e-05,  1.1304e-03,  1.1782e-05, -2.5760e-05,
         2.5313e-05,  2.2351e-06, -2.1741e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7411e-05,  2.1898e-05,  6.0981e-05,  7.7536e-05, -1.3755e-05,
         2.2759e-05,  1.6033e-05, -1.7680e-05, -1.3299e-05, -1.7700e-05,
        -4.4208e-05, -5.1453e-05, -6.4542e-06, -5.0406e-05, -1.7533e-07,
         2.0068e-05,  1.4419e-01, -1.6348e-05,  2.5060e-02, -8.7836e-04,
         6.4518e-03, -3.3015e-05,  3.2272e-05, -2.8770e-04, -9.2384e-05,
        -7.8475e-07,  2.3639e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8467e-05,  1.0463e-04,  5.0359e-05,  5.9601e-05, -5.6620e-05,
        -4.6254e-06,  7.6907e-05,  1.8120e-04, -1.2789e-05, -8.2483e-05,
         6.0343e-05,  6.7275e-05, -1.5992e-05, -5.6553e-05,  1.3334e-04,
         1.5418e-05, -3.4758e-05,  6.7473e-05,  1.8208e-01, -5.7003e-05,
         7.6537e-05,  2.0571e-04, -3.6143e-05,  1.5793e-04,  9.9688e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3784e-04,  6.0622e-05,  1.3950e-05, -3.5217e-05,  3.9648e-05,
        -1.3151e-05,  2.2043e-05, -4.8353e-05,  9.6797e-06,  1.6622e-05,
        -7.0739e-06, -2.2428e-05, -6.2256e-07, -6.3831e-05, -2.5605e-06,
        -5.2062e-05, -1.0215e-05, -4.3556e-06,  1.2940e-01, -2.6479e-05,
         3.3734e-04,  5.4126e-03, -5.8830e-05,  1.8037e-02, -3.5884e-05,
         1.8776e-05,  1.8523e-05,  4.2100e-05,  1.5529e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2806e-05, -1.6412e-05, -3.3297e-06,  6.9377e-06,  2.5477e-05,
        -1.4571e-05, -4.7181e-05,  1.1601e-05, -5.6364e-06,  2.4075e-05,
        -4.0197e-06,  8.8179e-06,  9.7056e-05, -9.9418e-06,  7.6709e-06,
        -1.2550e-05, -4.2936e-04,  1.9575e-05,  9.9104e-02, -9.5273e-05,
         3.4520e-04, -5.1491e-06,  1.4358e-05, -2.4174e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4044e-04, -1.2113e-04, -3.4203e-05, -5.7134e-05,  1.4582e-05,
         1.0657e-05, -8.1981e-06,  7.0928e-05,  5.1961e-05, -5.9480e-05,
         6.7066e-06,  1.4415e-05, -2.5523e-05, -1.4138e-07,  2.2781e-06,
        -3.8326e-05,  8.0928e-02, -2.4570e-04, -1.4494e-05,  1.1561e-01,
         8.6432e-05,  4.7146e-05, -3.4987e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6574e-04,  1.9516e-05, -1.3331e-04,  4.4576e-05,  8.4498e-06,
         3.6895e-05,  8.9145e-05,  2.0585e-05, -7.0603e-05,  9.3135e-05,
         1.3681e-05, -2.1396e-05, -7.4141e-05, -9.1039e-06, -6.0797e-05,
        -5.5989e-06,  1.6380e-01, -9.7374e-05,  3.8527e-02, -1.7712e-05,
        -5.1387e-05,  1.2735e-04, -1.9274e-05, -4.7798e-05,  9.6984e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5836e-04,  8.8312e-05, -1.6020e-05, -5.3203e-06,  3.5974e-05,
         6.4037e-07, -9.2425e-06, -2.9490e-06,  1.5418e-05, -1.9434e-05,
        -5.3458e-05,  5.6665e-05,  2.7166e-05,  4.9382e-05,  9.1980e-06,
         4.3239e-06,  5.7254e-02,  4.8173e-05,  7.9070e-03, -2.4364e-05,
         7.8954e-05,  4.3988e-05,  1.6800e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7902e-05,  1.8298e-06,  4.5696e-05,  8.0201e-05,  6.2135e-05,
         3.2686e-05,  5.9057e-05,  3.6385e-05,  6.5158e-06, -1.2342e-05,
        -4.7739e-05,  7.4168e-05,  3.4518e-05,  5.9228e-05,  2.3984e-05,
        -9.6525e-06,  3.8285e-05, -6.8004e-05,  7.7476e-02, -2.3896e-04,
         8.3937e-06,  7.0271e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7389e-04,  1.6419e-05,  3.0770e-05, -3.4146e-05,  5.2971e-05,
         2.6196e-05,  1.5215e-05, -2.5956e-05,  1.6694e-05, -2.0298e-05,
        -1.9597e-05,  2.4787e-05, -2.5901e-05, -2.7063e-07, -1.2316e-05,
         1.3287e-05, -1.3877e-05,  7.4077e-06,  1.8651e-05, -4.3090e-05,
         1.0459e-05,  2.5221e-05, -2.2385e-05, -1.7964e-05, -3.9253e-06,
        -4.8849e-03,  3.4371e-06,  4.6530e-06, -2.8943e-05, -2.4495e-05,
        -7.0012e-02,  4.3681e-05, -5.6434e-05,  1.0475e-06, -2.8678e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1150: [tensor([ 9.4930e-05,  1.0278e-05,  2.5030e-06,  1.5527e-05,  8.3700e-06,
         4.5418e-06,  9.5433e-06,  1.3071e-07,  2.0307e-06,  6.5830e-06,
         3.8859e-06, -2.1444e-06,  1.5528e-05,  2.2069e-07,  6.6608e-06,
         4.6149e-06, -3.2009e-06, -1.4839e-07, -2.7533e-04, -7.6348e-04,
         4.3448e-04,  1.3921e-04, -1.9141e-05, -1.8397e-05, -3.3256e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.8881e-05, -3.7633e-05, -1.8161e-05,  5.5759e-05, -2.8288e-05,
         1.5390e-05, -4.2551e-05, -6.8787e-07,  2.5072e-05,  8.0936e-05,
        -4.5466e-05,  2.0311e-05,  2.1382e-05,  1.7384e-05,  6.2208e-05,
        -8.8171e-06,  1.3968e-05,  7.2218e-03,  7.5771e-05,  8.2979e-02,
         8.3255e-05,  7.9742e-02,  3.1213e-05,  5.0078e-05,  7.9710e-05,
         1.3615e-05,  7.8767e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5509e-04,  5.0226e-05,  9.0063e-05,  1.0925e-04,  3.7196e-05,
         2.3337e-05, -4.9138e-05,  5.0470e-05,  3.6267e-05,  3.4039e-05,
         6.1217e-07,  3.7159e-05, -1.1932e-05,  4.7084e-05,  5.9195e-05,
         1.5927e-01,  8.4919e-05,  7.9455e-03,  1.4129e-05, -1.0295e-05,
         9.7009e-04, -1.8637e-05,  9.0331e-06,  4.3394e-02,  1.7281e-05,
         3.7173e-05, -6.0354e-05,  2.6997e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5602e-04, -4.0036e-05,  7.1503e-05, -2.1502e-05,  8.9532e-05,
        -4.0056e-05,  8.7768e-06, -3.3374e-05, -5.8501e-05, -2.3075e-05,
        -3.0904e-05, -7.5574e-05, -4.7977e-05, -6.7289e-05,  3.4132e-05,
         1.1392e-02, -3.2671e-06,  9.6991e-02, -7.1566e-05,  1.8679e-02,
         5.6123e-02, -3.6280e-05, -2.9731e-05, -3.7682e-06,  3.8637e-03,
        -6.1305e-05, -5.9871e-05, -2.4608e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5671e-04, -8.8952e-06,  5.9335e-05,  4.9265e-05,  4.6971e-05,
         6.5839e-06,  3.0598e-05, -5.7932e-06, -1.0451e-05, -8.8375e-06,
        -2.0828e-05, -8.3770e-05, -1.2587e-05,  3.5037e-05, -9.6495e-06,
         9.3094e-02,  2.3155e-05,  2.0747e-02,  4.0829e-05,  8.9921e-06,
         3.6504e-02,  6.3854e-05, -8.1975e-05,  1.8677e-02,  3.1299e-05,
         6.5742e-04, -1.1708e-04, -8.7365e-05, -6.8773e-06, -1.5226e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0403e-04, -8.2625e-06, -2.0909e-05, -5.7292e-05,  3.7466e-05,
        -7.0434e-05,  2.0710e-05, -1.7401e-05,  1.5536e-05, -4.3701e-05,
        -4.1715e-05, -2.7012e-02,  2.8162e-05, -8.9369e-02, -1.7219e-04,
        -3.4342e-05,  9.4843e-05,  5.9337e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9305e-04,  2.1515e-06, -3.1208e-06,  8.9549e-06,  6.2777e-06,
         5.5156e-06, -4.8521e-06, -1.2643e-05,  2.4559e-05,  1.6828e-05,
        -3.8397e-06, -2.3907e-02,  4.4878e-05,  5.1737e-03, -1.4308e-06,
         3.0616e-04, -2.2232e-05,  4.3000e-05, -2.4963e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0624e-04,  1.1658e-04,  1.2086e-04, -6.5583e-05,  4.3732e-05,
         8.7362e-05, -8.4967e-05,  3.3779e-05,  1.6187e-05, -4.3947e-05,
         7.8151e-06, -8.2003e-02, -5.7686e-05, -7.5104e-02, -1.1395e-04,
         1.6534e-06, -1.3054e-04, -1.8111e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1358e-04,  1.8211e-05,  4.7562e-05,  5.7317e-05,  6.7947e-05,
         7.0632e-05,  1.0100e-04,  6.0552e-06,  8.8486e-06,  9.6205e-05,
         4.5020e-05, -3.1528e-05,  2.3740e-05,  4.7570e-05,  2.9978e-05,
         1.0485e-01,  1.6355e-04,  3.1085e-05,  2.2680e-02, -2.7194e-05,
        -4.5507e-05,  1.8135e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4364e-04,  7.5780e-06, -2.9352e-06, -1.7129e-06,  1.0982e-05,
        -1.3562e-05,  1.1718e-06,  2.7111e-05, -2.3441e-05,  9.0202e-06,
         3.5088e-05, -2.7025e-05,  2.8798e-06,  1.7537e-05,  8.8381e-04,
        -4.8088e-05, -8.3601e-06, -8.4914e-02, -2.3960e-05, -9.0705e-06,
        -2.2667e-03,  4.4671e-05, -4.5649e-03, -2.8435e-05,  1.7115e-05,
        -4.8037e-03,  1.9002e-05,  6.8696e-06, -5.9253e-07,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1283e-04, -1.2058e-05,  5.0913e-05, -1.6063e-05, -4.6567e-06,
         1.5109e-05, -2.4519e-06,  5.0474e-05, -5.5114e-05,  3.0769e-05,
         6.1441e-06,  1.2795e-05, -2.1645e-06,  3.0074e-05, -3.8204e-02,
         7.6101e-05, -6.4500e-04, -2.1906e-05, -6.8515e-02,  4.8116e-05,
        -1.5299e-03, -1.0457e-02, -8.8689e-06,  1.8953e-05,  7.1146e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2211e-04, -2.2703e-05,  5.3327e-05, -1.2130e-04,  9.1250e-05,
        -2.8749e-05,  9.8554e-06,  3.4150e-05, -3.9264e-05,  4.4906e-05,
        -1.8473e-05, -6.6235e-05,  7.3494e-05,  4.8057e-05,  3.7020e-05,
         1.5824e-01, -2.2497e-05,  8.1078e-04,  9.5989e-06,  3.1626e-04,
        -1.3088e-04,  2.7639e-02,  1.2058e-06, -4.9759e-05,  9.6059e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1200: [tensor([-2.2864e-04, -3.5954e-05,  6.9564e-05, -6.9028e-05, -2.9491e-05,
         1.1848e-06,  6.0851e-07, -5.5322e-05,  3.6968e-05,  3.3683e-05,
        -6.7169e-07,  1.9169e-05,  3.3998e-05, -1.1622e-05, -1.1474e-01,
        -1.0596e-05,  8.2818e-05, -5.8678e-04, -2.5420e-05,  8.0641e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2899e-04, -3.9809e-05, -4.6995e-07, -5.6544e-05, -3.3016e-05,
         6.5568e-05,  1.3143e-05, -7.1060e-05, -4.7197e-07, -6.9747e-05,
        -5.1347e-05, -2.5443e-05,  2.0348e-05, -6.4892e-09,  3.9865e-05,
        -1.4893e-02, -7.3853e-05, -1.9755e-02,  6.4437e-05, -1.9381e-03,
         4.3409e-02, -3.3852e-05,  3.5058e-05,  1.6116e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4396e-05,  5.0750e-06,  1.1776e-05, -7.2391e-06, -1.1090e-05,
        -1.5751e-04,  1.6592e-05,  7.6524e-06,  8.4098e-05, -2.6928e-05,
         6.9966e-05,  1.1931e-05,  3.2888e-05,  5.1171e-05, -4.9248e-06,
        -2.6769e-03,  6.9542e-05,  4.2763e-03, -8.7423e-05,  7.2400e-04,
         1.7408e-01, -3.4889e-05, -4.6015e-05,  5.7844e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8153e-04,  5.7062e-05,  4.6733e-05, -4.4884e-05,  2.6706e-05,
         3.7648e-05,  6.1284e-05,  4.4055e-05, -8.9018e-05,  7.6233e-05,
         5.4218e-05,  9.6087e-05,  3.5244e-05, -2.4268e-05,  2.1383e-05,
         1.7056e-01, -1.3356e-04,  6.5130e-02, -7.8008e-05,  1.5849e-02,
         1.4288e-05, -9.2317e-05,  3.1535e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0018e-04,  4.7959e-06,  6.4733e-06,  3.1040e-05,  1.7549e-06,
         9.4485e-06,  1.3028e-05, -8.8187e-07,  5.7476e-05,  4.2580e-06,
        -2.0330e-05, -4.1871e-06,  1.6633e-05,  1.5412e-05, -5.8465e-06,
        -1.0005e-05,  2.9733e-06, -4.6983e-06, -2.9214e-05,  4.0970e-06,
         2.0321e-02, -4.3406e-05,  4.7972e-05,  5.6922e-03,  6.8338e-06,
         4.2782e-04,  3.7360e-02, -6.8256e-05, -6.4584e-05, -2.0595e-05,
         8.6815e-02,  1.8832e-05,  4.4432e-03, -3.4637e-05,  9.0127e-06,
        -1.5265e-05,  1.4413e-04, -6.6955e-05,  2.7414e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8359e-04,  7.9274e-06, -2.7774e-05,  6.1646e-05, -5.4564e-05,
        -4.6573e-05,  3.4873e-06,  1.6655e-05, -1.3953e-05, -4.0792e-05,
        -6.4685e-07, -2.4610e-06,  3.1843e-05,  2.4822e-06,  2.7594e-05,
        -2.2091e-05, -8.9767e-06,  2.5501e-05, -3.8203e-05,  3.7167e-05,
         1.2874e-01,  2.4081e-05,  1.1681e-06,  9.2904e-04, -7.4145e-05,
         6.5670e-05,  4.8019e-02, -2.7090e-05, -6.8222e-05, -3.5243e-05,
         7.1333e-02,  5.4905e-05,  1.4389e-03, -2.5510e-05, -4.6201e-05,
         3.1492e-05,  2.8675e-04, -7.5554e-05,  4.5116e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1541e-04, -1.1602e-05, -5.2193e-06, -1.2149e-05,  1.7280e-06,
        -2.6388e-05, -3.1918e-05,  7.3570e-06, -1.7026e-06, -2.5431e-05,
        -2.8458e-05,  1.2769e-05,  1.7009e-06, -1.6644e-05, -2.1589e-05,
        -3.9821e-06, -9.1058e-06, -2.8173e-05, -3.7164e-06, -1.3061e-05,
        -6.8099e-03, -2.2923e-05,  3.9166e-05, -6.6644e-05, -6.1568e-03,
        -6.6237e-06, -2.9893e-05, -3.5971e-02,  2.8115e-06,  1.1512e-05,
        -2.3069e-05, -5.5682e-02, -4.6335e-05, -1.4986e-03,  8.7300e-06,
         2.6837e-05, -3.2254e-05, -2.3982e-05,  3.5928e-05, -2.1627e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8944e-04, -1.9891e-05, -2.7725e-06, -8.8132e-06, -1.5139e-05,
        -1.3358e-05, -1.4322e-06,  3.5902e-06, -1.8221e-05, -1.9406e-06,
        -5.7652e-06, -2.0393e-05,  2.0134e-05, -1.4010e-05,  1.9558e-05,
         1.6552e-02, -4.3060e-05,  2.5683e-05,  4.6379e-02, -4.6265e-05,
         1.8020e-02,  1.9015e-04, -1.2269e-05,  3.3610e-05,  2.4165e-02,
         1.2870e-04,  2.1968e-05, -6.4794e-06,  1.7404e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1138e-04,  1.4014e-05, -2.1124e-04,  4.4669e-05, -1.4142e-04,
        -1.8622e-05, -1.3969e-05, -7.3065e-05,  1.1802e-04,  6.2046e-05,
         8.1462e-06, -2.4917e-05,  1.1430e-05, -2.6312e-05,  6.4348e-05,
         1.0793e-01, -1.1825e-04,  7.8128e-02, -5.6865e-05,  7.2617e-06,
         7.7099e-02, -7.2172e-05,  1.6300e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0863e-04,  1.5143e-05,  4.0584e-05,  1.2390e-05,  4.0479e-05,
         5.2314e-05,  2.3988e-05, -2.7666e-05, -9.7217e-06,  8.0441e-05,
         3.5936e-05,  2.5427e-05, -1.0604e-05,  4.1621e-06,  1.1517e-05,
        -9.6044e-04,  9.7136e-06, -5.5916e-02, -1.9525e-04,  1.3567e-03,
         3.9852e-05,  1.0662e-05,  1.2471e-05,  1.0134e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9734e-04,  5.5436e-06, -7.1419e-06,  2.4681e-05,  1.4762e-05,
        -3.0421e-06,  4.4854e-06,  4.5666e-06,  4.4476e-05,  1.7190e-06,
         1.8129e-05, -1.2541e-05,  1.8301e-05, -2.7528e-05, -4.2353e-05,
         2.1834e-05,  6.3094e-06, -2.1838e-05,  6.8820e-05,  3.8724e-05,
         7.0751e-05, -7.7549e-02, -2.8566e-05, -2.1870e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7163e-05,  3.9825e-06,  3.1118e-06,  5.2564e-06,  5.4369e-06,
         2.4176e-05,  1.5414e-05, -5.2369e-09, -1.7296e-05, -5.6637e-06,
         4.7844e-07, -1.3062e-05, -1.0831e-05, -1.7436e-06,  1.5479e-05,
         4.1401e-03,  7.0604e-04,  1.5272e-03,  5.6776e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1250: [tensor([ 1.2457e-04,  2.1306e-06,  5.5218e-05, -1.7463e-06,  3.0933e-05,
         3.4292e-06, -2.5080e-05,  7.4038e-05, -7.8560e-06,  1.0913e-04,
        -1.0347e-04, -4.6480e-05,  3.3660e-05,  2.6562e-05, -1.1956e-01,
        -2.6882e-06,  6.6734e-04, -5.2658e-05, -1.1398e-04, -7.6374e-05,
        -5.4168e-05, -4.9035e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9596e-05,  2.9831e-05,  3.0891e-06,  3.2216e-05, -5.7258e-06,
         9.8460e-06,  1.9455e-05, -1.0988e-05,  1.1712e-07,  9.9749e-09,
         2.1996e-05, -5.3402e-06,  1.0835e-05,  4.6847e-06, -1.5199e-06,
        -7.0887e-05, -2.0898e-04, -1.7362e-06,  4.9747e-06, -7.7708e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2323e-05, -1.4827e-05, -2.4896e-05,  4.8557e-06, -3.4077e-05,
         3.6204e-05, -4.5473e-06, -4.1633e-05, -4.9099e-05,  1.4170e-05,
         1.9624e-05, -8.1763e-06,  6.6439e-05, -2.3573e-05,  7.5569e-05,
        -4.5162e-05, -2.6573e-05, -5.0129e-05, -3.0234e-05,  3.6386e-05,
         4.7521e-05, -6.0764e-05, -1.1319e-05, -4.5173e-05,  2.5860e-05,
         1.5023e-01,  4.3558e-05,  4.3401e-03, -2.3865e-05,  5.0571e-04,
        -1.5592e-04,  7.6923e-07, -2.5017e-05,  8.9430e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4602e-05,  8.1859e-05,  2.6117e-05, -5.7555e-05, -3.0963e-05,
        -8.6648e-05, -2.3380e-05,  1.1958e-06,  1.1587e-05, -1.5443e-05,
        -3.8026e-05, -5.0748e-05, -3.4350e-05, -5.7110e-05, -2.8897e-05,
         2.8741e-05,  2.3657e-05,  1.5759e-05,  6.1025e-05, -3.9459e-07,
        -1.7233e-05,  1.3645e-05,  3.1779e-05, -5.4409e-05, -2.3958e-05,
         2.4984e-01, -7.4072e-05,  1.1416e-03, -1.1009e-05,  7.4555e-05,
        -3.4099e-03,  2.6441e-05, -2.7839e-05, -5.4326e-06,  7.1448e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6461e-05, -1.5321e-05,  7.4068e-05,  1.7695e-05, -2.0749e-05,
        -7.2592e-06, -3.7950e-05, -2.0967e-05,  1.0905e-05, -1.6674e-05,
         2.2078e-05, -5.2117e-05,  2.5056e-06, -2.4485e-05, -3.3070e-06,
         6.7086e-06,  3.3096e-06,  2.1176e-05,  7.6244e-05, -1.7789e-05,
        -1.7200e-05,  1.0715e-05,  6.0626e-05, -7.9721e-06, -1.0219e-05,
        -8.9452e-02,  8.9758e-05, -6.1517e-03, -1.0727e-04,  1.0417e-04,
        -4.2649e-05, -1.4093e-06, -3.6691e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7634e-04, -5.7256e-05, -6.0558e-06, -2.4303e-05,  3.4069e-05,
        -7.7474e-05, -9.9898e-06, -3.8281e-05, -1.4773e-05,  4.5635e-05,
        -3.4986e-05,  3.4489e-05,  1.0290e-04, -1.7866e-05, -2.9253e-05,
        -2.1772e-05,  2.7051e-05,  1.9938e-05, -6.2814e-05,  1.4368e-05,
        -1.0428e-05, -4.8423e-05, -5.0227e-05,  2.8125e-05,  5.1410e-05,
         1.4009e-01,  2.6784e-05,  4.6610e-03, -2.9854e-05,  1.4382e-01,
         4.0083e-05, -1.1131e-04, -1.1795e-04,  4.1206e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2085e-04, -1.2796e-05,  3.3020e-06, -6.7959e-06, -1.0036e-05,
        -2.8444e-05, -2.2972e-05, -6.5928e-05,  2.3189e-06,  1.9406e-05,
         2.7192e-05, -8.4896e-06,  4.3725e-05,  3.9589e-05,  3.9833e-05,
        -2.4869e-05,  2.3998e-05,  7.6982e-06, -2.2580e-05, -3.8717e-05,
        -4.6685e-07, -1.5089e-05, -3.5951e-05,  2.4848e-05,  2.8448e-05,
         1.1770e-04,  6.4059e-05,  1.1453e-01, -5.9000e-05,  4.5527e-02,
        -5.0977e-07, -2.6705e-05, -2.6783e-06,  1.1039e-03,  2.1264e-05,
         2.5137e-05,  1.6357e-05,  2.7565e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6087e-06,  9.3182e-06, -3.9956e-05, -3.1542e-05, -5.1961e-06,
         1.0599e-05, -1.3564e-05,  3.5843e-05,  2.4331e-05,  2.8055e-05,
        -1.1258e-06, -5.1406e-05,  4.5758e-05, -1.8975e-05,  2.1358e-05,
         4.9604e-06, -6.3442e-06, -2.4289e-05,  2.9798e-05, -4.2450e-06,
        -2.3955e-05,  3.8476e-05,  2.1390e-05, -3.0029e-06, -2.2857e-05,
        -1.4465e-05,  1.5376e-04, -2.0953e-05,  9.5454e-02, -9.8449e-05,
        -2.1342e-04,  1.5591e-04, -8.5260e-06, -2.9239e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9395e-05, -3.4362e-05, -6.1428e-05,  2.0751e-06,  2.6842e-05,
        -2.6879e-05,  3.8173e-05,  2.1913e-05, -3.8093e-05,  2.5823e-07,
        -2.3479e-05, -1.5154e-05,  2.2688e-05, -4.2960e-05, -5.0950e-05,
        -1.6337e-05,  5.7435e-05,  1.8499e-02, -4.6330e-06,  1.0865e-01,
        -1.7052e-05, -4.4994e-05,  4.7729e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3485e-05, -1.0327e-04, -8.9531e-05, -6.7262e-05,  3.0440e-05,
        -2.1846e-05,  4.2625e-05,  7.1469e-05, -1.0252e-04,  3.3821e-05,
        -1.2167e-05,  2.6120e-05, -1.4236e-05, -1.4573e-05,  3.1845e-05,
        -1.0779e-04,  1.2712e-04,  1.5611e-01, -8.5394e-05,  3.8239e-02,
         1.3300e-04,  2.9308e-02, -2.1609e-03,  1.1300e-05,  5.0443e-06,
         2.2023e-05,  1.3676e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3117e-05, -2.6500e-05, -9.6005e-06,  4.8649e-05,  6.8669e-06,
        -3.8394e-05,  5.2134e-05, -6.2204e-05, -3.5968e-05,  2.7255e-05,
         1.5583e-05,  2.0168e-05,  3.0534e-05, -3.9913e-06,  8.7307e-06,
         3.6655e-05,  2.4484e-05,  6.4522e-02, -7.5544e-05,  1.6098e-02,
        -2.1133e-05,  1.0736e-04,  3.9769e-02, -2.3922e-05, -4.3870e-05,
         9.8419e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4534e-05,  2.4719e-05, -4.9550e-06,  6.4381e-05, -4.3896e-05,
         5.0122e-05, -1.1720e-05, -1.7547e-05,  2.9480e-05, -3.8818e-05,
         1.4166e-05,  4.5691e-05,  1.0093e-05,  1.9888e-05,  3.8792e-05,
        -9.3153e-02,  7.2549e-03,  3.1880e-05,  4.2032e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1300: [tensor([ 3.6685e-04,  1.7975e-06, -1.1412e-06, -4.8228e-06, -1.0078e-05,
         2.2768e-05, -9.9241e-06,  2.1035e-05, -8.0252e-07, -6.8380e-07,
        -6.3648e-06,  5.3888e-06,  2.1649e-06, -1.3690e-05, -7.8009e-06,
         1.5736e-05,  2.4911e-07, -1.7822e-05, -1.6768e-06,  1.3841e-05,
        -1.8006e-05,  1.1345e-05, -7.4572e-06,  7.5167e-06,  1.5568e-05,
         5.5971e-04,  1.0041e-06,  9.7437e-06,  9.4135e-06, -2.7064e-03,
         2.5244e-05,  1.1746e-05,  4.8466e-05,  1.4173e-05,  5.8680e-03,
        -1.1585e-05, -1.2362e-05,  7.0803e-05,  5.0410e-05, -5.9798e-02,
         9.7905e-06,  2.8094e-05,  2.6960e-05,  1.2238e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1643e-04,  2.6548e-05,  2.4151e-05,  2.6554e-06, -1.7325e-05,
        -9.1721e-06,  2.8752e-05,  3.8702e-05, -1.1081e-05,  4.5457e-05,
         4.8883e-05, -2.8296e-06,  2.8352e-05, -2.3662e-05, -3.6942e-05,
         1.2840e-05, -1.1360e-05, -4.6830e-05, -2.9158e-05, -3.0406e-05,
         4.0205e-05,  4.9155e-06, -2.1064e-05, -4.0258e-02,  2.8741e-05,
        -1.3426e-03, -4.9960e-06,  2.1391e-04,  3.2314e-05, -5.8362e-02,
         3.2668e-05,  6.8937e-05,  6.3007e-07, -6.7902e-02,  2.7043e-05,
        -1.8498e-05, -2.6559e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0836e-05,  2.5078e-05,  1.2125e-05, -2.0477e-07, -1.5770e-05,
        -1.1662e-05, -3.7188e-06,  9.4755e-07, -3.4042e-06,  2.2046e-05,
         4.6820e-06, -2.5795e-06,  6.0521e-06, -1.6848e-05,  1.0370e-05,
         2.9900e-05,  1.6622e-05, -1.0325e-05,  2.7246e-06, -1.2538e-06,
         1.0507e-05, -1.8111e-06,  3.6281e-06, -9.8908e-03,  5.4079e-06,
        -2.9472e-05, -1.5218e-05, -7.7347e-06, -4.6580e-02,  3.2254e-05,
        -6.1988e-06,  9.3615e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0630e-05,  7.1673e-06,  3.8730e-05,  1.9874e-05,  4.8514e-06,
         1.0520e-05,  6.9151e-06,  2.4464e-05,  1.3395e-05,  1.3771e-05,
         5.3352e-06,  2.1874e-06,  1.6909e-05, -2.0996e-05, -1.7309e-05,
        -1.6646e-05, -2.2038e-05, -3.5644e-05, -1.2795e-06,  7.9550e-06,
         2.1829e-05,  3.5508e-05, -1.2765e-05,  1.4612e-05,  6.6394e-05,
        -1.0843e-02,  2.5311e-05,  2.5270e-05,  1.0995e-05, -8.0647e-02,
         1.1802e-05,  3.2604e-06,  1.9123e-04,  7.3903e-06, -4.2062e-02,
        -9.5948e-05, -2.6962e-05,  6.7551e-06,  1.8578e-03,  1.7748e-05,
         2.5380e-05,  3.5564e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6649e-04,  4.5480e-05,  1.7540e-05,  1.4302e-05,  2.1073e-05,
        -8.3559e-06, -1.9148e-05, -4.0840e-06,  1.9924e-05, -7.3340e-08,
        -8.2381e-05,  2.2575e-05,  4.3225e-06,  1.4392e-05, -5.1953e-06,
         1.4278e-05, -2.6741e-05,  4.5019e-05, -4.6995e-07, -2.2574e-05,
        -8.8285e-05,  1.1419e-05,  1.6992e-05,  6.5741e-05, -1.4217e-05,
         7.7010e-02, -5.0221e-05, -3.1762e-04,  4.2941e-05, -9.3212e-03,
        -3.5259e-06,  1.4970e-04, -3.5905e-04,  1.0594e-01,  2.7359e-05,
         2.6107e-05,  2.9052e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1455e-05, -9.0245e-06, -4.9212e-05, -4.7137e-05, -8.8902e-05,
        -1.0507e-05, -5.9537e-06, -1.2580e-04,  5.0566e-05, -1.1978e-04,
         5.7105e-05, -3.7357e-05, -5.3426e-05,  2.4200e-05, -5.2352e-05,
        -9.1707e-05,  1.9817e-05,  8.1823e-06,  9.4488e-06, -1.9475e-05,
         5.1850e-05, -2.2503e-05, -4.2560e-06,  1.8945e-01, -7.7866e-05,
         6.8634e-03,  2.5714e-05,  7.6922e-04,  6.4507e-02,  7.9586e-05,
        -1.5913e-05, -3.9066e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6291e-05,  5.2347e-05,  3.7257e-05, -4.4883e-05, -6.5129e-05,
        -2.2087e-05, -8.7226e-05, -2.8923e-05,  7.1042e-05, -3.3825e-05,
         3.2297e-05, -1.8753e-05, -2.4458e-05, -4.9988e-05,  1.3982e-05,
        -3.6728e-05,  3.8248e-05, -1.9428e-05,  2.5127e-05,  1.3509e-05,
         5.2690e-05,  2.1606e-05,  2.8660e-06,  7.8545e-05,  4.5143e-05,
         1.0769e-02, -1.3692e-04, -5.6452e-05,  1.4257e-01, -6.6356e-05,
         2.7406e-02, -8.1929e-06, -9.0644e-08,  1.9476e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6432e-04, -4.8881e-06,  8.2593e-06,  1.7412e-05,  1.0718e-05,
         1.2987e-05,  7.3308e-06,  1.8299e-05, -1.7255e-05,  2.5759e-06,
        -1.8520e-05,  3.7130e-06,  2.2122e-06, -1.1006e-05,  2.8388e-05,
         2.1153e-05, -1.4598e-05, -5.3863e-06, -6.7747e-06, -9.2103e-06,
        -3.4254e-06, -2.7511e-05,  1.4573e-06, -2.1570e-05, -1.8209e-02,
        -4.0666e-02,  1.3197e-05, -9.9779e-03, -3.1528e-02, -5.1823e-06,
         2.8294e-05,  3.8483e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1262e-04, -3.3570e-05, -2.2991e-06, -3.6412e-05, -2.4334e-05,
         1.1889e-05,  2.1443e-05, -9.3636e-07, -2.3565e-05, -2.9152e-05,
        -3.0850e-05, -6.0166e-05, -4.8517e-05,  3.6937e-07, -3.0137e-05,
         3.3264e-05, -2.1135e-05,  9.9453e-06, -5.2225e-05, -3.7077e-05,
         1.3548e-06, -1.7297e-05,  1.3664e-05, -1.3688e-02, -3.8464e-05,
        -2.5254e-06,  4.1283e-05, -7.0358e-03,  1.3154e-01,  1.1125e-04,
        -1.3804e-05, -8.3294e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7009e-05, -3.9482e-05, -3.8228e-05, -1.1267e-05,  1.2061e-05,
         1.8051e-05,  2.1354e-05,  8.2548e-06,  6.3353e-05, -1.5362e-05,
        -4.8740e-05,  1.9175e-05, -6.0968e-05,  7.5134e-07,  1.9151e-05,
         8.0498e-06, -4.0229e-05,  1.1883e-05, -2.4652e-05,  8.0620e-05,
         7.8558e-05,  5.7798e-05,  8.4260e-05,  1.2765e-03, -1.6171e-01,
         2.7864e-05, -9.1482e-05,  1.0260e-04, -9.0449e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3326e-04,  2.4585e-05,  5.7378e-05, -1.4113e-05, -3.8480e-05,
         6.0884e-05, -1.4594e-05, -2.9029e-05, -5.1728e-05,  2.7176e-05,
        -1.5932e-06, -5.8208e-06, -5.1952e-06, -4.8030e-06,  6.6810e-05,
         2.0855e-05, -9.2359e-06,  2.8453e-05,  6.2383e-06, -5.9865e-05,
         3.3678e-05,  6.1193e-05, -9.8267e-03,  1.1317e-04, -3.1133e-05,
        -2.0144e-05,  1.6656e-05, -2.6342e-04,  1.7559e-01,  5.8076e-05,
         1.5748e-04,  2.7866e-05,  1.6555e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9834e-05,  2.7186e-05,  2.0040e-05,  2.8284e-05,  4.0113e-06,
         3.2170e-05,  7.2057e-05, -1.4298e-06,  2.1149e-05,  1.4683e-05,
         5.4246e-06,  2.6869e-05,  2.8992e-05,  8.9009e-06,  1.8021e-05,
         8.5310e-06,  1.0409e-04,  7.1011e-05,  3.6544e-05,  3.7354e-05,
        -3.6399e-05,  2.4852e-06,  1.0879e-01,  3.9058e-05,  2.5044e-02,
         5.1931e-05,  5.3635e-06, -1.6978e-04,  3.8566e-04, -2.4993e-04,
         9.5557e-07, -5.2739e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1350: [tensor([ 4.4762e-06,  9.2123e-06,  1.1051e-05,  2.1146e-05,  5.1420e-05,
        -1.7179e-05,  9.2454e-07,  3.6605e-06, -2.0861e-05, -1.4199e-05,
        -2.5892e-05,  1.1315e-05,  2.4347e-05,  3.3225e-06, -5.8048e-05,
        -7.4836e-05, -4.5823e-05,  8.4636e-05, -1.0503e-05, -2.4418e-05,
         6.0289e-05, -2.4151e-05,  1.7080e-05, -1.1317e-01,  1.4091e-04,
        -1.0464e-05,  4.0084e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9481e-05,  3.7543e-05, -3.4822e-05, -5.6676e-06,  3.6301e-05,
         6.9012e-06,  3.9934e-05, -5.1113e-07,  1.0852e-05,  4.4574e-05,
        -1.0829e-05,  1.3677e-05,  2.0866e-05, -1.9357e-05,  1.8252e-05,
        -2.7239e-05,  3.6378e-05,  4.8229e-05, -7.0791e-07,  1.3119e-05,
        -2.1619e-05, -6.3538e-05, -2.2198e-05,  3.5483e-02, -8.4718e-06,
         1.2806e-01,  4.7331e-05,  4.6782e-05,  1.7849e-02, -7.7452e-04,
         9.4550e-05,  9.1116e-05, -1.7838e-05, -1.1131e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5773e-05, -1.9019e-05, -1.4491e-05, -3.1726e-05, -1.6742e-05,
        -5.8666e-05,  5.0414e-06, -4.5811e-05,  1.2729e-05,  3.2357e-06,
        -3.5530e-05, -3.7474e-05, -1.5169e-05,  9.5514e-06,  2.2028e-05,
        -3.9372e-05, -2.5565e-05, -2.7954e-05, -2.9665e-05,  1.7921e-05,
         4.3511e-05,  8.4352e-05,  8.1793e-05,  8.8489e-02,  1.4301e-02,
         1.3374e-05,  1.0946e-04, -3.5041e-06, -2.1595e-05, -1.1479e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3094e-04,  1.5439e-05,  6.8678e-06,  9.7436e-06, -4.6512e-05,
        -3.5701e-05, -5.3554e-05,  4.6397e-05,  3.1122e-05,  1.9541e-06,
         6.9486e-05,  3.3902e-05,  4.9749e-05,  2.2927e-06, -3.5020e-05,
         6.2174e-05,  4.4686e-05,  6.7327e-06,  3.4684e-05,  1.1731e-05,
        -2.9439e-05, -1.6591e-05,  5.9350e-06, -1.0017e-01, -2.9366e-05,
        -7.2204e-02, -3.5995e-06, -1.8044e-05,  6.3851e-05, -5.3215e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8443e-05, -5.3251e-05, -3.1255e-05, -4.1696e-05, -2.1973e-05,
        -5.1063e-06, -4.9556e-05,  3.7348e-05, -2.3997e-05, -7.7921e-06,
         3.1028e-05, -1.3842e-05, -6.3305e-05, -2.4686e-05,  1.1485e-05,
         2.2513e-05,  1.0750e-05, -2.3181e-05, -5.7731e-06, -1.3515e-05,
        -1.4400e-05, -4.3572e-05, -6.3276e-05,  2.0451e-02,  3.1660e-04,
         1.0634e-01,  1.1355e-04, -7.6652e-05,  1.1006e-05,  3.0247e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6587e-04,  9.1139e-05,  1.1003e-05,  1.2944e-05, -5.2454e-05,
         1.6393e-05, -8.2661e-05,  4.9192e-05,  5.7619e-05,  1.6186e-04,
         3.1795e-05,  2.7398e-05,  1.6008e-05,  2.9461e-05,  1.5086e-04,
         1.4750e-05, -3.1049e-05, -7.2108e-06, -5.3281e-07, -3.3389e-05,
         4.8418e-05, -7.2766e-05,  1.2153e-05,  2.2309e-01, -5.2680e-05,
        -7.4825e-03,  8.0581e-05, -2.0079e-04, -1.2698e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7799e-04,  8.6188e-05,  4.1972e-05,  2.3335e-05, -7.1593e-06,
         1.4758e-04,  3.9655e-05,  9.0168e-05,  4.8647e-05, -3.3745e-05,
         1.0092e-04,  1.0905e-04,  1.1833e-04,  7.8720e-05, -6.2191e-06,
         3.3798e-05, -2.6328e-05,  9.3341e-05, -3.9393e-05,  4.8947e-05,
         2.4308e-01, -8.2394e-06, -1.2219e-04, -1.7616e-04,  1.2266e-04,
         1.4903e-04, -4.4360e-06, -1.3056e-05, -3.5410e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1031e-06,  1.2643e-05, -1.2983e-05,  4.4538e-05,  4.8104e-05,
        -1.4497e-05,  4.6588e-05, -1.2767e-05,  3.5678e-05,  1.3318e-05,
         6.1942e-05,  2.2528e-05,  3.4242e-05, -5.5422e-05,  2.8217e-05,
         1.8291e-05,  3.5010e-05,  5.8441e-05,  5.8145e-05,  2.6313e-05,
        -2.1859e-05, -1.3413e-05,  1.5196e-01, -5.1365e-05, -1.0360e-04,
         3.2625e-03, -3.7574e-03,  4.7270e-05, -7.0031e-05, -1.8764e-05,
         1.6979e-05,  4.4251e-05,  1.0904e-05, -1.7742e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9736e-04,  5.5572e-05, -5.1517e-06,  1.6889e-05, -9.6346e-05,
         8.9473e-05, -2.1783e-05,  4.7829e-05, -3.7339e-05, -4.6120e-06,
        -2.5527e-05,  1.0237e-05, -7.1692e-05, -1.0566e-05, -5.1229e-05,
         6.6237e-05, -4.5196e-05,  1.0077e-05,  7.6813e-05,  5.2117e-05,
         6.8038e-05,  2.9140e-06, -3.2614e-05, -5.7896e-05,  1.5664e-03,
        -6.2939e-05, -5.8248e-05, -1.4514e-01,  5.8428e-05, -5.8272e-05,
         7.7612e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7583e-05,  1.5525e-05,  3.7034e-05, -2.4722e-05, -2.0236e-05,
        -8.4568e-05, -1.6100e-04, -1.1621e-05, -1.2913e-05, -1.9139e-05,
        -2.3271e-05, -5.2458e-06, -5.7164e-05, -3.0948e-05, -4.2562e-05,
        -9.7679e-05,  4.4048e-06, -2.5729e-05,  1.3187e-04,  3.1854e-06,
         8.1936e-06,  1.4552e-04,  7.1932e-05, -1.5391e-01, -3.7881e-05,
         6.9136e-05, -8.8966e-05, -1.3639e-04, -6.5713e-05, -9.7113e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2213e-05, -2.9302e-05,  6.9074e-05,  8.1456e-05,  1.1341e-04,
         2.1654e-05,  1.4210e-04,  6.4417e-07,  1.0974e-04, -5.6685e-05,
         2.9421e-06,  4.7014e-05, -7.3060e-05,  5.8286e-05,  2.6704e-06,
        -1.6676e-05,  7.8053e-05, -6.4716e-05, -3.2664e-06,  4.6426e-05,
        -2.1854e-05, -1.1646e-04,  9.4735e-05,  9.5826e-03, -5.3071e-05,
         2.7770e-01, -1.3992e-04, -1.3446e-04, -1.8545e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0760e-06,  1.1372e-05,  2.0618e-05, -9.4652e-06,  7.9649e-07,
        -4.7157e-06,  1.5856e-05, -3.7501e-05, -6.3109e-06, -1.2908e-05,
         6.3813e-07,  7.3857e-06,  2.5297e-06, -6.3278e-05,  8.7735e-07,
        -8.9566e-06,  1.2853e-05, -1.0627e-05, -3.5105e-05,  8.2300e-06,
        -2.2476e-05,  2.6685e-05, -7.8075e-06, -1.5774e-05,  4.2052e-05,
        -2.8021e-05,  1.7125e-05, -7.4108e-06, -7.1355e-07,  4.4279e-06,
         1.6168e-05,  1.0631e-05, -2.6182e-05, -9.2185e-06,  1.1518e-05,
        -3.7594e-06,  1.1752e-06, -5.6576e-06, -3.0484e-05,  2.5186e-05,
        -3.3991e-04,  1.0235e-01,  2.9024e-03, -7.1912e-05, -1.4738e-05,
        -4.4624e-05,  5.8708e-05, -4.9934e-05,  1.0858e-05, -5.4420e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1400: [tensor([-9.1964e-05, -3.8714e-05,  6.1034e-05,  7.0683e-05, -2.9037e-05,
         6.1119e-05, -3.8185e-05, -4.6049e-05,  4.3516e-05,  6.8381e-05,
        -1.6783e-04,  1.0281e-04,  1.8380e-01, -8.9076e-05,  7.2406e-02,
         6.1637e-05, -1.9196e-04, -2.9834e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9122e-05,  2.3747e-06, -2.0892e-05, -6.2841e-05, -1.4818e-05,
        -4.4164e-05, -7.0074e-06,  3.5579e-05, -1.5871e-05,  4.3209e-05,
         4.0636e-05,  6.8978e-05,  1.2805e-01,  4.3926e-05,  1.7264e-02,
         1.9858e-05, -2.4639e-03, -2.2040e-05, -1.6046e-03,  9.0843e-02,
        -3.9281e-06, -3.8955e-05,  1.8996e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1033e-04,  5.2666e-05,  6.3933e-05, -3.2695e-05, -2.2012e-05,
         5.5944e-05,  1.9737e-05, -5.2673e-05,  1.2091e-05,  3.3930e-06,
         3.4236e-05, -3.7631e-05, -6.9055e-02,  8.4912e-06, -5.5962e-02,
         1.8166e-05,  3.5506e-05, -7.1386e-06,  9.1238e-06, -6.6447e-05,
        -6.9549e-05, -6.4732e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9091e-04,  3.9018e-05,  5.0220e-05, -4.3038e-06, -7.5008e-05,
         4.5059e-05, -5.0075e-05,  3.9121e-05, -3.1701e-05, -2.8063e-05,
         1.8974e-05,  2.5615e-05,  1.8173e-01,  6.1094e-05, -5.1294e-05,
         4.3523e-02, -1.6195e-05, -1.2222e-03, -2.8449e-05, -8.2360e-04,
        -4.5895e-04, -9.6748e-06, -2.8891e-07, -9.2582e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7885e-05,  3.9627e-05,  3.0295e-05,  9.3056e-05, -3.1069e-05,
        -5.4269e-06, -7.1925e-06,  2.3813e-06,  9.5942e-05,  1.7603e-04,
         6.1933e-05,  5.8354e-05,  1.5028e-01,  1.6138e-04,  6.7477e-02,
         1.0050e-04,  1.0775e-02,  9.4847e-05, -3.9690e-05,  3.0166e-05,
        -3.9417e-05,  1.4546e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2432e-05,  8.1145e-05,  2.1580e-05, -1.0665e-04,  7.2799e-05,
         2.1552e-05, -2.8767e-05, -3.3681e-05,  2.1943e-05,  3.6580e-05,
        -4.6597e-05, -8.5186e-06, -1.3300e-01, -5.4646e-05, -3.8551e-05,
         6.0998e-05,  3.9411e-04,  1.0027e-03, -1.1580e-04, -5.3930e-05,
         3.0501e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3222e-05, -9.7073e-06,  1.8048e-05,  1.0725e-06,  7.1886e-05,
         1.1045e-04,  2.0907e-05,  3.4155e-06, -4.6386e-05,  1.5480e-05,
         1.7537e-05,  7.7298e-05,  5.1364e-05,  3.9583e-03, -8.9220e-05,
        -1.0983e-01, -1.4516e-04, -9.2580e-05, -1.5193e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0928e-05, -2.6525e-05, -3.0284e-05,  2.1508e-05,  1.2400e-06,
        -3.3722e-05,  3.0006e-05, -4.8828e-05,  1.1296e-05, -2.0600e-05,
        -4.7855e-05, -7.9843e-06,  2.4630e-05, -5.9493e-02, -1.0993e-05,
        -2.5821e-02,  1.8983e-06, -3.5441e-02, -2.4782e-05, -3.7758e-07,
         6.7855e-06,  7.2191e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9862e-05,  1.7252e-05,  5.7945e-05, -2.9542e-05,  1.0388e-05,
        -3.9950e-05, -4.0968e-06, -6.6484e-05,  5.2356e-05, -1.2373e-05,
         9.5601e-05, -9.4887e-06,  3.9099e-05, -1.2429e-01,  1.0094e-04,
        -1.0407e-02,  1.9469e-05, -1.4646e-04, -4.8158e-05,  1.0861e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3919e-05,  1.5655e-05,  3.1520e-05, -2.2843e-05, -1.9660e-05,
        -8.0072e-06, -3.6263e-05, -3.2358e-06,  7.1483e-06, -1.9478e-05,
        -2.9041e-05,  2.4964e-05, -1.0817e-05, -2.7628e-05,  2.8564e-06,
        -3.3895e-05,  1.1760e-05, -4.9364e-05, -6.4371e-06,  2.5511e-05,
        -5.9260e-05, -4.7291e-02,  5.6568e-05,  7.7128e-05, -5.5080e-02,
        -1.9657e-05,  3.9148e-05, -3.3506e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2337e-05,  3.2821e-05,  2.3670e-05,  9.1759e-06, -5.9878e-06,
         2.7578e-05, -2.2012e-05,  1.7579e-05,  7.7943e-06,  2.0520e-05,
        -4.3138e-05,  3.3932e-05,  3.3721e-05, -4.9198e-06, -1.9824e-05,
        -2.2243e-05, -2.0801e-05,  1.2434e-05,  3.5024e-06,  3.3107e-05,
        -1.8397e-05,  1.4846e-03,  1.6077e-05, -1.1152e-05, -1.1823e-01,
        -1.6629e-05, -8.8159e-05,  2.6205e-05,  1.7827e-05, -2.2288e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2211e-04, -2.7485e-05, -9.3868e-05,  8.9887e-06, -6.0440e-05,
         3.6016e-05,  1.2611e-04,  6.3541e-05, -3.4720e-05,  2.9768e-05,
         6.1760e-05,  1.6624e-05,  4.4688e-05, -3.1132e-05, -1.0637e-04,
        -3.2091e-05, -8.7098e-05, -2.7464e-05, -1.5052e-04, -9.5344e-05,
         7.2534e-05,  2.2466e-01,  2.8229e-05, -1.1404e-04,  5.4506e-03,
        -1.4990e-05, -1.5146e-05,  8.3592e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1450: [tensor([ 4.2078e-05, -3.7631e-05,  4.4053e-05,  9.9983e-06, -1.0926e-05,
        -6.6951e-05, -2.4559e-05, -3.9585e-05, -3.1320e-05, -2.4745e-06,
         1.1554e-05, -7.5933e-03,  3.5126e-05, -3.0221e-02, -2.5616e-02,
        -4.6184e-02,  6.8519e-05, -6.4535e-03, -6.3622e-02,  1.2170e-05,
         1.0070e-04,  2.1571e-05,  1.5390e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4539e-04,  4.5265e-06,  5.4361e-05, -2.9169e-05,  3.2100e-05,
         1.8349e-05, -2.1656e-05, -4.8952e-06,  4.7872e-05,  1.3678e-05,
         2.1890e-05, -5.9796e-02,  5.1166e-05, -7.8570e-02, -9.1208e-05,
        -2.9821e-04,  5.6964e-06, -4.8460e-05,  2.2444e-05, -8.0166e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6736e-05, -8.8292e-06,  2.9283e-05,  5.1071e-05, -7.3441e-06,
         2.2139e-05,  2.8619e-05,  4.7331e-05,  6.0689e-05,  2.7886e-05,
         5.9822e-05, -1.9707e-02, -4.3267e-05, -1.0830e-01, -1.5392e-05,
         6.0356e-06, -3.5201e-05,  9.6348e-05,  4.8825e-05,  1.1655e-05,
        -3.3666e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5029e-06, -1.2864e-05, -1.8579e-05, -5.1717e-07,  2.7325e-05,
        -2.4952e-06,  2.4490e-05, -7.9729e-06,  1.1444e-05,  1.7710e-06,
        -1.0286e-06, -2.4845e-02, -3.3798e-05, -8.4645e-03,  1.6009e-05,
         3.3606e-04,  2.2421e-06, -1.1709e-05,  6.7085e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9852e-04, -3.4993e-05, -5.9893e-05,  3.0810e-05,  3.7941e-05,
        -2.5619e-05, -8.6292e-06, -1.2081e-06,  4.9768e-05, -2.6666e-05,
         4.3486e-05, -1.7372e-02, -2.9339e-05, -8.3215e-02,  1.9108e-05,
         3.2981e-05,  5.2627e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6860e-05, -1.8660e-05, -1.8210e-05,  4.3232e-05, -1.7469e-05,
        -1.0422e-05, -3.8042e-05, -1.0240e-05,  2.5240e-05,  1.0201e-06,
        -1.9783e-06, -4.2757e-02,  7.9671e-06, -2.0243e-02,  4.0981e-05,
        -1.2798e-04, -3.3406e-04, -1.1549e-05, -4.7659e-06,  1.3363e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6591e-05,  6.7868e-06,  5.1593e-05,  2.0131e-06, -2.9145e-05,
         4.3472e-05,  3.8524e-05,  4.2827e-05,  3.7582e-05,  2.8916e-05,
         4.4908e-05, -8.4499e-06, -4.5672e-06,  3.2293e-05, -3.9049e-02,
        -3.3406e-05, -6.0016e-02, -1.5295e-04,  3.5704e-05,  2.8693e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8356e-05,  1.5178e-05,  3.1119e-05,  4.5574e-05,  5.1809e-05,
         1.4421e-05,  2.8734e-06, -9.2155e-06,  1.2637e-05, -2.4741e-05,
         1.4063e-05,  1.6842e-05,  1.2628e-05,  1.2031e-05,  1.3873e-03,
         4.9244e-05, -6.1245e-02,  4.7860e-06,  1.3537e-04, -1.3202e-02,
        -3.5809e-05, -1.1033e-05,  1.5080e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7075e-05, -1.9064e-05,  3.7412e-05,  2.2904e-05,  5.3951e-05,
         1.3953e-06,  2.5199e-05,  4.0631e-05,  4.7344e-05,  3.1362e-05,
        -5.8894e-06,  1.4862e-05,  5.1754e-06,  4.4093e-05, -5.8154e-02,
         4.0611e-05, -7.7305e-02, -1.1001e-04, -2.3480e-02, -3.0900e-05,
         7.0775e-06,  3.6880e-05,  5.9570e-05, -4.4415e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2807e-04, -3.8654e-05,  1.8709e-05,  4.9915e-06,  5.8689e-05,
        -4.5834e-05,  2.9452e-05,  2.0663e-05, -1.3854e-05, -4.6648e-05,
         1.0143e-05,  1.2543e-05,  9.0924e-06,  6.9811e-05, -1.4633e-01,
        -1.0734e-04, -1.8083e-03,  2.4265e-06,  4.8728e-05,  8.5614e-04,
         4.0001e-04, -2.9430e-05,  3.2811e-05,  4.3833e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5129e-05,  4.0091e-07,  3.6566e-05, -1.1649e-06, -2.3166e-05,
         2.5753e-05,  3.3053e-05, -5.0515e-05, -2.0084e-05,  2.3737e-05,
        -6.3995e-05,  2.3241e-06, -8.7110e-05,  4.2043e-06,  4.0928e-02,
         1.2326e-04,  5.1811e-03,  9.2808e-02,  4.6507e-05, -2.2442e-05,
        -3.3198e-05, -9.7303e-05,  4.8870e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5699e-05,  1.0052e-05,  1.2595e-05, -4.1391e-06,  3.3563e-06,
         6.2773e-06, -1.4906e-05, -1.5582e-05,  1.1850e-05,  5.9904e-06,
         2.4417e-05, -1.9104e-05,  1.0331e-06,  6.7036e-06, -5.5048e-02,
         1.0394e-05, -2.4646e-02, -1.4284e-05, -6.5437e-05, -1.2651e-05,
        -9.9475e-06, -4.2510e-04,  9.4281e-05,  1.1765e-05,  2.3708e-05,
        -6.7813e-06], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1500: [tensor([-4.1129e-06,  3.2898e-04, -1.7244e-05, -6.8022e-05, -5.0132e-05,
        -1.2718e-04, -8.4781e-06, -1.5840e-04,  2.0916e-04,  2.9994e-01,
        -2.5531e-04, -3.1693e-07, -6.5271e-05,  1.7137e-04,  1.1552e-05,
         2.3137e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5517e-05, -6.2128e-08, -3.5941e-06,  3.7550e-06, -3.1039e-05,
         9.1739e-06,  2.8798e-05, -8.6845e-06, -5.7780e-06,  1.4283e-02,
         1.6110e-04,  2.3696e-03,  2.7457e-05,  5.0552e-05,  1.5910e-05,
         2.8647e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6197e-05,  1.4720e-05, -1.6818e-04,  1.5055e-04, -4.6089e-05,
         4.8446e-06, -1.0217e-05,  8.8181e-05,  2.3854e-05,  1.3581e-01,
         6.6973e-05,  8.7183e-02, -1.0578e-04,  1.5703e-03, -8.6490e-05,
         3.0465e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0948e-04, -6.6816e-06, -2.4203e-05,  8.6897e-06,  1.7669e-05,
        -5.2423e-05, -1.7400e-04, -2.7424e-05,  2.6375e-05, -2.4372e-04,
        -1.1770e-01,  1.0531e-04, -8.9769e-05, -4.0744e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1868e-04, -1.0033e-06,  2.3415e-05, -9.0748e-06, -1.2134e-05,
         8.7901e-06, -4.7785e-06,  6.6965e-06,  2.1130e-05, -3.6874e-02,
        -4.9873e-05, -1.0663e-02, -1.0904e-05,  1.6309e-05, -9.7375e-06,
         1.0604e-05, -1.6788e-04, -5.2569e-05, -1.0534e-05, -1.7867e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4821e-04,  4.1328e-05, -1.6265e-05,  1.3776e-05,  6.1011e-05,
         1.4907e-06, -8.3299e-05, -1.1691e-05,  2.2723e-05,  4.5457e-05,
        -7.0388e-02, -5.0643e-05, -2.9003e-02,  1.1025e-04,  3.1560e-05,
         3.6632e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8612e-05,  1.6948e-05,  5.0415e-06, -6.0296e-05,  2.9237e-06,
        -1.7454e-05,  6.2277e-05, -3.0195e-06,  7.2028e-06, -7.1222e-02,
         2.6876e-06, -6.9821e-02, -8.1462e-05,  1.2808e-03, -2.5429e-06,
        -6.3461e-05,  4.2207e-05,  3.7983e-06,  1.4221e-05,  5.7016e-05,
        -2.8954e-05, -3.1067e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0651e-05, -1.9523e-05, -5.7180e-05, -3.7707e-05, -3.1606e-07,
        -1.5014e-05,  4.8735e-06,  1.5384e-05,  3.3625e-05, -6.1058e-02,
        -6.8009e-05, -1.5749e-02, -1.0930e-04,  8.2113e-04, -2.8540e-02,
         3.3452e-05, -2.6187e-05,  2.6778e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4569e-04,  2.8212e-04, -1.2521e-04,  3.4527e-04, -2.9437e-06,
        -2.5774e-04, -1.2108e-04,  1.1896e-04,  1.7998e-04,  1.3138e-04,
         2.3193e-01,  3.8400e-05,  4.9420e-04,  2.8936e-06, -6.8904e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5740e-05, -1.6958e-05, -1.7783e-04,  4.4846e-05, -1.3243e-04,
         6.8188e-05,  2.2921e-06,  1.3706e-05, -4.5225e-05,  1.3692e-01,
        -2.9659e-05,  1.1965e-01,  1.2905e-04,  1.8559e-04,  1.6670e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6702e-05,  1.0143e-05, -4.6897e-05,  1.0757e-05,  2.7779e-05,
         8.7403e-06,  5.9106e-05,  4.2345e-05, -3.5265e-05, -4.1969e-02,
        -4.2523e-05, -2.2290e-02, -8.4312e-05,  4.1045e-04, -1.1998e-04,
         1.4244e-05, -3.7481e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0566e-05, -1.3589e-04, -1.7973e-04,  1.0211e-04,  9.8215e-05,
        -1.0492e-05,  7.1010e-05,  4.1535e-05,  5.7594e-05,  2.2219e-01,
        -3.9781e-04,  3.4608e-02, -1.4594e-04,  1.3092e-04, -4.7132e-05,
         6.3132e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1550: [tensor([-5.6089e-06, -2.0819e-05, -6.8222e-06, -1.5192e-06, -2.5236e-05,
         5.2433e-06, -1.5586e-05,  2.5813e-06, -1.6446e-07, -1.1947e-05,
        -1.5419e-05,  5.5829e-06,  8.7693e-06,  1.5304e-05,  4.4490e-06,
        -1.3109e-05,  6.6117e-06, -1.1454e-05,  2.5901e-05,  1.0562e-05,
        -1.1022e-05, -1.2557e-05,  6.6830e-06, -3.6778e-05, -3.8706e-06,
         9.0268e-06,  4.1978e-07,  2.7870e-06, -9.8668e-05, -1.3712e-05,
        -1.9837e-05,  3.9765e-02,  1.9184e-02,  1.0150e-05, -2.3964e-05,
         1.7188e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2976e-04,  4.5588e-05, -1.6996e-05,  2.9587e-05, -7.0604e-06,
        -2.4033e-05, -2.1231e-06, -4.2485e-05,  2.5381e-06,  2.6782e-05,
         8.7800e-06,  1.4561e-05, -1.8177e-07,  4.2862e-06,  3.8764e-05,
         3.4331e-06, -1.5374e-05, -2.1827e-05,  6.8933e-06, -1.2132e-05,
         3.1929e-05,  2.1810e-05, -1.4836e-05,  1.0857e-05,  3.9906e-06,
         1.2797e-05, -3.2805e-05,  2.9915e-05, -1.5496e-02, -1.1689e-05,
         5.3347e-05, -1.9542e-05, -3.7212e-05, -9.1682e-02,  5.4272e-05,
         2.0552e-04, -1.8395e-05, -1.8988e-05,  8.6715e-06, -2.4071e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0625e-04,  3.9586e-05,  2.5218e-05, -1.5390e-05, -6.9548e-05,
         3.6749e-05, -5.0181e-06, -2.4111e-05,  2.2533e-05,  4.9181e-05,
         1.7275e-06, -3.3468e-05,  4.2606e-05,  3.1514e-05, -3.0512e-05,
        -1.1576e-05, -1.9901e-05,  1.4742e-05, -3.7882e-06,  1.2280e-05,
         4.6672e-05,  1.5320e-05,  1.7270e-05, -2.1295e-05,  3.4472e-05,
        -3.3959e-05,  4.2338e-05,  1.6958e-01, -1.3806e-04, -4.0117e-03,
        -1.4163e-04, -9.5998e-05,  2.5555e-05, -1.4850e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7272e-05, -3.5476e-05,  1.6104e-05,  4.7660e-06, -3.8601e-05,
         2.8036e-05,  1.8972e-05, -4.7755e-05,  4.2879e-05,  3.1481e-05,
        -3.8767e-05,  4.3891e-05,  2.4389e-06, -1.0229e-05,  2.5915e-05,
         7.9131e-06, -2.2295e-06, -1.7951e-05, -4.1000e-05, -3.0298e-05,
         5.7920e-06,  4.4823e-05, -1.8737e-05,  4.0230e-06,  2.3523e-05,
         1.8209e-05,  1.6627e-05, -2.6468e-05, -1.3523e-01, -1.1182e-04,
        -4.9221e-05,  4.9241e-05,  4.2954e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7745e-05, -1.1069e-06, -1.6431e-06,  4.1701e-06,  3.5675e-06,
        -9.1555e-07,  2.2876e-06, -9.0322e-06, -4.4755e-06,  6.3464e-06,
        -2.2699e-05,  2.0389e-06,  1.5218e-05,  2.9041e-06,  8.3335e-06,
         6.6136e-06, -3.3626e-06,  1.5081e-05, -5.5191e-06, -9.9697e-06,
        -6.3215e-06, -4.7327e-06,  4.1898e-06,  1.0094e-05,  1.1913e-05,
         8.9115e-06, -8.9852e-08, -1.5645e-02, -1.2574e-05,  1.0995e-05,
        -2.6244e-02, -2.1577e-05, -5.0897e-05, -2.4255e-02, -2.1674e-02,
         1.1839e-05, -1.2372e-05, -1.9017e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5995e-06,  3.5099e-05,  6.4689e-05,  2.2406e-06, -3.8759e-05,
        -3.6670e-05, -2.5210e-05,  3.7183e-05, -5.9195e-06, -1.4264e-05,
         1.1050e-05,  1.1613e-05, -3.7064e-05, -3.8677e-05, -2.6582e-05,
        -1.1403e-05, -1.3750e-05,  3.7474e-05, -4.7647e-06,  1.9521e-05,
         5.9618e-06, -2.6568e-05,  6.7786e-05, -3.7388e-05, -1.7975e-05,
        -3.2253e-05,  3.1816e-05,  1.5702e-01, -5.0134e-05, -3.1806e-05,
         2.8001e-05, -1.0178e-02,  2.1927e-05,  1.2097e-04, -2.3492e-04,
         3.0348e-05,  6.5371e-06, -2.3505e-03, -3.6276e-07, -1.5134e-05,
        -5.9037e-05, -1.5702e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2026e-04,  4.9725e-05, -1.4330e-05,  3.1361e-05,  1.8138e-05,
        -3.4184e-06,  1.6313e-05, -1.6445e-05, -1.8752e-05,  2.3641e-05,
         7.8332e-06, -1.7322e-05,  2.9961e-05,  6.2210e-05,  3.4240e-05,
        -3.9369e-05,  2.9439e-05,  1.6873e-05,  1.2367e-05,  5.9107e-05,
         3.4660e-06,  3.8023e-05,  1.3192e-05,  2.3313e-05, -1.2764e-05,
         2.9800e-05,  4.6000e-06, -9.5211e-05, -2.7400e-05, -4.8852e-05,
         3.9700e-05,  2.1200e-05,  2.3426e-05,  6.0165e-03,  1.4971e-01,
        -7.8323e-06, -1.0890e-05,  6.9423e-05, -5.8778e-05,  7.7662e-03,
        -3.2139e-05, -1.4933e-03,  2.0283e-05, -3.3319e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9481e-05,  1.7196e-06, -1.2897e-05,  1.4827e-05, -2.7130e-05,
         3.1135e-06, -1.4847e-06, -7.7292e-06, -1.7203e-05, -4.3097e-06,
         2.8575e-05, -8.6577e-06,  7.3931e-06,  4.6414e-06,  8.4012e-06,
         3.7630e-06, -6.9777e-06,  5.4536e-06, -8.4583e-06, -1.8415e-05,
         2.0400e-05,  8.9020e-08,  8.3126e-06,  2.9319e-06, -5.2902e-06,
         3.8235e-06,  7.0755e-07, -6.5772e-06,  2.8587e-03,  4.2923e-05,
        -1.8469e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4323e-04, -9.2317e-06, -8.4549e-07, -3.6106e-05,  1.8382e-05,
        -2.4750e-06, -4.9884e-05, -1.3805e-05,  6.4646e-05,  5.9443e-05,
         4.9888e-05, -5.5229e-05,  4.7368e-05,  8.0648e-06, -1.2210e-05,
         2.6543e-05,  1.2677e-05, -3.9842e-05, -9.2699e-05, -4.1454e-06,
         7.2711e-05, -2.7332e-05, -7.9414e-05,  1.5891e-01, -1.1962e-05,
         5.5703e-04, -8.6087e-05, -6.6539e-05,  2.5062e-04,  7.5604e-05,
        -6.5089e-05,  2.0648e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5329e-05,  5.8993e-05, -4.9581e-05, -4.5776e-06,  4.0068e-05,
        -1.7094e-05, -8.0496e-05, -4.6239e-05,  5.6355e-05, -4.0975e-05,
         9.6879e-05,  6.0505e-05,  3.3275e-05,  6.0035e-05,  8.4939e-05,
         9.7090e-06, -8.5569e-05,  2.5432e-05,  4.7009e-06, -4.5269e-06,
        -2.5318e-05,  1.0949e-04, -2.8894e-05,  2.6878e-05,  1.9243e-02,
         9.5531e-05,  2.2213e-01,  3.1683e-05, -4.8766e-05,  8.3389e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0377e-05, -1.7276e-05,  1.3707e-05, -2.3089e-05, -2.9631e-05,
         2.2302e-06, -1.4837e-05,  1.8011e-06, -8.6794e-06, -3.3833e-05,
        -6.9574e-06, -1.1863e-05, -1.3731e-05, -1.3977e-06, -8.8034e-06,
        -3.3026e-05, -2.2654e-05,  7.2043e-06,  4.5047e-06,  5.3313e-06,
        -1.4173e-05, -2.9928e-05,  2.9996e-05,  1.3195e-05, -5.0020e-02,
        -3.6187e-05,  8.0969e-06, -1.7503e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5502e-04,  1.0364e-05, -6.9561e-05,  1.3025e-07,  3.1229e-05,
         3.1145e-05,  3.0649e-05, -4.2525e-05,  2.0566e-05,  1.6299e-05,
         1.0367e-05,  1.4599e-05, -4.1333e-05,  1.3228e-05,  3.8493e-05,
         2.6052e-06, -3.8213e-05, -7.4175e-06, -2.0490e-05,  5.2323e-05,
        -6.5862e-05, -4.7504e-05,  5.4378e-05,  4.0228e-05, -1.6162e-05,
         5.9771e-03,  2.1890e-05,  1.3300e-01,  4.4417e-05,  5.5903e-05,
         5.1280e-05, -1.2745e-05, -1.6685e-06,  4.9930e-05, -1.9886e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1600: [tensor([ 9.6135e-05, -6.0555e-05, -1.3559e-05,  7.3008e-05,  3.7554e-05,
         6.0746e-05,  6.9541e-07,  1.4444e-05,  1.4672e-05, -2.8031e-05,
         6.2471e-05,  2.9891e-05, -1.0232e-01, -5.9126e-05, -5.5785e-02,
         3.7806e-05,  1.0505e-04, -7.2954e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9357e-05,  6.5854e-05,  6.2818e-05,  8.5920e-05,  4.2507e-05,
         7.0010e-06,  2.9325e-05,  9.0830e-05,  6.4479e-05,  9.1849e-05,
        -2.0294e-05,  2.1282e-05, -5.3831e-02,  5.7937e-05, -6.7970e-02,
        -7.8073e-05, -5.4931e-02,  6.8420e-05, -1.1160e-04, -5.7625e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6972e-05,  1.1176e-05,  2.9642e-06,  2.8218e-05,  7.0667e-06,
        -2.5327e-05,  5.3268e-05, -2.5037e-06, -8.9803e-05,  1.7962e-06,
        -6.8294e-02,  9.0101e-05,  1.3629e-04,  7.1658e-05, -4.0703e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.2927e-05, -1.5267e-05,  1.9941e-05,  2.0202e-05, -8.7350e-06,
        -9.1340e-06, -1.4539e-05, -3.2945e-05,  1.3912e-06,  1.2189e-05,
        -6.4187e-02, -8.2020e-06,  4.0829e-05,  1.1526e-05,  7.1132e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1651e-05,  3.5745e-05, -2.0075e-05, -1.3867e-05,  8.8289e-06,
         9.9604e-06,  4.3948e-05,  2.8248e-05,  3.0176e-05, -9.5086e-06,
        -4.9949e-02,  6.1897e-05, -1.4250e-04, -2.8983e-03,  1.4034e-05,
        -5.2572e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2040e-05, -8.8189e-06,  9.2652e-05,  1.8524e-05,  1.4890e-05,
         1.1230e-04, -5.8011e-05,  1.2412e-05, -9.4251e-06,  3.0745e-05,
        -5.4464e-05,  2.1933e-05,  2.1647e-05,  2.1034e-05,  8.6286e-06,
         7.8922e-02, -2.5139e-05,  5.6922e-05,  9.0883e-03,  2.2011e-05,
        -9.1894e-06, -4.2175e-04,  4.6056e-05,  2.8069e-04,  1.0243e-01,
         8.3025e-05, -3.8389e-05,  1.7401e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9857e-04, -1.7689e-06,  2.5173e-05,  2.7988e-05,  2.2706e-05,
         9.5883e-08,  3.0224e-05,  2.6457e-05,  3.8100e-05,  1.5804e-05,
        -3.0721e-05,  4.7927e-06,  1.1193e-05, -9.0130e-06,  2.9824e-05,
        -3.2150e-06,  3.1636e-05, -6.6452e-06,  3.8553e-05, -6.0348e-02,
        -4.3907e-06,  2.6897e-05, -2.0423e-03,  4.9357e-05, -1.6048e-02,
         3.5487e-05, -2.8515e-02,  3.7769e-05, -3.2305e-04, -2.6807e-02,
         4.7172e-05,  2.1208e-05, -3.2651e-05,  1.8081e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9668e-05,  7.7507e-06, -1.1390e-07, -3.0176e-05, -6.4654e-07,
        -3.6849e-05,  9.6264e-06,  6.1118e-06, -2.4674e-05, -1.2960e-05,
         3.6764e-05,  7.6221e-08, -2.5166e-05,  7.2505e-05, -4.3943e-06,
        -7.7027e-02, -4.3273e-06, -7.4852e-03, -2.6816e-05,  3.2143e-05,
        -4.1840e-02,  1.5016e-05, -1.3604e-02,  2.8916e-05,  1.8361e-05,
         1.8108e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1085e-04,  4.0026e-05,  2.9571e-05, -3.9800e-05,  1.9031e-05,
         2.7603e-05,  1.5220e-05,  2.2178e-05, -1.8798e-05, -4.7192e-02,
        -4.2317e-06, -2.5950e-02, -1.6873e-05, -8.7622e-04, -5.0905e-05,
        -3.4315e-06,  8.3327e-06,  7.8029e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4512e-05,  2.3518e-05, -1.3986e-06,  8.7634e-06, -2.9749e-06,
        -1.6896e-05,  1.5779e-05,  2.2346e-05, -2.1273e-06, -2.5874e-05,
        -2.5246e-02, -2.5218e-05,  3.1984e-05,  3.8460e-05,  1.1252e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2838e-04,  4.9913e-05, -1.7178e-05,  2.4056e-05,  6.4386e-06,
         4.7571e-06,  1.9137e-05,  1.0484e-05, -1.8999e-05,  8.0939e-03,
         4.4206e-05, -5.6307e-02,  5.9023e-05, -2.6298e-02,  1.8316e-05,
         7.0508e-06,  1.4372e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4090e-04, -2.1763e-05, -9.5514e-05,  1.1553e-05, -3.6587e-05,
         6.9293e-05, -2.1533e-05,  8.9206e-06, -1.4942e-05, -3.6674e-05,
        -3.8608e-03, -7.1558e-05, -4.2895e-05, -6.6420e-05, -1.0872e-01,
        -1.5455e-05,  4.0222e-05,  3.4630e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1650: [tensor([ 4.7259e-05, -1.5180e-06, -6.8791e-06,  1.8872e-05,  7.7390e-06,
        -8.5358e-06,  8.2283e-06,  2.7477e-05, -1.4935e-05,  4.9153e-06,
         3.6554e-06, -2.6987e-02,  9.2997e-06, -2.0121e-06,  4.2745e-06,
         2.1912e-05, -1.9462e-02,  1.3138e-05,  1.3601e-05, -3.4674e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0681e-04,  2.2484e-05, -1.3743e-04, -2.7579e-05, -7.6419e-05,
        -3.0489e-05, -2.7616e-05,  3.9013e-05,  4.9227e-05, -2.2692e-05,
        -8.2383e-06, -6.0706e-06,  9.9064e-05, -3.3857e-05, -1.7573e-05,
         3.4319e-05,  1.5292e-01, -1.5524e-04,  1.0732e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2025e-05, -1.6999e-04,  7.1706e-05, -6.4730e-05,  8.3739e-05,
        -1.0993e-04, -7.1428e-05, -5.9186e-05,  4.5988e-05, -7.9095e-05,
        -2.0103e-05, -8.2736e-05, -1.5139e-04,  3.0241e-05,  1.5661e-04,
         2.5695e-01,  1.1553e-04,  7.8400e-07,  1.2213e-04,  1.2638e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6009e-05,  5.3704e-06, -1.8626e-05, -4.5711e-05,  2.1646e-04,
         3.7257e-05,  8.5442e-06, -9.9480e-06,  2.2519e-04, -1.1680e-04,
         1.2279e-04,  1.0454e-05, -2.9383e-05, -1.5843e-04,  1.0011e-04,
         2.6788e-01,  1.6498e-04,  8.3452e-05,  1.5389e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2572e-04, -1.1440e-05, -6.1343e-07,  1.8847e-05, -3.2116e-05,
        -8.6534e-06,  3.8107e-05, -2.4162e-05,  4.9930e-05, -3.7121e-05,
        -6.7980e-05, -7.5564e-05, -2.3185e-05, -4.6298e-05,  2.5084e-05,
        -8.0147e-05, -6.0753e-05,  6.2603e-05, -2.0986e-05,  1.6918e-01,
         1.3348e-05, -5.1582e-03, -4.8441e-05, -1.1278e-04, -8.7493e-05,
        -1.4395e-02,  2.4008e-05,  6.9904e-05, -9.4804e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8824e-05, -1.9692e-05,  4.6174e-05,  1.1276e-04, -5.1930e-05,
        -1.4833e-05,  6.9059e-05,  1.1300e-06,  4.8607e-05,  9.7239e-06,
         8.2253e-05,  4.5223e-06,  3.4790e-05,  1.7524e-04, -3.2890e-05,
         5.2012e-05, -1.2225e-05,  6.0579e-05,  9.1252e-05,  1.5552e-01,
         1.0835e-04,  1.0392e-02, -7.9763e-05, -8.4447e-06, -5.8432e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9839e-05, -7.3771e-05,  1.4454e-05,  2.3322e-06, -1.2433e-05,
        -1.3136e-05, -2.4026e-06, -7.7945e-05,  4.6488e-05, -6.5939e-05,
        -3.6122e-05,  9.0766e-07,  2.8730e-05, -4.3120e-05, -1.0933e-05,
        -3.4941e-05, -3.6104e-05, -2.1410e-05, -3.1402e-05,  1.3757e-01,
         1.7531e-06,  8.2137e-02,  1.4386e-05, -4.3861e-04,  6.4656e-05,
        -9.8820e-06, -1.1996e-03,  4.8827e-05, -1.6476e-04, -2.9645e-05,
        -3.9037e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4570e-05,  5.2968e-05,  8.7182e-05,  1.9692e-05, -1.4564e-07,
         3.0806e-05, -4.4977e-06,  7.1986e-05,  3.2940e-05,  1.5522e-05,
         8.6052e-05,  6.9988e-05,  3.2775e-05,  1.6543e-05,  1.2098e-01,
        -2.8124e-05,  5.9518e-05, -1.3685e-05,  2.4178e-02, -5.4126e-05,
        -4.3057e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2811e-04,  2.4455e-05, -5.1949e-05,  7.0028e-05,  4.3931e-05,
        -6.9978e-06,  2.0062e-05,  5.7179e-05,  8.0125e-05,  1.8950e-05,
        -1.6289e-05, -3.3527e-05,  3.9303e-05, -4.2432e-05,  8.6569e-02,
        -3.7779e-05,  3.3632e-05,  9.1189e-05, -2.0678e-05,  3.4818e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4438e-04,  2.7263e-05,  5.0007e-05, -5.3129e-05, -6.7869e-05,
         2.6019e-05,  1.8309e-05, -3.8623e-05,  6.3705e-06, -3.8147e-05,
         4.8838e-05,  1.4453e-05, -9.6184e-06,  1.5317e-05, -9.7892e-02,
         9.6752e-05, -4.9720e-05, -5.6774e-05, -1.7769e-05, -1.4364e-05,
        -5.8196e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1152e-05,  1.3823e-05, -1.4517e-04,  3.4462e-05, -1.0655e-05,
         1.0168e-05,  8.1569e-05,  7.7221e-07, -3.0122e-05,  2.3253e-05,
         9.4101e-05, -2.2869e-05,  1.3839e-04,  3.0622e-01,  2.0232e-04,
        -3.6546e-03,  2.3160e-05, -1.0012e-04,  9.2553e-03, -3.4476e-05,
        -6.6879e-05,  1.5190e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6802e-05,  1.2322e-05,  2.5902e-05, -1.0532e-05,  8.3006e-06,
         6.6416e-06, -4.7124e-06,  1.5627e-05,  9.1186e-06, -8.8067e-06,
         1.2373e-05,  4.7730e-06, -9.8067e-06, -2.7321e-02,  2.3138e-06,
        -2.6268e-03,  5.9817e-06, -2.1233e-02,  2.6824e-06, -2.4942e-06,
        -4.2626e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1700: [tensor([-9.9063e-05, -1.6910e-05, -1.0088e-05, -1.0470e-05, -3.3927e-05,
         2.2663e-05,  2.3738e-05, -2.0418e-05, -1.8484e-05, -4.5091e-05,
        -1.2707e-06, -5.6417e-02,  1.4079e-05,  3.7572e-05, -3.8452e-02,
         7.3999e-07,  1.8252e-03,  3.2680e-05, -5.0038e-04, -2.4004e-02,
        -5.0136e-05,  3.0093e-05,  4.2247e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7329e-05, -5.2821e-05, -1.1128e-04, -1.4146e-05,  9.2193e-06,
         1.0838e-04,  6.0514e-06, -1.4123e-04,  1.0613e-04, -3.6125e-06,
        -5.5146e-07,  6.5550e-05, -8.4435e-04,  8.9752e-05,  1.1911e-01,
         8.2543e-05,  1.3870e-01, -1.0182e-04, -6.4154e-05, -8.0288e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5827e-05, -3.5639e-05, -1.1981e-05,  4.7499e-05,  4.1197e-05,
         4.6257e-05, -2.3016e-05, -1.0036e-05,  3.5740e-05,  1.0318e-04,
        -3.6473e-05,  2.5306e-06, -2.1876e-05,  1.3807e-01, -4.9036e-05,
         5.0137e-05,  5.5287e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6878e-05,  7.2186e-06, -1.1666e-05,  2.6620e-05, -1.9988e-05,
        -1.5473e-06,  7.6264e-06,  1.8699e-05, -3.2207e-05, -5.4599e-06,
         7.0465e-06,  4.2157e-05,  4.7826e-05, -1.9136e-04, -3.7047e-02,
         4.2589e-05, -7.1261e-05, -6.8433e-02,  3.6045e-05,  6.7728e-06,
         5.5416e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9790e-05,  5.2892e-05,  3.1405e-05, -2.1212e-05,  4.6338e-05,
        -2.1878e-07,  2.3866e-06,  1.3744e-05,  1.4088e-05,  4.0776e-05,
        -8.7593e-05,  1.9181e-04,  8.7311e-05,  2.8706e-05,  3.8013e-03,
         4.2280e-05,  1.7092e-01,  2.0642e-04, -5.0602e-05,  6.4839e-05,
         4.4173e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9325e-05, -8.9047e-05, -3.4076e-05,  9.5023e-05,  4.0464e-05,
         1.8434e-05, -1.1644e-04,  1.8940e-05, -7.6300e-06, -8.3423e-06,
         2.6733e-05, -9.5308e-06,  7.1723e-05,  1.9043e-02,  2.1759e-01,
        -2.8249e-05,  1.2211e-04, -5.3290e-05, -4.7033e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0657e-04, -3.5219e-05,  2.3585e-06,  1.9927e-05, -3.8850e-05,
        -1.0318e-05, -1.8703e-05,  8.2400e-06, -1.2829e-05, -1.2537e-04,
         1.2844e-05, -1.7474e-04,  3.5386e-05, -1.1566e-02, -1.1684e-01,
        -1.1839e-04, -8.3823e-05, -6.2914e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2615e-06, -7.2925e-06, -3.8987e-05,  1.5983e-04, -1.8011e-04,
         5.3548e-06,  7.4150e-05, -4.7690e-05, -1.0656e-05, -2.7475e-05,
        -4.5788e-06, -1.1055e-04, -4.6231e-05, -1.2266e-04, -1.0184e-04,
        -3.7405e-05,  2.6976e-05,  7.4707e-05, -1.5606e-01, -2.3843e-05,
        -1.0963e-05,  5.6603e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0991e-05,  1.5953e-05, -1.2116e-04,  1.2658e-04,  9.4527e-05,
         7.9098e-05,  2.3418e-05,  2.5470e-05,  3.9746e-05,  1.5868e-05,
        -2.4515e-05,  3.5401e-05, -2.7355e-05,  1.2055e-04, -9.5955e-06,
        -2.8693e-05,  7.1276e-05, -1.0394e-04,  3.6933e-02, -1.9890e-05,
         1.6392e-01, -6.2674e-05,  7.4423e-06,  1.0995e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6232e-04,  1.6409e-04, -7.0010e-05,  1.4147e-04, -4.5842e-05,
         3.6303e-05, -3.7362e-06,  1.1212e-04, -4.6869e-06, -1.3531e-06,
         5.7283e-05,  7.7242e-05, -7.0073e-05,  1.0280e-05,  9.4675e-05,
        -2.7402e-05,  3.6194e-05,  3.6713e-05,  1.0334e-01,  1.0900e-06,
         1.4390e-01, -5.6618e-05, -1.2345e-04, -2.3898e-05,  4.8958e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1834e-05, -2.6210e-05, -1.2972e-05, -1.1250e-05,  2.3750e-05,
         2.0569e-06, -2.3720e-05,  7.8802e-06, -1.8742e-05, -4.4246e-06,
         1.8385e-06, -4.2590e-05, -1.9598e-05, -5.5107e-06,  7.0036e-06,
         1.1574e-05, -1.9554e-05, -3.0960e-05, -1.3946e-05, -1.7620e-05,
        -2.1848e-05,  2.8023e-05, -3.8150e-05, -4.9499e-05, -6.1462e-05,
         2.9289e-05, -7.6104e-06,  2.7005e-05, -5.0623e-06,  4.6535e-03,
        -1.9948e-05,  6.8924e-05,  3.3126e-05,  1.4886e-03,  1.1527e-04,
        -4.3217e-05,  7.1857e-05,  3.5193e-05, -2.2406e-01,  7.9299e-05,
        -2.0142e-05,  2.9236e-06,  7.6766e-05,  8.1923e-05,  7.2742e-05,
         7.0592e-06,  2.4534e-03,  7.2765e-06,  7.9619e-05,  1.4980e-05,
        -9.3256e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5661e-04, -2.8780e-05, -1.9253e-05,  5.7048e-07, -5.4330e-06,
        -5.9369e-06,  2.2041e-05,  1.0548e-05,  7.6269e-06, -5.8602e-06,
        -1.9734e-05, -3.1090e-06,  5.7375e-06,  3.1097e-05, -3.0532e-05,
        -1.7003e-05, -1.5556e-05,  5.5609e-05, -2.8724e-05,  1.6296e-05,
        -1.5454e-05, -3.9464e-05, -4.3312e-05, -4.6984e-05,  2.7582e-06,
        -2.0593e-05, -4.4619e-05,  1.9095e-05, -3.8999e-05,  1.9930e-01,
        -4.5646e-05, -1.6738e-04,  8.5146e-05, -2.0216e-05,  6.9269e-03,
        -3.7222e-04,  1.0006e-04, -1.9429e-06,  1.4881e-03,  5.6575e-05,
         1.1404e-06, -3.6277e-04,  1.8801e-05, -1.9506e-06,  1.3451e-02,
         8.0259e-05,  2.3246e-05,  2.5792e-06, -3.3378e-05,  4.5066e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1750: [tensor([ 1.6670e-05, -9.3456e-06,  5.0251e-05,  4.4348e-05,  4.0202e-05,
        -3.7819e-05, -3.5242e-05,  6.0220e-05,  7.4678e-05, -3.4200e-05,
        -3.5876e-05, -3.1179e-05, -4.5813e-05,  6.8046e-05,  3.5037e-06,
         5.8727e-05,  3.1284e-05,  6.1406e-05, -5.5771e-05, -2.7178e-05,
         5.8041e-05,  1.1274e-01, -4.5050e-05, -1.2897e-03,  1.2639e-01,
        -1.3075e-04, -2.9551e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4145e-04,  6.3809e-05,  5.3896e-05,  8.6993e-06,  8.0574e-06,
         8.3582e-05,  3.4370e-05, -6.1070e-05, -4.4139e-05, -1.3594e-04,
        -2.4999e-05,  1.6927e-05,  4.8265e-05,  5.9834e-05, -1.8497e-05,
         7.3525e-05, -6.2612e-05,  5.0714e-05, -9.6589e-06,  2.4793e-05,
         2.3290e-05,  6.9821e-02,  4.8390e-05, -7.9107e-05,  1.9600e-04,
         6.7161e-05,  1.9742e-01, -1.3769e-04,  1.0536e-04, -4.8006e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9904e-04,  2.8169e-05,  6.0240e-05,  2.2137e-05,  1.7455e-06,
        -1.5518e-05, -2.1146e-05,  5.5768e-05,  5.5247e-05, -1.2739e-04,
        -4.4087e-05, -6.7422e-05, -4.0581e-05,  2.4789e-06,  1.3516e-06,
         2.8374e-05,  8.7258e-05, -8.8584e-06,  7.2016e-05,  2.0919e-05,
         2.5553e-05,  2.2092e-02,  1.0199e-04, -1.1553e-05,  2.2687e-04,
         2.3775e-01,  8.1088e-05,  1.0063e-05,  2.5649e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8368e-05, -3.2699e-05, -5.1758e-05,  8.7587e-06,  5.0829e-05,
        -6.3839e-05,  1.7225e-05,  2.7486e-05, -1.4854e-05, -1.1767e-04,
        -8.9839e-05, -2.9968e-05,  1.1068e-04,  4.4102e-05,  1.1009e-05,
         6.7582e-05,  5.3050e-05,  4.8779e-05,  6.1188e-05, -2.6989e-05,
         4.0013e-05,  2.1542e-01, -1.2595e-04, -2.0962e-03,  1.1745e-05,
         3.1143e-02,  1.4945e-04,  5.7166e-05,  4.5698e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7018e-05, -3.5406e-05, -9.2276e-07, -4.4990e-05,  2.1361e-05,
         6.8628e-07, -8.2636e-06,  1.2681e-05, -2.7940e-05, -4.9113e-05,
        -4.0782e-05, -4.9262e-05, -5.2945e-05, -2.4402e-05, -2.3147e-05,
         1.5077e-05,  2.7143e-05, -3.9416e-05,  1.1186e-05,  6.1924e-05,
        -7.1850e-05, -5.0937e-03, -2.9868e-05, -1.1586e-01, -4.8854e-05,
         8.6223e-06, -8.6380e-02, -1.1649e-05,  4.9119e-05,  1.2355e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3530e-04,  1.5532e-05,  1.2642e-05, -6.6983e-05, -1.4156e-05,
         8.2610e-05, -6.6568e-05,  1.1843e-05,  1.5896e-05, -6.1413e-05,
        -3.1985e-06,  3.1674e-05,  4.3331e-05,  4.5207e-05,  1.0514e-04,
         4.5623e-06, -7.4161e-05,  2.3658e-05, -5.6548e-05,  5.3951e-05,
         4.8983e-05,  2.0054e-01,  1.0959e-04,  2.4978e-03, -1.4691e-05,
         6.6036e-02,  8.5417e-05,  1.8768e-04,  1.3046e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2818e-04, -2.2850e-05, -5.7077e-05,  5.0017e-05, -1.5375e-04,
         9.0687e-07,  1.1457e-05, -1.0422e-04,  3.4069e-05, -1.0905e-04,
         8.2483e-05,  9.1617e-05, -1.1663e-05,  4.6040e-04,  9.7795e-05,
         2.0691e-01,  5.4539e-05,  6.5197e-05,  5.6599e-04,  1.7849e-04,
         2.3655e-04,  6.6309e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7812e-05,  6.3805e-05,  5.3564e-05, -1.3811e-04, -1.3726e-05,
        -8.6508e-05, -3.2958e-05,  5.3202e-05, -7.2198e-05,  1.2943e-05,
        -1.2657e-04, -2.9065e-05, -8.1287e-06,  2.7525e-03, -2.1394e-04,
        -1.6129e-01, -2.2159e-05, -6.0631e-05,  6.8798e-05, -4.6248e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0853e-05, -5.1713e-05, -2.4120e-05, -1.9971e-05,  3.3427e-05,
        -7.4415e-05,  2.6628e-05,  5.2370e-06, -2.7401e-05, -3.4078e-05,
        -3.0651e-05,  2.0681e-05, -1.1785e-05, -4.1106e-05,  3.7099e-05,
        -6.5617e-06,  5.8742e-03,  1.7214e-02, -8.0857e-05,  4.3186e-03,
         1.2462e-01, -6.8094e-06, -4.0322e-05,  1.7477e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0863e-05,  9.7516e-05, -2.6166e-05,  6.8366e-05,  7.6261e-05,
        -1.1905e-04,  1.0581e-04,  1.7525e-05,  2.4047e-05,  1.7664e-05,
         7.5031e-05,  1.7489e-01,  9.3037e-05,  6.4279e-05,  3.1821e-02,
         1.0313e-04,  6.6240e-05,  1.6620e-03,  2.6994e-05, -4.1930e-03,
         1.3197e-04,  7.4866e-05,  6.8470e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3286e-04, -2.3904e-05,  5.6312e-05,  1.1194e-04, -1.9281e-05,
        -4.7080e-05,  1.3115e-05, -3.5281e-05, -5.9989e-05, -4.6713e-05,
         1.7893e-05,  6.4373e-02,  4.4541e-05,  2.0843e-04,  5.0317e-05,
        -1.0184e-04,  1.6448e-01, -2.9923e-05,  1.9441e-05, -3.5159e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7437e-04, -1.1296e-06,  4.7987e-06, -2.8441e-05,  9.9722e-06,
         1.4012e-05,  1.4525e-05, -5.0673e-07, -1.7985e-05,  2.6338e-06,
        -1.8566e-05, -3.9757e-02, -1.4098e-05, -1.3942e-05,  1.8058e-05,
         1.9155e-03, -9.1087e-06,  3.4699e-05, -3.7845e-05, -8.0602e-06,
        -1.5452e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1800: [tensor([ 1.0291e-04, -1.2761e-05,  1.1629e-05,  1.2875e-05,  8.5191e-05,
        -9.1007e-06, -1.5418e-05, -9.5495e-05, -3.1371e-05, -7.4708e-06,
        -4.2429e-05,  3.8039e-05,  1.0090e-02,  2.0750e-05, -5.9326e-06,
         2.4906e-05, -6.5837e-04, -2.3554e-05, -1.8278e-04,  1.5939e-01,
        -3.9898e-06, -3.3198e-05,  2.4244e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1882e-04,  2.4572e-06, -3.1251e-05,  4.5629e-05, -3.0845e-05,
        -5.4547e-05, -3.8191e-05, -1.2472e-05,  6.1874e-06, -1.7665e-05,
        -1.2858e-05, -1.8591e-05, -9.9972e-02,  9.8533e-06, -1.1565e-05,
         2.8388e-05,  3.7000e-05, -2.6000e-04, -5.4901e-05, -3.4895e-05,
        -1.6870e-03,  4.6829e-05, -4.3839e-05,  1.4600e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7306e-05, -1.8374e-05,  3.9726e-06, -1.5859e-05,  1.3482e-05,
        -4.5402e-05, -2.5958e-05,  3.8125e-05, -2.6160e-05, -2.5074e-05,
        -1.6875e-05, -1.1032e-05, -7.2758e-02,  4.1338e-05,  1.7741e-06,
         3.9437e-05,  9.8950e-05,  3.1241e-05, -1.9243e-03, -4.1931e-02,
         4.8626e-05,  5.2493e-05, -1.7121e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4261e-04, -4.0241e-06, -1.6039e-05, -8.8180e-06,  7.4347e-06,
        -7.7035e-06,  1.4991e-05,  1.2882e-06,  2.2787e-05, -1.8276e-06,
         1.8569e-05,  9.6562e-06, -3.5206e-05,  2.7426e-05,  9.6213e-06,
        -1.4022e-05, -2.1675e-05, -4.1654e-02, -1.0416e-05, -4.3502e-02,
        -3.7888e-05, -1.7257e-05, -1.1036e-05,  8.7711e-04,  9.9854e-06,
        -2.8046e-05, -2.9426e-05, -4.3946e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9779e-05, -7.3727e-05, -3.2558e-05, -2.4348e-05, -4.2102e-05,
        -1.0172e-04, -5.6354e-05, -3.9158e-05, -5.8976e-06,  2.3595e-05,
         4.0622e-05, -9.6465e-05, -1.9016e-05,  1.7136e-05,  2.9177e-05,
         5.0023e-05,  3.7289e-05,  1.4003e-01,  2.6112e-06,  8.5816e-02,
         1.0542e-04, -6.3093e-03, -4.3972e-05, -3.5425e-06,  9.6323e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0491e-05,  2.6508e-05, -6.6401e-06,  3.3972e-05,  2.2728e-05,
        -5.5210e-07,  2.4572e-05, -1.5396e-05,  2.1166e-05, -1.0548e-05,
        -2.0042e-05,  5.0317e-05, -1.6213e-06,  2.1161e-05,  1.2727e-05,
         1.8640e-05, -7.9367e-06, -5.0601e-02,  3.6677e-07, -3.9747e-02,
         3.1713e-05, -1.4555e-05,  1.7880e-06,  1.7004e-05, -2.1992e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0033e-05, -3.9632e-05, -3.2911e-05,  8.9669e-05, -5.2156e-05,
        -6.2329e-05, -4.8060e-05,  8.3863e-06, -5.7660e-05, -7.9875e-05,
        -7.9937e-05, -1.5990e-01, -6.5870e-05, -1.5490e-05, -2.3872e-05,
         8.6312e-06, -1.0013e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2522e-06, -2.5387e-05, -4.5990e-05,  1.4579e-04, -7.4075e-05,
         5.4651e-05, -4.9736e-05,  2.3726e-05, -4.4188e-06, -1.7350e-04,
        -1.4235e-04,  3.6800e-04, -1.2603e-01, -1.1918e-05, -3.1134e-05,
        -1.4014e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0675e-05, -2.2103e-05, -1.3951e-05, -1.6895e-06,  1.1790e-05,
        -1.7814e-05, -3.1270e-05,  1.1451e-05, -1.3144e-05, -2.6280e-05,
         2.6880e-03,  1.5758e-05, -7.1405e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8556e-06,  3.4721e-06, -3.3680e-06,  6.7596e-06,  2.0623e-06,
        -2.7321e-06,  2.3426e-06,  8.8093e-06,  3.5811e-06, -6.8462e-06,
         9.8675e-06, -1.9199e-06, -2.5098e-06, -1.0823e-05, -1.2838e-06,
         1.8023e-06, -8.6790e-07,  5.8743e-06, -5.0396e-06,  4.7355e-06,
         1.0902e-06,  1.0166e-06,  4.8431e-06,  6.7197e-06,  1.8027e-07,
        -1.8710e-06, -2.8317e-06, -4.7477e-06,  2.5284e-06,  1.6839e-06,
        -7.3682e-06,  5.4237e-06, -3.6589e-06,  1.3376e-05,  4.1521e-06,
        -7.7454e-07,  9.6092e-07, -6.0218e-03,  7.5216e-07,  1.0549e-05,
        -1.0904e-02,  5.0158e-06, -2.2004e-07,  1.0016e-03, -2.2509e-02,
         8.8613e-06,  6.6306e-06, -3.7766e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4446e-04,  4.5723e-06, -2.8905e-05,  1.9176e-05,  1.4376e-05,
         1.1984e-05,  1.0073e-06,  2.9045e-05, -1.8748e-05,  5.3242e-05,
        -3.7186e-05, -5.6261e-05,  4.0390e-05, -4.2254e-05,  4.4910e-05,
         1.5183e-05, -1.2127e-05,  2.4264e-05, -3.7103e-05, -4.5231e-05,
        -1.3224e-05, -5.6972e-05, -4.5026e-05,  3.8732e-05, -2.7346e-07,
        -9.1055e-06, -3.1446e-05,  4.8830e-05,  5.1495e-06,  2.4383e-05,
         3.4224e-05, -8.2119e-06,  3.7275e-05, -4.4351e-05,  2.9036e-05,
         3.0551e-05, -6.7749e-03,  8.1348e-05,  8.0578e-02, -1.7138e-04,
         1.6835e-01,  3.1063e-05,  7.9111e-05,  8.8033e-05,  5.5562e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8051e-04, -2.4522e-05, -4.7619e-06, -1.1318e-05,  5.1490e-05,
         4.4074e-05, -7.4422e-06, -4.2357e-06, -1.5525e-05, -1.7406e-05,
        -6.1747e-05,  1.9633e-05,  1.5590e-05, -7.4131e-06, -8.2823e-06,
         9.2152e-06, -1.6976e-05, -2.7141e-05,  6.6079e-06,  2.5382e-06,
        -7.0059e-05,  7.4887e-06, -4.5376e-05,  5.2489e-05, -8.4622e-06,
         3.7522e-05,  1.4010e-05,  4.1463e-06, -2.4081e-06, -1.2631e-05,
         1.2674e-06,  1.6333e-06, -1.3363e-05, -4.3211e-05,  2.6223e-05,
        -3.0542e-06,  1.9011e-01, -3.9444e-05, -2.3790e-04,  1.6952e-05,
         3.3871e-05,  5.1171e-05, -6.5110e-03, -3.0031e-05,  1.4385e-05,
        -3.1170e-03, -3.4812e-04, -5.9454e-06,  3.2180e-05, -1.0927e-04,
         5.7119e-04, -8.3476e-03,  3.6240e-05,  2.7418e-05, -3.6493e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1850: [tensor([-1.0532e-04, -6.3264e-06,  1.7523e-06,  7.2638e-06,  1.3069e-05,
         6.3028e-06, -3.3347e-05,  2.5383e-05, -2.5264e-05, -7.1552e-06,
         1.4787e-05, -1.5117e-05, -1.4063e-02, -3.1544e-05,  1.3138e-05,
        -4.8561e-02,  1.9835e-05, -8.0908e-06, -1.4537e-05,  4.0652e-05,
        -5.3197e-05,  6.4810e-06,  5.7762e-06,  3.3311e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3305e-06, -1.0449e-04, -6.2077e-05,  5.5808e-05, -4.2693e-05,
         2.9481e-05,  1.9602e-05, -1.1527e-04,  1.3821e-05, -1.8642e-04,
        -3.4272e-05, -2.2924e-05, -1.8933e-01, -7.7891e-05, -2.8000e-02,
        -1.1009e-04,  4.6708e-03, -2.0304e-04,  6.9504e-05, -1.7854e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9772e-05, -9.1844e-07,  1.3285e-05, -5.3586e-06,  1.9036e-06,
        -2.5044e-06, -6.8068e-06, -9.1798e-06,  5.0905e-06,  1.2821e-05,
        -8.2287e-06,  9.0564e-07, -3.4656e-06,  8.6537e-06,  1.3576e-05,
         4.7707e-06,  3.0669e-02,  2.1166e-05, -7.2473e-06, -4.4806e-05,
         5.1689e-03,  8.5621e-06,  5.0361e-06,  1.6565e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0745e-04, -6.1649e-06, -2.4335e-05,  4.8499e-05,  9.3969e-05,
         6.3937e-05, -4.7048e-05,  1.7001e-06,  2.0948e-06, -8.2413e-07,
        -4.7378e-05,  1.0210e-05, -5.6924e-06, -1.3803e-05,  8.3420e-05,
         7.3938e-06, -3.3667e-02,  3.6428e-05, -1.3891e-04, -6.7109e-04,
        -3.9947e-05, -8.9437e-04,  7.5428e-05, -3.5298e-05, -3.8833e-05,
        -1.2424e-02,  1.5263e-01, -3.3763e-05,  5.8237e-05, -3.2454e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1511e-04, -5.9590e-05, -8.0268e-05,  9.0803e-05,  1.6499e-05,
         3.7177e-05,  7.1201e-06, -2.4226e-05, -3.6071e-05, -2.2444e-05,
        -2.4958e-05, -3.7921e-05, -4.8131e-05,  7.8462e-05,  7.0954e-06,
         1.0240e-05,  1.0244e-01, -3.1640e-05, -6.7110e-05, -5.6880e-06,
         6.9191e-05,  1.4377e-01,  6.2899e-05,  1.1873e-05, -2.4317e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7342e-05,  1.2366e-08,  1.5724e-05, -2.0357e-05,  1.4023e-05,
         1.0233e-05,  6.0165e-06, -9.2546e-06,  3.6975e-06,  1.3948e-05,
         1.6998e-05,  5.1467e-06,  1.3942e-05,  1.2889e-05, -1.8226e-05,
         9.5755e-06,  1.7858e-05, -2.9899e-02,  8.6706e-05,  2.9230e-05,
         2.6281e-05, -1.3193e-06,  4.1230e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9802e-05, -4.5710e-05, -3.7025e-05, -1.0156e-04, -1.0900e-04,
        -8.4430e-05,  5.4225e-06, -1.2983e-04, -7.4333e-05, -6.9952e-05,
         7.4639e-05,  1.0041e-04, -2.9408e-05,  2.3177e-05, -7.5578e-06,
        -1.1800e-04, -6.6995e-05, -1.2291e-04,  2.6574e-01,  1.7889e-05,
        -8.1977e-05, -1.4907e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0851e-05,  7.5053e-05,  2.4862e-05, -7.7410e-05,  7.0140e-05,
        -3.3301e-05, -1.0673e-04, -8.7394e-05,  1.2855e-04,  6.7879e-05,
        -1.4421e-04, -5.8915e-05, -6.3193e-05, -6.1658e-05,  8.7358e-06,
         9.5435e-06, -5.2196e-05,  2.2485e-01, -3.5498e-05,  1.1457e-04,
         2.3239e-04,  1.2525e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6328e-04, -1.9369e-05, -1.5981e-05,  7.9557e-07, -5.8306e-06,
        -8.8815e-06, -1.4166e-06,  1.6410e-06,  1.5145e-05, -6.4503e-06,
        -1.2827e-05,  3.6761e-06,  4.3983e-06,  1.8526e-05,  4.6114e-06,
        -1.4020e-06, -3.8668e-06,  1.3307e-05, -1.5489e-06,  5.7004e-06,
         6.3614e-06,  7.5854e-06,  1.3995e-05,  6.4325e-06, -8.3451e-06,
         6.3526e-02, -1.3613e-05,  9.7538e-03, -3.0310e-06, -3.4710e-04,
         2.1478e-02, -3.2437e-05,  1.7985e-04,  7.8146e-06, -2.3135e-04,
        -6.9659e-05, -5.0732e-05,  2.1030e-05, -1.2536e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5467e-06,  2.6937e-06, -1.2856e-06, -1.4472e-06, -3.0645e-07,
        -6.9650e-06,  7.8367e-07, -7.4759e-06, -6.7606e-06, -3.0996e-07,
        -5.2824e-06,  3.1782e-06, -1.6700e-06, -9.5505e-06, -4.8890e-07,
        -5.6451e-06, -8.3619e-06, -8.2160e-07, -5.0510e-06, -2.4490e-06,
         3.8371e-06, -1.8417e-06,  2.9358e-06, -9.8454e-06,  3.8196e-06,
         1.6552e-03,  8.2315e-06,  4.4343e-03,  1.9555e-05, -9.2535e-03,
        -5.4165e-03, -8.9055e-06,  1.1584e-05,  1.4132e-03,  1.5174e-04,
         8.6266e-06, -5.9313e-06, -8.7120e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2983e-04,  1.1108e-05,  1.9080e-05,  7.9404e-06, -2.6823e-07,
        -5.0051e-06,  2.6102e-06,  2.6863e-06,  1.2746e-05,  4.4122e-06,
        -1.1908e-05, -2.1591e-06, -3.9767e-06,  1.6191e-05,  2.0952e-05,
        -1.1054e-05,  9.2837e-06,  6.9405e-06,  2.6300e-05,  3.6226e-06,
        -6.7585e-06,  1.2921e-05,  1.3616e-05, -1.1447e-05, -4.5522e-06,
        -9.8627e-04,  2.0704e-05, -7.2206e-03, -1.1172e-05, -5.2996e-02,
         2.5821e-05,  2.0429e-05, -2.0388e-05, -8.4562e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9640e-04, -2.0736e-06,  1.0396e-05, -4.6015e-05,  1.0232e-05,
         8.9100e-05,  1.2376e-05, -1.5478e-05,  5.1380e-05,  1.2512e-01,
         2.7390e-05,  7.9614e-05, -8.0083e-04,  2.7108e-01,  9.8445e-05,
        -4.0143e-05, -3.3009e-04, -7.0185e-05,  1.8599e-04, -5.7120e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1900: [tensor([ 2.7065e-05, -1.6451e-04, -8.9290e-05, -8.4170e-05, -7.3751e-05,
        -2.7989e-05, -8.2322e-05, -4.0785e-05, -5.0378e-05,  9.4822e-05,
        -2.7109e-05,  7.1362e-05, -9.1963e-05, -1.3361e-04, -2.9840e-05,
         9.5578e-05, -2.6880e-04,  2.4730e-01, -9.3381e-05,  3.8275e-05,
        -1.0330e-04,  3.3035e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5984e-05, -5.7381e-06, -2.6427e-06,  2.0915e-06,  7.9403e-06,
         9.7297e-06,  1.6252e-05, -5.8009e-06,  6.9476e-06,  8.6320e-06,
        -4.8584e-06,  1.8655e-06,  1.4814e-06,  1.5177e-04,  1.6318e-05,
         1.2824e-04,  3.3184e-04,  2.8263e-04,  2.9248e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.8577e-05, -5.1219e-06,  3.1767e-06, -2.4425e-07, -1.0055e-05,
        -3.3968e-06, -2.6444e-06, -9.8716e-06,  4.6635e-06, -2.7003e-06,
         1.3742e-06,  9.0819e-06, -1.4652e-06, -2.1973e-02,  1.2415e-06,
        -1.1778e-04,  2.6506e-03, -1.1078e-05, -1.4128e-05, -1.9775e-02,
        -7.2161e-06,  9.7536e-06, -4.0706e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5635e-05,  5.4805e-05, -3.8170e-05, -1.1751e-05, -7.9286e-05,
        -2.7536e-05,  3.9100e-05, -9.1333e-05, -1.7702e-05, -5.8108e-05,
        -4.9248e-05, -9.8243e-05, -2.0719e-05,  1.0025e-04, -4.7868e-05,
         1.6239e-05,  8.2928e-05, -4.6554e-05,  1.6735e-04, -6.4462e-03,
        -1.5228e-01, -2.7251e-05, -1.0356e-04,  1.2767e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3385e-05, -1.9725e-05,  4.6943e-06,  5.6449e-05, -4.1036e-05,
        -4.3342e-05,  2.0284e-05, -5.2373e-06,  4.0720e-05, -1.3899e-05,
         1.1112e-05, -2.7945e-05, -2.8729e-05, -8.2363e-02, -3.4511e-05,
         2.2313e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1865e-05,  6.4181e-06, -6.6605e-05, -7.9743e-05, -1.5777e-04,
        -8.9801e-05, -8.2466e-05, -1.2882e-04, -1.6805e-05,  2.5887e-05,
        -1.2822e-04, -1.8563e-05,  1.3483e-04,  2.5353e-01, -1.4000e-04,
        -2.9919e-03, -2.2156e-02, -7.1555e-05, -4.5565e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8207e-05,  3.8007e-05,  2.0974e-05,  2.4379e-05, -1.2726e-06,
         1.2370e-05, -5.4235e-05, -1.5361e-05, -4.4424e-05,  2.6433e-05,
         1.9072e-05,  4.7677e-05, -1.9275e-04, -1.3640e-01,  3.3860e-05,
         6.1092e-04, -1.0674e-02, -7.0354e-05, -1.7519e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3973e-05,  3.1052e-05,  7.6620e-05,  8.5096e-05,  9.3522e-05,
        -1.4701e-05,  9.3469e-05,  2.9368e-05,  9.7866e-05, -8.3931e-06,
         8.0485e-05,  1.0590e-05, -2.6256e-05, -2.9637e-05, -1.7773e-01,
        -3.3271e-05,  3.3748e-03, -1.2018e-04,  6.3517e-05, -2.7425e-02,
         9.1499e-05, -7.0948e-05, -2.3986e-05, -1.0656e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0733e-04, -1.7254e-05,  2.0284e-05,  2.6997e-05, -1.5437e-07,
        -8.4706e-06, -3.7494e-05, -6.3184e-06,  5.0570e-05,  5.1263e-06,
         3.3609e-05, -2.8326e-05, -4.5031e-05,  3.1627e-05, -8.5815e-02,
        -4.8934e-06,  1.8258e-03,  1.9163e-05, -9.2344e-06, -1.3270e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4097e-05,  3.6472e-05,  2.4487e-06,  6.5736e-07,  3.7911e-05,
         2.4163e-05, -4.5766e-07,  1.3972e-05, -1.0118e-05,  6.2648e-06,
         1.4227e-05, -1.0390e-05, -4.0575e-06,  5.2317e-05, -3.4995e-02,
         1.6813e-05,  2.2509e-05, -6.9790e-05,  1.6694e-05, -4.6155e-02,
         2.9966e-05,  3.6771e-05,  1.3638e-05,  1.9620e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7445e-06, -3.4981e-05,  2.9699e-05,  3.5816e-05,  2.7503e-05,
        -2.7876e-05,  1.6702e-05, -5.7346e-05, -8.8389e-06, -1.6702e-05,
        -1.7790e-05,  3.3704e-05,  9.5775e-02, -4.9688e-06,  9.4053e-02,
         2.1934e-05,  1.0482e-02, -5.2791e-06, -9.6450e-06,  5.6298e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5331e-04,  1.0311e-05, -4.7941e-06, -1.3188e-05, -6.9480e-06,
         1.1346e-05,  6.5289e-08, -1.0556e-05, -3.3133e-06,  3.0114e-05,
        -1.2466e-05, -2.0559e-05, -2.4619e-02,  8.1337e-06, -4.8687e-03,
         2.6639e-05, -9.8964e-03,  1.6298e-05, -7.7786e-06, -2.3185e-02,
        -3.3108e-05,  1.8998e-06,  5.1546e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1950: [tensor([-5.0167e-05,  3.7955e-05, -1.3891e-05,  3.6660e-06, -1.3821e-06,
         2.1673e-05,  3.5796e-05,  1.5317e-07,  1.0182e-05,  2.1408e-05,
         1.8718e-05,  2.1830e-05, -1.1064e-05, -1.7049e-02, -5.2016e-05,
        -2.1430e-03, -4.1001e-05,  1.1865e-03, -8.3931e-06, -8.9587e-02,
         1.1144e-05,  1.6867e-05, -3.2685e-06, -1.9399e-05,  7.1323e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5747e-04, -9.2233e-06, -4.4163e-05, -5.7023e-05,  8.7986e-05,
        -3.7617e-05, -1.6712e-05,  5.0978e-05, -7.4650e-05,  4.7431e-05,
         1.2531e-05, -6.2510e-05,  3.3828e-05, -2.3503e-02, -1.2697e-04,
         1.5944e-01, -6.3283e-05, -1.1430e-04, -2.5049e-04,  7.6512e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0012e-05, -1.9532e-05,  2.2693e-05, -4.5023e-06, -4.8099e-06,
         1.0308e-05, -3.5761e-06,  8.1276e-06, -4.3280e-05,  2.0271e-06,
         1.0840e-05, -5.1738e-02, -5.3563e-06, -5.4730e-06, -1.4125e-02,
        -8.8826e-07, -1.8695e-06,  1.2552e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4998e-04, -1.2566e-05, -4.2733e-06,  8.7335e-06,  1.3324e-06,
         3.9963e-05, -1.1983e-05,  3.4126e-05, -3.2450e-05,  1.0203e-05,
         2.1428e-05, -3.6826e-02,  2.7081e-05, -4.6396e-03,  8.8874e-05,
        -2.1520e-02, -3.4462e-02, -1.4260e-05,  3.9827e-05,  4.9871e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1019e-04, -3.8522e-05,  8.1964e-05, -5.9211e-05,  3.2656e-05,
        -1.4593e-04,  8.3391e-06,  1.2336e-05,  2.3542e-05, -2.9632e-05,
         5.0434e-05, -9.7293e-03,  7.8980e-05,  1.2875e-01, -1.8215e-04,
         9.9473e-05,  1.4111e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2499e-04, -4.4241e-06, -2.0759e-05, -2.7868e-05, -9.1776e-06,
         2.8949e-06,  1.5237e-05,  1.2738e-05, -4.2664e-05,  4.8624e-06,
        -4.9605e-06, -1.1759e-02,  3.7726e-05,  1.3210e-05, -3.4687e-02,
        -2.0437e-02, -2.3621e-05, -1.6384e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0595e-04,  3.0895e-05,  7.1015e-06, -2.1279e-05, -1.0354e-04,
        -1.0928e-05,  7.0099e-05,  3.9318e-06,  4.9162e-05, -9.2414e-06,
        -8.9760e-06,  1.0324e-01, -2.6017e-05, -5.2347e-05,  1.1893e-05,
         1.7817e-04,  1.1142e-01, -8.2869e-06,  7.0880e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6894e-06, -8.4844e-06, -4.9693e-06,  1.1306e-05, -3.6947e-06,
         2.5310e-06,  4.9256e-06, -1.8677e-05, -8.2289e-06, -1.7681e-05,
        -1.1156e-05, -2.1576e-02,  3.0841e-05,  3.3748e-05, -8.5654e-03,
         7.6527e-03, -2.6101e-05, -1.4832e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6527e-05,  5.5211e-07,  1.2431e-05,  4.0611e-06,  1.1235e-05,
         3.3344e-06, -3.6756e-07,  1.7222e-05,  7.9248e-06, -1.5005e-02,
        -2.1856e-02, -3.1996e-06, -1.1463e-02, -1.5229e-02, -1.4734e-05,
        -2.7778e-05,  1.7484e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7476e-04,  4.3311e-05, -7.6174e-05, -3.6091e-06, -3.3381e-05,
         5.5802e-05, -3.7666e-05, -9.3822e-06,  1.1377e-01, -6.5296e-05,
         8.3326e-05,  9.5716e-06, -1.5860e-04, -3.0526e-05, -3.7338e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5310e-05,  1.6101e-06,  3.8024e-05, -8.8774e-06,  7.9137e-06,
         4.8149e-06, -8.3212e-06, -5.8527e-06, -1.1605e-02,  7.3665e-05,
        -5.1914e-03, -6.3943e-03, -1.1494e-02,  1.6924e-06, -2.5238e-05,
         1.0120e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2473e-04,  4.0802e-06, -2.6135e-06, -1.3165e-05, -2.9193e-06,
         1.6276e-05, -3.1443e-06, -1.0661e-06, -6.3129e-06, -5.0410e-06,
         1.3230e-05,  4.8756e-06,  1.4353e-05,  4.3046e-07,  3.5099e-06,
         1.5907e-05,  1.8245e-06, -4.8059e-06,  8.5158e-04, -1.0727e-05,
        -6.7585e-04, -5.2691e-06,  2.6586e-06, -1.4431e-02,  8.4497e-04,
        -2.9792e-02,  4.3793e-06, -1.1390e-05, -5.7650e-06], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2000: [tensor([-7.5425e-05, -1.8229e-05,  2.4377e-05,  2.3652e-06,  1.1771e-05,
        -1.8777e-05,  1.6667e-05,  3.5232e-05,  4.3742e-05, -1.7555e-05,
         9.2938e-06,  4.5428e-05,  3.0779e-05,  1.1723e-05,  3.2411e-06,
        -2.7156e-05,  1.0178e-05, -5.6703e-06,  1.3584e-05, -1.3530e-05,
         9.0711e-06,  6.4229e-02,  1.8987e-05, -7.2971e-06,  9.0776e-05,
         6.4896e-02, -3.5452e-05,  1.8476e-04, -3.3641e-05, -1.2727e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2943e-04,  1.0769e-05,  9.1219e-07,  1.1794e-05,  1.0004e-05,
        -1.1795e-05,  4.0574e-06, -1.8314e-06,  6.7727e-06,  6.8249e-07,
        -8.5701e-07, -5.0864e-06,  4.4230e-06, -1.1697e-05,  2.0447e-05,
         1.0705e-07, -1.1189e-05, -6.5477e-06, -6.7181e-06, -8.0453e-03,
        -6.0509e-07, -1.4856e-02,  1.5165e-05, -5.4652e-04, -2.7353e-02,
         7.7082e-06,  8.8415e-06,  2.0868e-06, -2.1616e-02, -6.1629e-07,
        -5.0724e-03, -7.1084e-03, -1.2653e-06, -1.6674e-05, -5.4880e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2534e-05,  1.9550e-05,  1.6841e-05,  2.4461e-06,  2.0585e-06,
         1.2786e-05,  3.9365e-06, -4.0225e-06, -1.5487e-05,  1.4112e-05,
         2.7157e-05,  3.7399e-05,  2.8940e-05,  2.0213e-05, -2.1093e-06,
        -1.5660e-05, -3.3715e-05, -2.3209e-05,  3.8745e-06, -7.0490e-06,
         2.1675e-05, -4.3595e-05,  3.6097e-03,  3.3283e-05, -7.6059e-02,
        -2.2758e-06, -6.9544e-05,  1.9264e-05, -4.6396e-05, -1.2285e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3694e-04,  4.4361e-05, -6.4386e-07,  5.0461e-05,  3.7746e-05,
         5.3734e-05, -2.5511e-05, -2.2119e-05, -4.2590e-05,  1.0815e-04,
        -2.4863e-05, -2.7013e-05, -9.6121e-06, -1.4406e-05,  4.7705e-05,
        -2.3361e-05,  3.9155e-05, -6.2978e-05, -2.4196e-05,  1.0191e-01,
         4.5137e-05, -3.6913e-03, -6.4640e-05,  4.1606e-05,  8.2527e-02,
        -3.2353e-05,  4.3141e-05,  3.1084e-05, -2.8052e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0231e-04, -1.3586e-05, -3.0674e-05, -1.9663e-05,  3.6569e-06,
        -6.4513e-06, -3.2576e-05,  9.7991e-06, -2.2836e-06, -1.1778e-05,
         1.6999e-05, -3.9445e-05, -6.1106e-05, -1.4509e-05,  1.7322e-05,
        -2.4747e-05, -1.0211e-05,  2.7197e-05,  3.2568e-06, -2.7146e-05,
         4.5584e-02,  4.3666e-05, -2.0245e-05, -8.1949e-06,  3.2115e-05,
        -1.9897e-04, -4.6177e-05,  5.6695e-02,  4.9244e-05,  6.4271e-03,
        -2.8248e-05, -5.5153e-04, -4.3326e-06,  5.7894e-02,  5.2899e-05,
         3.3458e-05,  7.7575e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4269e-05,  8.1449e-06,  1.3833e-05, -3.2741e-06,  3.2649e-06,
         7.5566e-06, -1.3118e-05,  1.3977e-05,  2.2874e-05,  6.8401e-06,
         4.6053e-06, -2.7501e-05,  1.2884e-05,  4.4481e-06, -9.1332e-06,
         1.7150e-05,  2.3716e-05,  1.4600e-05, -4.9117e-06, -2.0349e-05,
         6.1069e-03,  3.6639e-05,  5.8665e-06, -9.0102e-04,  7.5832e-02,
        -3.6225e-05,  3.7835e-06, -9.9412e-06, -4.8567e-05, -5.6541e-03,
        -2.5420e-05, -1.1081e-05, -1.5700e-05, -6.8631e-06,  2.6573e-02,
         1.1362e-05,  3.4505e-05,  6.2234e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6805e-05, -6.0461e-06, -1.7525e-05,  1.9545e-05, -1.4709e-05,
         2.3711e-05,  2.7405e-05, -2.3038e-05,  3.8092e-05, -1.3147e-05,
        -6.2520e-06, -4.8370e-06,  1.2657e-05,  1.5714e-05,  5.0468e-05,
        -7.9710e-06, -4.6487e-06, -5.2935e-06,  1.4311e-05, -1.5800e-02,
        -2.2878e-05,  8.4184e-05, -6.5971e-02,  4.0675e-06,  1.3656e-05,
        -2.6737e-05,  1.1432e-05, -6.7521e-02, -1.7410e-05, -4.0246e-05,
         2.5829e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4546e-05, -2.4150e-06, -1.5375e-05,  1.8386e-05,  1.0595e-05,
        -1.0673e-06,  4.5845e-06, -9.4579e-06, -1.8356e-05, -8.7517e-06,
         9.3037e-06,  9.0170e-06,  7.6435e-07,  6.0128e-06,  5.2172e-06,
         5.0510e-06, -2.3640e-05,  9.1130e-06,  1.2892e-05,  1.6375e-07,
         1.9430e-05,  6.7243e-06, -6.8285e-06,  1.7053e-04, -3.1166e-05,
         4.8693e-05, -5.7490e-02, -3.2286e-05,  9.2717e-06, -2.9878e-05,
        -5.4454e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0434e-05,  2.8408e-05, -4.5730e-05, -2.1942e-05,  5.9392e-05,
        -7.2540e-06, -1.1222e-05, -4.2010e-05, -2.5288e-05,  5.1830e-05,
         1.5440e-05, -2.5005e-05, -9.5714e-06, -2.1484e-05,  2.0524e-06,
         7.3813e-06,  4.3738e-05,  3.4502e-05, -8.6304e-06,  8.6500e-02,
        -3.1372e-05, -1.4229e-03, -2.5684e-06,  1.0830e-01, -3.6451e-05,
        -1.5419e-03, -6.7297e-05,  2.2215e-04, -2.3749e-05, -9.7117e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2947e-04, -3.3844e-07,  1.2849e-05, -5.8010e-06, -2.0187e-05,
        -3.8680e-06, -6.0672e-06, -9.6427e-06,  3.3117e-05, -1.0672e-07,
         1.4922e-05,  1.4155e-05,  6.1323e-07,  4.2171e-07, -2.9465e-06,
        -6.9286e-06, -2.5737e-05,  1.2628e-05,  1.5221e-05, -3.1146e-02,
        -2.4074e-05, -2.0467e-05, -2.4127e-02, -1.8894e-05,  1.5648e-05,
        -4.4839e-02, -2.8260e-05, -6.7293e-05, -2.1028e-05,  3.7795e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3217e-05, -1.2773e-05, -5.9228e-06,  8.4380e-06, -2.0395e-06,
        -6.8232e-06, -8.0460e-06, -1.2577e-05,  3.7447e-06, -1.2203e-05,
         9.5395e-06,  1.9179e-05, -7.4196e-06, -2.8640e-06,  1.1880e-05,
        -4.0752e-06, -2.0787e-05,  1.6170e-05, -4.8006e-06, -9.5235e-03,
        -2.2322e-05, -1.5517e-02,  5.2492e-06,  4.8733e-04, -2.3028e-02,
        -1.8869e-05,  2.2546e-04, -3.1594e-02,  8.7773e-06,  2.0022e-04,
        -2.1832e-02, -9.3602e-06,  3.5383e-05, -6.8227e-03,  7.9303e-06,
         7.0144e-05, -2.6848e-05, -3.1424e-05,  1.0989e-05, -1.0961e-05,
        -1.0704e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2759e-05,  1.5626e-06, -2.9080e-07, -9.3308e-06, -4.4391e-06,
         1.0941e-05, -7.7774e-06, -6.1992e-06, -2.3150e-05, -1.2309e-06,
         3.7323e-06,  7.2629e-06, -1.0327e-05, -4.5637e-06, -4.9489e-06,
        -1.9792e-05, -2.4993e-05, -1.2881e-06,  3.8651e-06, -2.3016e-02,
         1.2360e-05, -5.5973e-03, -1.7425e-02,  8.9707e-05, -1.5288e-05,
         3.7440e-06, -3.5254e-02, -2.2904e-05, -4.8698e-06,  7.7295e-04,
         7.6447e-06,  3.5338e-06, -6.0723e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2050: [tensor([-6.3633e-06,  5.9220e-06,  5.6999e-06,  8.8429e-06,  8.8148e-06,
        -1.5008e-05, -3.6924e-06,  6.1112e-06,  5.3217e-06,  2.7955e-06,
        -2.3445e-06, -1.0174e-05,  2.2013e-05, -3.1852e-06,  6.6945e-06,
        -1.1939e-05, -2.1512e-07,  5.2298e-06,  8.5329e-06,  1.4708e-08,
         4.3095e-06,  1.3062e-05,  3.2874e-03,  5.9796e-07, -1.6134e-05,
        -2.3097e-02, -1.9619e-05, -3.4867e-02, -3.3545e-06, -1.1262e-05,
        -4.6752e-05, -1.4170e-05, -3.5273e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7724e-05,  4.3901e-07,  2.2961e-05,  4.1283e-06,  9.3366e-06,
        -7.0320e-06, -2.9030e-06, -5.7934e-06, -2.4821e-06,  9.3491e-07,
        -1.5357e-05, -1.5348e-05,  1.8519e-05,  1.6673e-05, -4.0922e-06,
         1.2152e-05, -8.1698e-07,  5.4766e-06,  4.5930e-06, -5.3363e-06,
         8.1582e-06, -6.9452e-06, -3.7348e-02, -2.5794e-05, -3.1333e-02,
         3.3512e-05,  5.9160e-05,  9.3739e-05, -3.5821e-02,  1.1365e-05,
        -1.7456e-05, -5.6043e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6647e-05,  1.3060e-05,  2.9393e-05, -4.6554e-06, -3.5786e-05,
        -1.7761e-06,  1.1151e-05, -1.3500e-05, -1.2440e-05, -1.0254e-05,
        -1.1580e-05,  3.4164e-06,  2.2040e-05,  1.3398e-01,  1.9986e-05,
         6.6912e-02,  3.1587e-06, -3.4449e-04, -1.6125e-04,  4.5764e-03,
         4.1004e-05, -4.9853e-03, -4.7078e-05, -1.8853e-05,  2.4622e-05,
        -7.6565e-05,  7.5158e-05,  6.9799e-05,  4.1522e-05,  3.1058e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3907e-05,  2.1090e-05,  2.9381e-05,  2.4317e-05, -1.3573e-05,
         5.0604e-05, -3.0458e-05, -1.6073e-05,  4.7536e-06,  3.0423e-07,
         2.6835e-05, -2.7144e-05,  5.8542e-06,  7.4923e-02,  6.3379e-05,
         6.0250e-02, -2.6594e-04, -4.7469e-05, -1.7885e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7850e-04, -2.7599e-05, -6.2375e-05,  1.0075e-04, -7.3103e-05,
         8.7326e-05,  2.3823e-07,  5.0342e-05,  1.8082e-05, -1.3629e-05,
        -5.7872e-05,  4.2190e-05, -4.9815e-05,  2.5781e-01, -1.0808e-04,
        -9.1073e-03, -1.7800e-05, -1.6711e-05,  1.0715e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8355e-05, -3.8317e-06, -1.3715e-05, -1.0129e-05,  3.7187e-05,
        -2.0799e-05,  8.9991e-06, -1.0504e-05,  5.3775e-05,  2.4725e-05,
        -1.0539e-05,  2.5156e-05,  2.3151e-05,  6.7125e-05,  7.0790e-06,
         1.5366e-05, -2.4939e-05,  1.8111e-06,  2.8119e-05,  1.9936e-05,
         1.6537e-05, -5.4286e-05,  2.2753e-05, -3.4143e-05,  4.9519e-05,
        -1.0514e-05,  3.2634e-05,  1.7814e-05,  4.5456e-05,  5.5332e-06,
         1.4612e-01, -2.2858e-05, -1.6494e-05,  1.0058e-02, -7.9687e-05,
        -3.8210e-04,  2.5655e-05, -1.1682e-03, -5.2468e-05,  2.2711e-06,
        -1.8365e-04, -2.7052e-05, -1.6065e-05, -2.6646e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0456e-04,  2.8894e-05, -1.2570e-06,  2.0500e-05, -6.6225e-06,
        -5.2524e-06, -1.0504e-05, -1.5083e-05,  3.8157e-05,  1.1759e-05,
         2.9627e-05, -3.4863e-06,  1.7680e-05,  8.6558e-07,  2.5916e-05,
        -3.6101e-05, -6.3681e-06,  3.0333e-05, -1.2024e-05, -1.2289e-05,
        -3.4049e-05,  4.1450e-05,  1.7090e-05, -1.0343e-06,  5.5072e-06,
         2.9635e-05, -1.7390e-05, -1.4265e-05, -2.2302e-05, -1.2703e-05,
         1.1604e-02,  3.7581e-05,  4.4107e-05, -1.3526e-01, -4.2263e-05,
         8.2417e-06,  2.3630e-05,  1.8453e-04,  3.4440e-05,  1.4720e-05,
        -5.2286e-05,  1.2963e-03,  2.6497e-06, -9.7905e-05, -6.0232e-06,
         1.2131e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6241e-05,  1.2828e-05, -8.5388e-06,  1.0529e-05, -2.9432e-06,
         1.1619e-05,  1.2967e-06,  1.9768e-05, -6.6364e-06,  1.1289e-05,
         1.6256e-05,  8.3906e-06,  7.4888e-06,  1.3185e-05, -3.1031e-06,
         3.8162e-06, -6.4141e-06,  2.9678e-06, -1.2987e-05,  9.1307e-06,
        -6.2395e-07, -9.2029e-06,  9.6540e-06,  2.1244e-05, -7.9908e-06,
         1.5927e-05,  1.5184e-05,  9.1183e-06, -2.8927e-06, -1.2641e-05,
        -5.7652e-05,  2.4075e-03, -1.5949e-05, -9.9055e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0987e-05, -3.4227e-05,  3.6925e-06, -2.3634e-06, -6.6509e-05,
        -5.3893e-05,  2.1395e-05,  1.8244e-05, -7.4864e-05,  5.3680e-06,
        -5.7576e-05, -5.4974e-05,  2.7505e-05, -4.7416e-05, -2.2766e-05,
         3.3230e-05, -1.3796e-02,  4.6918e-05,  4.1554e-05,  1.3730e-01,
         2.0047e-05, -3.4708e-06, -2.6945e-03, -8.4266e-05, -4.9910e-03,
        -1.3892e-04,  9.7594e-07, -3.9067e-05, -4.9364e-05,  4.4804e-05,
         5.8906e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9121e-05,  7.5086e-05, -3.2734e-05,  3.5083e-05, -1.7748e-05,
         5.6585e-06,  2.1897e-05,  8.4842e-05,  2.0843e-05, -2.1741e-05,
        -6.7694e-06,  8.9333e-06, -2.2398e-05,  4.7349e-05, -2.6315e-06,
         4.0792e-05,  1.2812e-01,  5.2591e-05,  8.1273e-02,  7.3338e-05,
        -4.8339e-06, -3.5049e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9767e-05,  2.9540e-06, -1.7603e-05,  1.8853e-05, -1.8218e-05,
         9.8674e-06,  2.6435e-05,  1.8625e-05,  5.1888e-05, -8.1586e-06,
         3.8050e-05, -8.9129e-06, -6.6935e-06, -2.5919e-05,  1.2240e-05,
         9.4191e-06,  4.4005e-03,  2.6528e-05, -1.0109e-04, -1.5260e-01,
        -3.3660e-05,  4.5575e-06,  2.1361e-05,  1.0169e-03, -7.9436e-06,
         2.3690e-03,  4.3066e-06,  4.5083e-05,  3.7250e-05, -3.4634e-05,
        -4.5662e-06,  5.1042e-05, -4.0894e-05,  2.4161e-05,  1.3598e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5202e-05,  5.7125e-07, -1.4247e-05, -2.4439e-05,  4.0585e-06,
        -7.0751e-06,  3.4532e-06, -1.3266e-05, -4.1934e-06,  3.2809e-05,
        -1.2527e-05, -2.8217e-05, -1.6258e-05, -2.3342e-05,  1.9920e-06,
         1.1335e-05, -4.5869e-06, -3.8369e-02,  1.8218e-05, -3.4232e-02,
         4.0764e-05,  8.8105e-06,  1.8643e-03,  4.0145e-05,  2.8823e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2100: [tensor([ 1.3401e-06,  1.5435e-05, -2.3240e-05,  2.4002e-05,  3.6227e-07,
         1.2133e-05, -3.3704e-05,  3.3243e-05,  3.0546e-05, -1.4463e-05,
         2.2401e-06,  8.3706e-07,  2.0965e-05,  2.2084e-06,  4.3467e-05,
        -2.7772e-02,  2.5234e-05,  3.9030e-05,  5.8877e-05, -2.0611e-02,
        -3.8063e-02, -2.2445e-05,  9.7099e-06,  6.3125e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3733e-05, -6.4336e-05, -5.8328e-05,  4.6838e-05,  2.7951e-05,
        -1.9807e-05, -5.3218e-05,  2.5006e-05, -1.5309e-05, -3.1071e-05,
        -3.3007e-05, -5.9420e-05,  8.9709e-05, -3.3356e-05,  3.7651e-06,
         1.4201e-01, -5.8857e-05,  6.4693e-05, -5.2518e-05,  1.9929e-05,
        -1.7995e-05, -6.2977e-05, -9.0471e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8554e-05, -2.2070e-06, -7.4085e-05, -1.1866e-05, -5.4900e-05,
        -5.6169e-05, -6.1542e-06,  1.5866e-05, -4.3460e-05,  4.4834e-05,
        -1.6730e-06,  9.4465e-06, -1.7153e-05, -5.1680e-05,  2.6386e-05,
         2.5587e-03, -1.4926e-05,  3.5643e-05,  2.4760e-05,  5.1896e-05,
        -1.8893e-05,  2.6927e-05, -1.2376e-01, -1.4733e-05, -2.1279e-05,
        -2.9853e-05,  1.7643e-05, -1.2484e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4256e-05,  3.6209e-05,  4.4349e-05,  2.7675e-06,  1.8937e-05,
        -8.1014e-06, -5.9387e-05, -3.6090e-05, -5.1499e-05, -2.2161e-05,
        -4.2891e-05, -2.5750e-05,  1.3169e-05,  3.2106e-06,  4.6740e-05,
         2.3658e-05,  2.6594e-05, -1.3838e-05,  2.3465e-01,  1.1598e-04,
         9.0424e-05, -1.5581e-04, -1.4551e-02, -1.6410e-05, -6.5063e-05,
         2.3276e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1211e-05,  4.4290e-05,  4.6447e-05,  2.6105e-05, -6.4476e-05,
        -2.6075e-05, -1.7970e-07,  2.0449e-06, -6.2351e-06,  4.3666e-05,
        -1.8513e-05, -2.2995e-05,  8.3977e-05, -1.8341e-05, -1.9908e-05,
         1.7461e-05, -2.0541e-07, -3.6833e-03, -2.1048e-06, -6.0560e-05,
        -1.4969e-01, -8.9043e-05, -1.8313e-05, -1.4453e-05, -9.2591e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2123e-05, -3.5321e-05,  1.2174e-05,  9.7639e-06, -1.0009e-04,
         2.9452e-05, -8.3223e-05, -2.4616e-05, -4.0037e-05, -6.4929e-05,
         3.8649e-05, -4.4231e-05,  5.4956e-06,  3.9531e-05,  1.3364e-05,
        -4.7027e-05, -1.6304e-05, -1.4025e-02,  5.0237e-05,  1.0515e-04,
         3.3236e-03, -5.0113e-04,  5.0660e-02,  1.6642e-01, -5.4106e-05,
         2.3633e-05, -6.5755e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8457e-06, -2.5339e-05, -2.2099e-04, -4.7278e-05,  3.5048e-05,
        -1.4468e-04, -1.1754e-05, -6.5910e-05,  4.1457e-05, -4.0717e-05,
        -5.7308e-05, -7.1669e-06,  1.2714e-05, -1.8743e-04,  1.3434e-05,
        -2.7686e-06, -2.7118e-05,  2.3426e-01,  6.4461e-05, -7.6817e-04,
        -8.0013e-03,  3.3058e-05,  8.2517e-05, -4.2634e-05, -1.4553e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1251e-05, -1.5849e-05,  5.5691e-06, -3.0875e-07,  2.6034e-05,
         2.1901e-05, -2.5790e-07,  4.5588e-06, -1.5655e-05,  1.2575e-05,
        -2.5529e-05, -7.0574e-05, -1.1614e-05,  7.2359e-05,  8.0420e-06,
        -1.3498e-05,  1.3716e-05, -2.0814e-05, -8.7928e-02, -1.2522e-05,
         1.3737e-03, -8.5556e-03, -2.8215e-02,  5.7799e-05, -4.5038e-05,
        -9.5544e-06, -1.1267e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6807e-06,  1.0751e-05,  5.8104e-05, -2.1594e-06, -2.9526e-05,
        -1.4779e-05,  3.1387e-05, -1.6183e-05,  4.0698e-05,  1.3452e-06,
         1.8828e-05, -1.6665e-05,  1.3407e-05, -4.2402e-05,  3.5371e-06,
         1.0217e-05, -3.8010e-05,  5.0578e-05,  1.0095e-02, -5.9299e-09,
        -1.1452e-03,  1.3816e-01, -2.5091e-05,  1.5857e-02, -1.6256e-05,
        -1.3555e-04, -6.5555e-05, -4.5713e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9423e-05, -4.6132e-05, -2.1523e-06, -1.8258e-05, -1.6015e-05,
        -1.5418e-05, -7.6064e-05, -2.8273e-06,  2.4428e-05,  3.1450e-05,
         7.2640e-06,  2.6855e-05,  1.7533e-05,  1.1681e-05,  1.7223e-05,
         3.8171e-06, -4.3988e-06,  4.0653e-05,  1.1245e-01, -1.3484e-05,
         6.4565e-02, -7.1590e-05,  1.9176e-05,  4.3620e-02,  7.8849e-05,
         2.5921e-05, -3.1618e-04, -1.5528e-05, -5.3638e-06,  6.2734e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0637e-04, -1.6219e-05, -6.9717e-06,  1.9887e-05,  6.4376e-06,
        -4.9100e-06,  1.2082e-05,  7.4332e-07,  7.6434e-06, -7.3252e-07,
         9.0369e-06, -3.3027e-06, -3.4215e-07, -2.0635e-06, -3.9496e-06,
        -1.3087e-05,  8.1192e-07, -2.4861e-06,  8.7070e-07, -3.6563e-06,
         6.5666e-06, -7.2134e-06,  9.4689e-06, -7.8737e-06,  2.6974e-02,
         2.7474e-05,  7.0064e-04,  1.0300e-05, -2.3198e-02, -7.7811e-06,
        -3.0018e-02, -1.2532e-05, -2.3543e-05, -3.9215e-05,  7.8886e-06,
         6.3521e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3935e-05, -4.2477e-06,  9.2934e-06, -1.5436e-05, -6.2581e-06,
        -1.6532e-05,  5.1866e-06, -2.1908e-06, -9.8974e-06,  3.1615e-06,
        -7.3778e-07,  2.3108e-06,  2.7258e-06, -2.1831e-06,  2.0913e-06,
        -9.7845e-06,  4.4411e-06, -1.0218e-05, -1.6962e-06, -7.1839e-06,
        -3.2897e-06,  7.5063e-06,  3.4084e-06,  2.1902e-07, -3.2606e-06,
         1.1398e-06, -4.4374e-03,  1.3022e-05, -2.1947e-02,  1.4707e-05,
        -7.0218e-06,  2.1002e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2150: [tensor([-1.5638e-05,  1.0346e-05,  1.9719e-05,  5.4768e-05, -2.7970e-05,
         4.0015e-05, -1.2116e-05, -4.5917e-05, -4.9357e-05, -4.7656e-05,
        -2.3390e-05,  4.5086e-05,  5.3750e-05, -2.6710e-05, -4.4594e-05,
         1.1157e-01,  1.0291e-07,  1.2496e-01, -4.4177e-05, -3.9798e-05,
         7.3319e-05, -3.7915e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1308e-05,  2.8831e-06,  2.0177e-05,  5.5918e-06,  1.6143e-05,
        -2.7308e-05, -5.5094e-06,  1.2730e-06,  7.3986e-06,  2.2598e-05,
         3.1751e-06, -6.6526e-06,  1.2284e-05, -1.1586e-05,  1.6066e-05,
         1.9205e-05,  2.0038e-06, -1.5916e-02, -2.5767e-05, -2.5603e-02,
        -4.8597e-06, -1.7028e-05,  4.8908e-06,  1.1320e-03, -2.9878e-02,
         1.0223e-05,  2.9660e-05,  1.7619e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1096e-04,  3.7405e-05,  5.6140e-05,  4.9236e-05, -5.6292e-05,
         7.4393e-05, -4.0399e-05,  3.9669e-05,  3.4692e-05,  2.3528e-05,
        -4.6506e-05,  3.4550e-05,  2.8225e-05, -1.8611e-05,  9.1049e-05,
         7.4165e-05,  4.3331e-02,  9.8033e-05,  5.4312e-05,  3.1054e-02,
         1.5425e-01,  8.6581e-05, -1.5156e-05, -3.7832e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4783e-05,  1.2185e-04,  3.3288e-05, -1.3263e-04, -1.6011e-04,
        -4.7927e-05, -2.0115e-04, -1.6927e-04, -9.5877e-05,  6.3723e-05,
        -8.5880e-05, -1.2871e-04,  1.7754e-04,  1.5106e-04,  9.0885e-05,
         2.7075e-01,  6.7700e-05, -7.3113e-05,  5.1061e-05,  1.7668e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1089e-06, -1.1701e-05, -6.8056e-06, -4.2248e-05,  5.4685e-06,
        -1.7788e-05, -2.0948e-05,  1.3391e-05, -1.1493e-05,  3.0321e-05,
        -1.3045e-05, -1.9838e-05, -1.9690e-06, -5.4534e-05, -1.0854e-05,
        -3.5853e-05,  1.8923e-05, -1.0931e-04, -5.0368e-03, -8.6903e-02,
         7.0403e-06, -2.7807e-05, -5.3279e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3331e-05, -2.8482e-05, -3.8471e-06, -5.3694e-06,  7.1022e-06,
        -6.5233e-06,  4.1515e-05,  1.6158e-05,  4.5952e-06,  1.5904e-05,
         5.4618e-06,  3.6355e-06,  1.0908e-05, -1.0092e-05, -2.3324e-05,
         2.0899e-05,  1.1121e-05, -5.1062e-02, -1.2973e-05, -4.3743e-06,
        -1.6943e-04,  4.7929e-05, -2.7852e-02, -3.5747e-02,  4.0297e-05,
        -1.2182e-04, -1.4707e-04, -6.9603e-06,  1.5229e-05,  3.1803e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0174e-05, -3.3749e-06,  1.0308e-05, -8.1238e-06,  3.1649e-06,
         2.4081e-05,  6.0791e-06,  4.1597e-06, -3.7551e-06, -1.0341e-05,
        -5.7242e-06,  4.5834e-06,  1.7089e-06, -4.6522e-02, -1.1720e-05,
         2.1358e-05, -3.8990e-07, -2.9947e-05, -6.2483e-03, -1.8395e-02,
        -1.7029e-05, -2.5171e-04,  1.1269e-05, -2.6992e-07, -2.5233e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6730e-05, -3.2601e-05,  7.9722e-06, -1.0231e-05, -5.5658e-07,
         4.2232e-06, -4.9686e-06,  1.7380e-05,  5.9288e-06, -8.3268e-06,
        -1.7963e-05,  5.1733e-06, -5.8638e-02, -6.1198e-05,  3.4900e-05,
         7.3514e-05, -6.9302e-05, -8.0434e-04, -7.4557e-06, -5.9369e-02,
         2.7423e-05, -6.0983e-05, -1.2068e-05,  3.6215e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3956e-05, -4.5773e-08,  2.5383e-05,  1.3659e-05, -5.9064e-05,
         3.5825e-05, -2.2750e-06,  4.5723e-05,  2.5293e-05, -3.9907e-05,
        -5.1394e-06, -1.6773e-05,  1.5950e-02,  1.3139e-05,  3.9557e-05,
        -2.5670e-05, -8.1184e-02,  1.8606e-05, -6.2356e-02,  5.2909e-06,
        -1.1409e-04,  1.5499e-05,  3.4891e-05, -2.3266e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0496e-05, -7.3742e-06, -6.3662e-06, -3.7945e-06, -6.0624e-07,
        -1.8552e-06,  9.5971e-06, -9.3848e-06, -6.2942e-07, -1.1435e-06,
         1.5385e-06, -2.0760e-06,  6.1089e-07,  1.9900e-03, -6.6187e-06,
         5.5093e-05,  1.3361e-05,  6.1452e-06,  1.2798e-02,  2.1310e-06,
         1.1023e-05, -8.1050e-06, -9.0171e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7160e-05,  1.7844e-05, -1.2527e-05,  4.9436e-05,  4.4778e-05,
         6.7189e-06,  3.3876e-05,  1.0422e-05,  1.4638e-05,  2.4276e-05,
        -6.0217e-06, -2.0799e-05, -9.5870e-06, -8.1674e-05,  3.2204e-05,
         3.8024e-03,  7.2414e-02,  4.8623e-05,  4.6468e-05,  1.1849e-04,
        -4.4132e-05,  1.9914e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6615e-05,  2.2260e-06,  8.0161e-06,  4.1624e-06, -1.2891e-05,
        -1.5426e-06, -5.4945e-06, -3.4534e-06,  1.2474e-05,  1.5881e-05,
         5.7779e-06, -1.1854e-06, -1.5926e-05, -5.8548e-05,  2.9231e-05,
         1.2960e-03,  2.4352e-03,  1.9174e-05,  3.2351e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2200: [tensor([ 9.5695e-05,  5.3907e-05,  4.3192e-05, -8.1381e-05, -7.4538e-05,
         4.6234e-06,  5.1108e-06,  2.3556e-05,  1.9617e-05, -1.6779e-05,
         1.1552e-05,  3.4630e-06,  1.1087e-06, -5.6703e-05, -1.6057e-05,
        -4.6547e-05,  1.8467e-05, -6.2097e-05, -6.3186e-05, -4.1801e-05,
         1.5692e-05, -1.6432e-01,  1.6254e-05, -3.3885e-03,  5.0927e-05,
        -1.0978e-04, -8.4680e-05, -2.7512e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1750e-05,  7.1075e-06,  4.4624e-06,  5.1976e-07,  2.3061e-05,
        -1.0053e-05,  3.9699e-06,  1.1997e-06,  4.8385e-06,  2.0244e-05,
         1.1448e-05,  3.9236e-06,  3.5251e-06, -7.0160e-07, -7.6536e-06,
         3.4312e-06,  2.8613e-06,  3.8875e-05, -5.4305e-05, -1.0177e-05,
         1.3922e-02,  1.9383e-03,  1.3930e-06, -1.5208e-04,  1.6564e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1153e-06,  1.7670e-06,  3.5396e-05, -3.1995e-05,  4.7760e-06,
        -1.9916e-05,  5.6053e-05,  3.2149e-05, -2.2432e-05, -3.8524e-05,
        -8.0651e-05,  7.2341e-05,  3.5109e-05, -2.6523e-05,  2.0085e-05,
        -3.9100e-05,  2.4985e-05,  3.0578e-05,  7.6010e-05,  1.5786e-01,
         9.6340e-05,  7.6958e-05,  1.9182e-02,  3.3799e-05,  1.5296e-05,
         7.6363e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1013e-05, -3.5047e-05,  3.1373e-05, -5.1208e-05,  1.9393e-05,
        -2.7051e-05,  5.6721e-05,  5.0105e-06, -8.1836e-06, -5.4580e-05,
         9.4038e-05,  1.3769e-05,  1.4472e-05, -3.0561e-02, -7.9135e-05,
        -1.3193e-01, -3.4056e-05, -4.1610e-04, -4.6223e-05, -2.0270e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2109e-05,  1.2790e-05, -3.7641e-05,  3.2895e-05, -1.3105e-04,
        -6.9475e-05, -2.3659e-05,  5.2815e-07,  4.5963e-05, -1.5962e-04,
         1.8018e-04,  1.4601e-04, -3.1405e-05,  3.2324e-03, -2.2335e-04,
        -1.8229e-01, -1.3911e-05,  1.4878e-04,  3.7732e-05, -1.7571e-04,
         1.6508e-04, -9.8175e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8070e-05, -2.7690e-05,  5.1622e-06,  1.1039e-05,  2.1000e-05,
        -3.1187e-05,  2.2671e-05, -1.0401e-05,  1.1578e-06,  6.6236e-06,
        -8.2527e-06,  3.4593e-06, -2.8162e-05,  1.6864e-03,  1.6465e-05,
        -5.1966e-02,  2.0058e-05,  4.7439e-06,  3.7527e-05, -5.9626e-05,
        -1.5764e-05, -1.9710e-05, -4.6379e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5836e-05,  1.4769e-05, -8.5011e-06,  7.8504e-06,  1.4035e-05,
         1.4123e-05, -3.3809e-05, -9.4350e-06,  5.8060e-06,  2.3778e-05,
         2.6591e-05, -1.7829e-06,  3.2146e-06, -4.8712e-02, -2.3960e-05,
         8.6360e-06,  2.2434e-05, -1.1915e-05, -2.3937e-05, -1.9624e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3653e-05, -1.1483e-05, -3.2068e-05,  9.8724e-06, -6.7542e-05,
        -1.5445e-05, -1.4484e-05,  2.0628e-05, -1.4917e-05, -2.1496e-06,
        -2.1518e-05, -1.8209e-05, -5.6854e-05,  6.5107e-02, -9.0789e-06,
         9.5823e-06, -2.4680e-06,  2.3632e-02, -1.7203e-06, -1.6182e-05,
        -3.7963e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4097e-04,  5.8883e-06, -6.9544e-06,  1.8550e-05,  4.1808e-05,
        -3.5722e-06, -1.3847e-05, -2.2947e-05, -5.5824e-05, -1.8199e-05,
        -5.0069e-06, -3.7335e-05, -2.0459e-05, -1.2734e-01,  5.0822e-05,
        -1.1504e-05, -8.5534e-05, -9.7338e-05, -1.6247e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1975e-05,  1.9256e-05,  7.2182e-05,  1.5229e-05, -2.0840e-05,
         3.9652e-05, -8.0796e-06, -1.5700e-05,  1.5981e-05,  5.9192e-06,
        -4.2186e-07,  1.1051e-06,  1.5499e-05, -3.9293e-05,  6.6014e-05,
        -7.6696e-02,  4.6856e-05,  4.4122e-05, -1.2412e-05, -2.7278e-05,
        -3.4240e-02,  7.0376e-05, -4.7499e-05, -4.3375e-04,  3.3448e-05,
        -1.3024e-06, -3.4638e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0955e-04, -2.6893e-05,  6.4202e-05,  1.2382e-05, -1.3548e-05,
        -2.6314e-05,  3.1581e-05,  5.6231e-05,  1.3514e-04,  2.8504e-06,
        -7.5138e-06,  2.8582e-05, -3.5086e-05, -5.3013e-05,  8.1772e-05,
        -1.1780e-01, -1.4131e-02, -2.0295e-05,  3.3414e-05, -5.9702e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9533e-06,  3.8945e-05,  6.2535e-05, -6.8592e-05,  3.7670e-06,
        -7.1985e-05, -4.2148e-05, -4.4853e-06,  2.5087e-05, -6.5252e-05,
         9.1005e-06, -6.9909e-05,  3.6479e-06,  6.0422e-05, -7.5316e-05,
        -2.4457e-03,  2.1930e-05, -1.9505e-04,  1.9879e-05, -5.6906e-05,
         1.9405e-01, -6.4692e-03, -1.4613e-05, -5.7029e-05,  9.3076e-05,
        -1.6756e-04,  3.4903e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2250: [tensor([ 8.6357e-05,  1.2055e-06, -2.4446e-05, -4.8553e-05, -3.1314e-05,
        -5.9988e-05,  3.7488e-05,  1.6127e-05,  7.2191e-05,  2.1275e-05,
        -2.1869e-05, -1.1487e-05,  1.9717e-05, -1.3568e-05, -2.9849e-05,
         8.8718e-05, -1.3801e-01, -4.6014e-05, -7.6153e-06,  1.0446e-04,
        -5.9044e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8981e-05,  1.7762e-05, -3.2935e-05,  1.4148e-06,  2.7520e-05,
        -2.3542e-05,  4.9056e-05,  6.6593e-05,  7.3190e-05,  5.4684e-05,
         4.5519e-05,  3.1972e-06, -3.8688e-05, -1.5433e-05, -5.0851e-05,
        -1.5473e-01, -1.1754e-04, -7.4375e-03, -1.5337e-04, -7.3268e-03,
         2.8569e-05, -2.9061e-06, -7.2277e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6363e-06,  1.8632e-05, -3.2952e-06,  1.8632e-05,  9.1808e-06,
         1.4278e-05,  6.1890e-06,  1.5517e-06, -3.3978e-06, -5.1309e-06,
         1.8845e-05,  2.3600e-05,  3.0509e-06,  1.4417e-06,  2.0764e-05,
        -4.1840e-06,  6.5393e-06, -9.8583e-03,  2.2674e-06, -3.0901e-02,
        -3.0149e-02, -6.1489e-06,  3.6751e-06, -3.9874e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2192e-06,  7.5817e-06, -1.0206e-05, -5.5517e-06, -1.5715e-06,
        -6.0466e-06, -3.3574e-06,  1.3635e-05,  4.9180e-06,  5.3280e-06,
        -6.6880e-06,  9.7188e-07,  5.2129e-06,  8.6518e-07, -7.0802e-06,
        -5.0322e-06,  3.1643e-06, -1.9590e-02,  3.6110e-06, -8.9199e-03,
         9.2745e-06,  3.7195e-03,  3.5116e-05, -5.6278e-06, -2.6380e-05,
         2.4834e-05, -5.0003e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4092e-05,  9.5745e-06,  5.0764e-06,  8.1115e-06,  1.9575e-05,
        -5.0604e-06,  5.5581e-06,  1.1098e-05,  1.1767e-05,  1.0963e-05,
        -1.5733e-05, -3.9737e-07, -8.2306e-06,  1.9165e-05,  5.9113e-06,
         1.3274e-05, -4.6446e-06, -3.2624e-02,  2.7340e-05, -9.2225e-03,
        -2.8790e-05, -1.4990e-02, -5.2158e-06,  2.0448e-05, -2.1010e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5758e-05,  2.4973e-05,  3.1305e-05, -3.2812e-06,  3.1182e-05,
         1.4866e-05,  1.6039e-05,  8.9462e-06,  2.8325e-05,  5.0134e-06,
         3.7503e-06, -5.3730e-06,  6.6828e-07, -2.1298e-06, -8.7054e-06,
         4.8152e-06,  1.4013e-05,  1.1652e-05,  9.7887e-04, -5.8613e-06,
        -6.1537e-05, -9.6291e-02, -2.8827e-05, -1.3487e-02, -2.2462e-04,
        -5.1820e-05, -6.4727e-05, -4.4190e-05, -6.4169e-06, -3.9754e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3068e-04,  1.7143e-05,  5.6677e-05,  1.8277e-05,  1.5530e-05,
        -5.1382e-06,  3.1410e-05,  2.2823e-05,  1.2740e-05,  1.8591e-05,
         5.2035e-05, -2.4135e-05,  3.7636e-05,  2.3005e-05,  1.3756e-05,
         1.1580e-05,  7.2932e-06,  3.6894e-06, -5.4153e-02,  5.2262e-05,
         2.3610e-05, -6.1192e-03, -3.7600e-06, -7.4953e-02, -8.8131e-06,
         3.8809e-06, -5.5107e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4352e-04,  8.0161e-05,  7.4327e-06, -1.5443e-06,  5.4591e-05,
        -9.0111e-05,  4.8384e-05,  1.5998e-05, -7.8887e-07,  2.0453e-05,
        -6.6367e-06, -4.3731e-05, -3.8023e-05, -1.9714e-05,  6.4525e-05,
         9.7958e-06, -4.1803e-05,  3.2738e-06, -1.4844e-01,  2.4677e-05,
        -3.2852e-05, -8.0455e-03, -1.0207e-04,  2.3842e-05, -3.2600e-05,
        -8.3912e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3460e-06,  1.4471e-05,  9.7328e-07,  1.4265e-05,  7.4321e-06,
         3.7039e-05,  1.6635e-05,  1.9553e-05,  2.5662e-05,  1.5562e-05,
        -2.7888e-05, -6.7084e-06,  2.1174e-05,  1.8319e-05, -1.8669e-05,
         1.2165e-05, -4.6510e-06, -2.9686e-02, -3.0786e-05, -1.6067e-02,
         1.7588e-05,  6.2512e-04, -8.1496e-06, -1.0629e-05,  4.4868e-04,
        -4.9691e-02, -7.7303e-07, -6.9541e-06, -5.8261e-07,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7379e-05,  1.5833e-05,  2.9265e-05, -4.0243e-05, -1.9888e-06,
         1.3276e-06, -5.0084e-07,  1.6596e-05, -1.0057e-06,  2.0353e-05,
        -1.4363e-05, -1.9874e-06,  1.2274e-05,  2.3096e-05, -1.6538e-05,
        -6.0474e-06, -3.3263e-06, -5.1295e-02,  5.4654e-05, -4.1055e-02,
        -2.5221e-05, -5.8066e-05,  9.7696e-04,  1.3098e-05, -4.6308e-05,
         1.2039e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8523e-04,  3.5070e-05,  2.3666e-05,  3.7373e-05,  1.9215e-05,
         1.8799e-05,  2.2703e-05,  4.1902e-05,  6.5446e-05,  2.6237e-05,
         1.3117e-05,  3.2414e-05,  6.0762e-05, -1.6552e-05, -7.0378e-06,
        -1.7058e-05,  2.0871e-05, -6.0307e-02,  3.6384e-06, -7.1202e-02,
        -1.2470e-05, -7.6315e-05, -4.1075e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3020e-05, -2.4969e-05,  8.3237e-05, -1.3191e-05,  2.2128e-05,
        -4.0784e-05,  2.4136e-05, -5.0533e-05, -5.4021e-05,  2.3619e-05,
        -9.2248e-06,  9.5433e-06, -1.1004e-05, -1.0300e-05, -9.7970e-06,
        -9.8020e-02, -4.9992e-06, -3.4904e-05, -1.6088e-05, -3.6130e-05,
        -8.6944e-06, -1.1599e-05,  4.4718e-06, -2.7520e-05, -3.0759e-05,
         1.0244e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2300: [tensor([-1.8914e-04, -1.5551e-05, -2.6096e-06,  6.5491e-05, -2.5139e-05,
        -1.0432e-06, -5.0799e-05,  1.8328e-05, -4.8564e-05, -1.2219e-05,
         2.1102e-05, -1.3846e-01,  7.5861e-06, -3.7508e-05, -3.0089e-03,
        -7.3180e-05,  8.4869e-05,  2.9260e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1216e-04,  2.2231e-06,  5.8528e-06, -7.1163e-06, -1.9589e-06,
         4.2431e-06,  1.7649e-07,  1.0093e-05,  4.5784e-06,  2.3731e-06,
         7.9196e-08, -2.8244e-06,  2.2004e-06,  1.1115e-05,  3.9501e-06,
         3.8578e-06, -1.6650e-06, -1.3365e-07,  1.2968e-05, -2.8892e-06,
         3.9825e-06,  7.1308e-07, -4.7893e-06,  3.1779e-06, -3.3516e-06,
         6.0429e-06,  1.6525e-06,  9.9137e-06, -9.3289e-07, -5.7445e-06,
         4.5957e-06, -1.0588e-06, -5.4478e-06, -1.7587e-06,  3.3311e-06,
        -5.2805e-07,  9.7614e-06, -2.1296e-02,  4.4343e-06, -1.1354e-04,
        -1.8690e-02,  5.8604e-06,  2.4997e-04, -1.3085e-05, -1.8787e-02,
         7.1321e-06, -1.0064e-05,  1.0814e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6467e-05, -1.0802e-06,  1.0689e-06, -1.0005e-05, -5.9806e-06,
         3.6242e-06,  8.1658e-06,  5.3041e-06,  9.0053e-06,  1.4119e-05,
        -1.1918e-06,  2.6908e-05, -1.4221e-05, -7.2930e-06, -4.9632e-06,
        -1.0674e-05,  6.1980e-06,  1.9901e-06, -1.5302e-06, -8.9811e-06,
        -1.5491e-06,  5.0570e-06, -7.6065e-06,  7.1716e-06,  2.8583e-05,
         7.3708e-06,  3.4008e-05, -5.1104e-07,  5.2897e-06, -7.9875e-06,
         1.2545e-06, -1.0759e-05, -1.4397e-05, -7.7571e-06, -2.5872e-05,
         5.7718e-06,  4.7521e-06,  1.4561e-02,  1.3395e-05,  3.2992e-02,
        -9.7970e-06,  5.4427e-04,  1.5187e-02, -2.7459e-07,  4.3424e-06,
         3.7599e-02, -2.7933e-05,  7.7355e-06, -1.7316e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2877e-05, -2.5435e-06, -1.9479e-05, -1.4296e-05, -1.7188e-05,
         2.0389e-05, -1.5820e-05, -9.1461e-07, -5.5686e-06, -2.3791e-06,
         2.4213e-07,  6.1663e-06, -3.4051e-06,  1.2915e-05, -5.9477e-06,
        -1.0567e-05,  9.3503e-06, -7.1753e-06, -3.0130e-05, -5.4254e-06,
        -1.9171e-05,  1.2068e-05, -1.9823e-05,  1.1993e-06, -1.9537e-05,
        -1.5757e-05,  1.3666e-05, -1.4128e-06, -2.0408e-05, -7.4942e-06,
         7.5815e-06,  6.4421e-06, -8.7913e-07, -9.8708e-06, -2.2258e-05,
         5.1902e-06,  8.6084e-06,  3.0504e-02,  1.7647e-05,  2.1566e-03,
        -9.5447e-02, -2.2523e-06,  1.2890e-03,  2.0746e-05,  6.7314e-03,
        -3.3952e-05,  1.4840e-05,  4.6494e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5358e-05,  2.6907e-05,  1.2815e-05, -2.6685e-05, -1.1369e-05,
        -4.5400e-05, -3.2490e-05,  8.9712e-06, -3.1740e-05, -6.7060e-05,
        -2.7478e-05,  4.4241e-05, -3.3666e-05,  3.2926e-05, -1.1569e-05,
        -5.5525e-05, -7.6934e-05, -1.7063e-05, -4.4712e-05, -1.6372e-05,
        -1.4546e-06, -6.9949e-06, -2.1186e-05, -2.1901e-05,  1.8291e-05,
         7.5181e-07, -1.9941e-05,  2.6653e-03, -5.3387e-05,  1.9530e-01,
        -1.1789e-05,  4.2862e-05, -4.0677e-05,  7.1763e-05, -6.8757e-03,
        -9.4401e-05,  4.9394e-06, -3.2962e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0853e-04,  1.8559e-06, -1.3519e-05,  3.0867e-06, -5.0797e-06,
        -8.0729e-06, -4.0758e-05, -3.0185e-07,  1.6294e-05, -2.4309e-05,
         1.4530e-05, -1.2587e-06, -3.0773e-06,  1.4605e-06, -7.5208e-06,
        -3.6904e-07, -5.3974e-05,  8.3686e-07, -1.7490e-05,  4.6270e-05,
         1.6433e-05,  3.0259e-05,  2.2184e-05, -5.1776e-06,  2.6925e-05,
        -2.2881e-06,  7.6742e-06,  2.6197e-05, -1.1236e-01, -3.8685e-05,
        -1.2623e-04, -2.0662e-05,  1.2293e-03,  9.0597e-04,  4.6939e-05,
        -1.6720e-05, -8.7492e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4482e-05,  3.1474e-06,  1.8804e-05,  1.2645e-05, -1.8297e-05,
         4.2251e-05, -1.2688e-04,  5.0591e-05, -8.9564e-06, -8.7955e-05,
         3.3207e-05, -7.9849e-05, -1.1959e-05, -1.8196e-06, -8.0993e-06,
         5.3050e-06,  1.1277e-05,  7.5716e-05,  1.2645e-06, -8.5396e-05,
        -3.8094e-05,  6.0617e-06, -5.9434e-05, -3.5702e-05,  2.2389e-05,
         3.3595e-05, -4.2546e-05,  2.5181e-05,  1.6568e-01,  4.2944e-05,
        -2.6437e-03, -1.5959e-03,  7.4201e-05, -1.4241e-06,  9.0709e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2219e-04,  1.4682e-05, -5.6878e-06,  8.1451e-06,  3.6065e-06,
         3.0112e-05,  1.7560e-05,  1.5274e-05, -1.5442e-05, -9.3365e-06,
         3.4445e-06, -1.1151e-05,  6.5156e-06,  1.4235e-05, -7.6957e-06,
        -1.4698e-05,  5.8454e-06, -1.0292e-05, -1.6007e-05,  4.9859e-02,
        -1.3898e-05,  4.5850e-02,  2.9878e-05,  8.5326e-07, -4.5892e-04,
         3.5685e-05, -1.6710e-05, -2.0259e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4663e-05, -1.0214e-06, -5.1125e-05, -5.4314e-05, -2.7013e-05,
        -1.1299e-05,  8.3311e-05,  1.6428e-05, -5.2349e-05, -1.0457e-04,
        -5.1730e-05,  6.2729e-06, -3.0835e-05,  8.9114e-05,  1.1488e-05,
         2.3510e-05,  2.8413e-05,  4.5870e-05, -3.6070e-05,  1.4628e-01,
        -3.1310e-05, -5.0702e-05,  4.3790e-05,  2.4073e-05,  5.3146e-05,
         1.0427e-01, -3.8595e-06,  1.6400e-05, -1.9581e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8063e-05,  2.0652e-05, -3.4392e-05, -9.9883e-06,  3.2838e-06,
        -3.6453e-05, -2.7876e-05, -1.8464e-05, -1.4735e-06, -3.2405e-05,
         2.9695e-05,  5.2209e-05,  6.7538e-06, -4.1006e-05,  6.5617e-06,
         2.6496e-05, -2.9630e-05, -4.8571e-06, -6.3616e-07,  1.5250e-01,
        -3.3815e-05, -4.5135e-05, -2.9566e-03,  1.5040e-05,  7.0949e-05,
        -8.2696e-05, -3.1815e-04,  2.3689e-02,  3.1066e-05,  6.9946e-06,
         2.8804e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3871e-05, -3.2387e-05, -9.7186e-05, -9.8290e-05,  3.5443e-05,
        -1.3000e-05,  1.4430e-05,  1.5862e-05, -5.7881e-05, -1.0067e-05,
         1.0430e-05, -4.0357e-05,  2.8720e-05,  6.8243e-05, -8.1652e-05,
         2.7971e-05,  2.9465e-05, -3.1113e-06,  9.4748e-05,  1.5734e-01,
         5.7435e-03, -2.3202e-05,  2.9509e-04, -1.6679e-04, -2.2111e-05,
        -1.2051e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2171e-05, -1.9108e-06,  4.6720e-07,  6.1126e-07, -2.0206e-05,
         2.5499e-05, -2.4232e-05, -1.3082e-05,  7.4046e-06, -1.6017e-05,
        -2.0082e-05,  7.3570e-06, -2.9419e-05, -2.9393e-05, -5.6348e-06,
         1.2628e-05,  4.7422e-06,  7.2188e-06, -5.4515e-05, -5.1819e-02,
        -6.3295e-06,  2.9274e-07, -1.4627e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
