Iter #50: [tensor([-1.7678e-03, -5.1509e-03, -3.0550e-04, -2.4878e-04, -8.0129e-04,
        -1.0485e-03, -1.3090e-03, -1.0072e-03, -3.2695e-04, -4.2602e-04,
         1.2457e-04, -2.9404e-03,  8.9224e-05,  4.4101e-04, -6.3769e-05,
         5.8626e-04, -2.8927e-03, -1.0942e-04,  3.9046e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5423e-03, -3.5680e-03, -2.2054e-04, -9.8477e-04, -3.2084e-03,
        -4.5136e-04, -8.8576e-04, -8.4336e-04, -1.3202e-04, -3.0865e-03,
        -1.0980e-04, -9.9530e-05, -3.1357e-04, -2.6817e-04, -2.3614e-03,
         2.3705e-04, -1.0878e-04, -1.7363e-04, -2.0701e-03,  8.7992e-05,
         6.2300e-05, -2.2542e-04,  2.3408e-04,  1.3750e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9134e-03,  3.8168e-03,  2.2291e-04,  9.2456e-04,  3.8557e-03,
         3.0070e-04,  9.1417e-04,  1.0522e-03,  2.6275e-04,  3.4031e-03,
         2.5594e-04,  2.8354e-04,  1.1008e-04,  3.2356e-05,  2.0503e-03,
        -3.7683e-04, -1.3199e-04,  1.0787e-03,  5.0308e-05, -1.9553e-05,
         2.3070e-03, -5.4816e-04, -6.2658e-05, -3.4256e-04, -1.9141e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3918e-03, -3.2864e-03, -4.2392e-04, -1.0107e-03, -3.1810e-03,
        -2.9174e-04, -7.3623e-04, -7.9760e-04, -1.4634e-04, -3.4596e-03,
        -1.7468e-04, -4.3472e-04, -1.9400e-04, -1.0750e-04, -2.4894e-03,
         2.1048e-04,  5.4284e-05, -9.0322e-04, -2.1445e-04, -1.8511e-04,
        -2.1285e-03, -2.8342e-04,  6.4991e-05,  1.0503e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5124e-03,  3.3911e-03,  2.6555e-04,  1.4976e-03,  3.5986e-03,
         8.0441e-04,  5.7402e-04,  2.5878e-04,  8.7223e-05,  3.2556e-03,
        -4.2659e-06,  4.3550e-04,  1.5481e-04,  2.5601e-04, -7.8695e-05,
         1.8094e-04,  5.2640e-04,  2.1754e-04, -1.8164e-04, -3.6637e-04,
         2.3332e-04,  2.4692e-04, -2.8751e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6669e-03, -2.5049e-03, -2.1014e-04, -1.1581e-03, -2.7086e-03,
        -6.4702e-04, -2.2025e-04, -2.3668e-04, -5.6910e-05, -2.3955e-03,
        -9.6612e-05, -8.9864e-05, -5.4321e-05, -5.7314e-05, -2.5322e-04,
         3.5303e-04,  9.2734e-05, -1.5365e-04, -1.4557e-03,  2.1112e-04,
        -4.8933e-04, -1.6858e-03, -4.5161e-04,  1.6393e-05,  1.3845e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4315e-03,  2.9061e-03,  2.3967e-04,  1.4460e-03,  3.6151e-03,
         6.2008e-04,  5.2065e-04,  2.3150e-04,  2.4607e-04,  2.8836e-03,
         1.2329e-04,  3.1517e-04,  3.4981e-04,  7.5757e-05, -3.3656e-04,
        -2.8459e-04,  2.6383e-04,  1.3971e-04,  2.4068e-04, -3.9886e-04,
         1.8205e-04,  2.0887e-04, -1.5338e-04,  1.7815e-04,  2.1410e-04,
        -1.9879e-04, -2.1964e-04, -4.0871e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2806e-03,  2.5204e-04,  4.9277e-04,  1.6251e-04,  6.0343e-04,
         6.7693e-04,  2.1065e-03,  1.5125e-04,  2.7496e-04,  2.6238e-04,
         3.3076e-04,  3.6130e-04, -2.5559e-05,  2.7667e-04,  2.3389e-03,
         1.6520e-04,  1.5942e-04,  1.9261e-04,  9.8795e-05,  5.2808e-05,
         1.6689e-03, -6.3358e-05,  2.3532e-04,  4.2089e-05,  1.6300e-04,
        -1.2282e-04,  6.6634e-05, -1.0037e-04, -1.9568e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0758e-03,  2.7092e-04,  5.2958e-04, -5.9607e-05,  4.0110e-04,
         7.1624e-04,  2.4046e-03,  1.2982e-04,  3.0757e-04,  2.6594e-04,
         2.5542e-04,  3.3247e-04, -2.3228e-05,  2.5793e-04,  2.4382e-03,
         1.3369e-04, -7.0791e-06,  1.8666e-04,  1.1165e-04,  6.2581e-05,
         1.6449e-03, -3.9677e-05, -1.6933e-04, -8.5900e-05,  1.3681e-04,
         2.0417e-04, -4.5716e-05,  1.1500e-05, -1.3857e-04, -2.1375e-04,
        -1.2710e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0559e-03,  4.6075e-04,  7.6397e-04, -7.7438e-05,  6.7117e-04,
         9.7972e-04,  3.1926e-03,  2.5812e-04,  4.4048e-04,  4.4465e-04,
         3.7150e-04,  3.8936e-04,  8.4222e-05, -1.0768e-04,  2.6942e-03,
         3.6419e-05,  5.6529e-05,  3.6774e-04,  1.0863e-04, -4.3794e-05,
         2.0518e-03, -1.6642e-04,  3.5675e-04,  1.6022e-04,  2.5285e-03,
        -3.5233e-04,  2.8321e-05, -1.9386e-04, -2.0178e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7990e-03,  2.9690e-03,  1.5362e-04,  1.2875e-03,  3.3308e-03,
         1.6132e-04,  5.9446e-04,  4.7076e-04,  3.1053e-04,  4.5057e-04,
        -2.8411e-04,  8.8236e-04,  6.1277e-04,  6.7508e-04,  6.7153e-04,
         2.2228e-03,  2.2400e-04,  1.7680e-04,  6.8851e-05,  1.9177e-04,
         1.0947e-04,  3.2176e-04, -1.0192e-04, -8.3011e-06,  9.7959e-05,
         1.1139e-04,  3.2442e-04,  2.1760e-04, -2.8009e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5260e-03,  3.3301e-03,  2.3410e-04,  1.4609e-03,  3.0879e-03,
         2.3283e-04,  5.1947e-04,  2.3213e-04,  4.2711e-04,  5.1319e-04,
        -4.7437e-04,  8.4579e-04,  6.4796e-04,  5.1584e-04,  5.8568e-04,
         2.3187e-03,  1.1263e-04,  3.3289e-04,  9.1429e-05,  1.5522e-03,
        -1.4599e-04,  3.8423e-04,  2.4832e-04,  4.6806e-05, -1.1056e-04,
        -3.2485e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-5.1078e-03, -8.4796e-03, -3.0139e-05, -1.1393e-04, -5.6296e-04,
        -7.3567e-05, -3.6932e-04, -3.7046e-04, -9.6572e-04, -6.1598e-03,
        -1.0242e-03,  2.5743e-04, -7.1107e-05, -6.6744e-03, -4.5386e-04,
         4.1767e-05, -2.2177e-05, -1.7366e-04,  2.1162e-04,  6.4295e-05,
         7.3612e-05, -2.0559e-04, -6.6138e-05, -5.1635e-05,  2.6487e-04,
        -8.9650e-05, -1.8645e-04,  1.0330e-05,  2.5294e-04,  8.1773e-05,
         1.4255e-04,  3.9314e-04, -1.4111e-04,  2.7128e-04, -5.0244e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3458e-04,  8.1826e-03, -5.3828e-05,  1.1722e-04,  7.8367e-04,
         2.8037e-05,  3.4937e-04,  5.8309e-04,  6.4018e-04,  4.8630e-03,
         7.9641e-04, -1.6172e-04, -9.0045e-05,  5.8720e-03,  4.5900e-04,
         4.1833e-05, -1.9301e-04,  1.7123e-04, -8.5324e-05,  1.0647e-04,
         3.2095e-05, -5.9826e-05,  2.7141e-03, -1.9853e-05,  1.3564e-05,
        -3.3904e-04,  1.0091e-05,  1.8039e-04, -1.9022e-04, -1.0234e-05,
        -1.0307e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1174e-03,  1.0413e-02,  1.0051e-05,  1.4286e-05,  9.1877e-04,
         2.9996e-04,  4.0162e-04,  4.8721e-04,  7.7310e-04,  6.8498e-03,
         1.2169e-03, -2.9559e-04,  1.7810e-04,  6.1956e-03,  1.4736e-04,
         7.2061e-05, -2.8885e-04,  2.9163e-04,  3.7967e-04, -4.4780e-05,
        -4.8037e-05,  6.1474e-05, -2.3171e-05, -2.9306e-04,  3.3733e-03,
         5.1344e-04, -2.7960e-04,  1.3973e-05, -1.5699e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7101e-04,  1.1831e-02,  4.3582e-04,  6.4179e-04,  3.7648e-04,
         1.7180e-03,  5.8398e-04,  1.3473e-03, -1.9697e-04,  7.3629e-04,
        -9.9676e-05,  3.1792e-04,  5.8540e-04,  2.7241e-04,  1.3928e-04,
         6.8145e-05,  3.3218e-04, -2.6147e-04,  3.2271e-04, -1.2108e-04,
         4.7187e-04, -3.3598e-04, -1.9404e-04,  8.3166e-04,  5.8015e-05,
        -1.1117e-04, -5.4626e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1249e-05,  9.7562e-03, -2.1221e-05,  9.2656e-04,  1.3969e-04,
         1.6209e-03,  1.7704e-04,  7.1985e-04,  1.2864e-04,  8.1398e-04,
         1.7241e-04,  1.1418e-04,  4.1596e-04,  4.1507e-04,  1.5271e-04,
        -9.9109e-05, -9.3569e-09,  3.0311e-03, -1.4396e-04,  2.1354e-04,
        -6.8512e-04,  1.1385e-05,  1.0135e-03,  1.8830e-04,  6.9833e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0837e-03,  9.8261e-03,  3.9198e-05,  7.3932e-04,  2.1822e-04,
         1.8740e-03,  3.5809e-04,  9.6714e-04,  2.1052e-04,  1.0744e-03,
         4.7101e-05,  4.0286e-04,  5.6113e-04,  4.6125e-04,  2.9593e-05,
        -2.4213e-04,  9.3819e-05,  3.8386e-03, -3.3315e-04,  1.6456e-04,
        -2.3424e-04,  1.2221e-03, -1.4483e-04, -3.8645e-04,  2.9685e-04,
         1.3542e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8952e-04,  4.6144e-04, -2.7220e-04,  2.2872e-04,  4.0084e-04,
         1.2336e-03,  8.1196e-04,  3.6306e-04,  1.8272e-04,  3.2653e-04,
         2.5317e-04,  7.7735e-03,  1.3181e-04,  6.2860e-04,  7.4889e-04,
         3.9261e-04,  5.1402e-05,  2.6087e-04, -7.7081e-05, -4.8481e-05,
        -6.1935e-04,  1.4134e-04,  2.0761e-04, -2.5630e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0548e-03,  2.6195e-04, -8.9178e-05,  2.7233e-04,  3.0330e-04,
         9.5211e-04,  7.3775e-04,  1.7081e-04,  1.4397e-04,  1.4774e-04,
         1.5626e-04,  5.0698e-03,  3.2675e-04,  3.3218e-04,  4.6160e-04,
         2.0071e-04, -1.2766e-05, -2.9335e-05, -8.8640e-05, -1.6311e-04,
         1.1267e-04,  2.4076e-03,  2.0598e-04, -1.1237e-04, -1.2952e-04,
        -1.9380e-04,  9.7949e-06, -5.4828e-05,  6.3426e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0891e-04,  4.0397e-04, -3.3668e-04,  3.0898e-04,  2.0919e-04,
         9.2010e-04,  6.5283e-04,  2.8288e-04, -4.2310e-06,  2.6841e-05,
         8.3566e-05,  3.8837e-03,  2.6730e-04,  3.5234e-04,  4.7075e-04,
         3.7298e-04,  7.0366e-05,  1.6920e-04,  1.6123e-04,  2.0768e-03,
         3.4715e-04, -2.1964e-04,  1.7552e-04,  5.4273e-04,  7.0662e-05,
         2.1241e-06,  3.9357e-04, -2.7894e-04, -2.4445e-04, -2.5938e-04,
        -4.6096e-06, -1.5638e-04, -3.7521e-04, -1.5360e-04, -9.5055e-05,
        -2.0949e-04, -1.2107e-04,  1.6258e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1890e-04,  4.5277e-04, -4.1203e-04,  1.9378e-04,  2.4534e-04,
         9.4700e-04,  9.0644e-04,  2.4956e-04, -5.3275e-05, -1.1539e-05,
         1.5410e-04,  5.7361e-03,  2.8859e-04,  3.4597e-04,  5.1289e-04,
         1.9071e-04,  1.8461e-04,  1.1569e-04,  7.3224e-05, -4.4743e-04,
        -4.5653e-04, -5.6205e-05,  3.6169e-04,  2.6696e-03,  8.7671e-05,
         3.5836e-04, -6.2236e-05, -8.7475e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9488e-04,  4.3743e-04, -1.3338e-04,  2.8770e-04,  3.3802e-04,
         1.0422e-03,  1.0542e-03,  1.9394e-04, -6.5805e-06,  4.0804e-05,
         1.7359e-04,  4.6913e-03,  4.2701e-04,  5.3625e-04,  5.7711e-04,
         1.6138e-04,  9.1442e-05,  1.7128e-04,  2.4105e-04,  3.7391e-05,
        -4.5508e-04, -5.4622e-05,  2.9107e-04, -2.4989e-05, -4.3791e-05,
         6.7920e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.4160e-05,  4.6745e-04, -4.3752e-04,  2.4680e-04,  3.5898e-04,
         1.1906e-03,  9.2007e-04,  1.9931e-04, -1.0261e-04, -2.3747e-05,
         2.2833e-04,  6.1269e-03,  3.9859e-04,  4.9124e-04,  5.2566e-04,
         3.1146e-04,  2.3570e-04, -4.6973e-05, -7.1208e-06, -1.0542e-04,
        -4.1078e-04,  9.6529e-05,  1.6241e-04, -1.1634e-05, -1.0236e-05,
        -2.8346e-04,  2.3733e-03,  5.6437e-04, -1.3318e-04, -1.8603e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 1.2942e-03, -3.3278e-02,  9.3271e-06,  1.9408e-06, -2.8465e-04,
        -1.3685e-02,  1.5614e-05,  5.0387e-05, -2.3128e-05, -1.2287e-02,
         9.5652e-06,  2.5396e-05, -8.6498e-03, -3.1626e-05,  1.0080e-05,
        -2.1107e-04, -1.6794e-04, -3.5097e-04,  1.7653e-05, -2.8367e-05,
        -1.7840e-05, -7.8107e-05,  2.1977e-05,  8.5350e-05, -6.9965e-06,
         3.6399e-06,  5.0792e-05,  5.9510e-05, -5.4313e-05, -7.2428e-06,
         4.9653e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.2794e-04, -3.0710e-02, -4.4388e-05, -1.0356e-04, -2.2795e-04,
        -2.0936e-02, -7.1626e-05,  1.0850e-04, -1.5958e-05, -1.3904e-02,
         1.6321e-04,  8.0580e-05, -1.1610e-02,  1.5871e-04,  1.7290e-04,
        -2.3376e-04, -1.4087e-04, -3.7068e-04,  1.3002e-04, -4.4053e-05,
        -2.0804e-05,  6.5126e-05,  2.2412e-05,  9.6068e-05,  1.7370e-04,
         2.9727e-04, -3.5740e-06,  2.3082e-05, -2.3336e-04, -5.3027e-05,
        -8.5890e-05,  1.0422e-04,  8.7325e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8216e-03, -2.4819e-02, -5.8001e-05, -6.6434e-05, -2.0850e-04,
        -1.7376e-02, -7.0478e-05,  7.8051e-05,  3.3326e-05, -1.0758e-02,
         3.6150e-05,  5.9187e-06, -6.2961e-03, -1.8976e-05,  6.2092e-05,
        -2.8030e-04, -1.1586e-04, -2.4352e-04, -5.4587e-05,  1.3633e-04,
        -6.8016e-05,  4.1072e-05,  1.1127e-06, -7.4779e-06,  9.9389e-05,
         7.4995e-05, -2.2088e-05,  7.9343e-05, -1.6883e-04, -1.9805e-03,
         9.8547e-05,  1.2753e-04,  1.0183e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2630e-03,  3.3121e-02, -3.7427e-05,  6.8047e-05,  2.9475e-04,
         1.3992e-02,  2.7305e-05,  5.3625e-05,  4.0189e-05,  1.5911e-02,
         4.0682e-05, -1.7158e-05,  9.9285e-03, -7.6822e-05,  2.3737e-06,
         2.4790e-04,  4.1638e-04,  3.3909e-04, -2.5839e-05, -2.5491e-06,
        -3.0773e-06,  1.1672e-04,  4.4945e-05, -1.4947e-05,  5.2695e-05,
        -9.6868e-05, -2.5584e-05,  2.6374e-05,  1.6619e-04, -2.3146e-05,
        -3.0107e-04,  7.6101e-05,  2.7563e-05,  4.6892e-05,  1.1313e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4480e-04, -2.6233e-02, -1.2334e-06,  1.1274e-06, -2.3234e-04,
        -1.6528e-02, -2.4719e-06,  4.2565e-05,  1.1479e-05, -6.4477e-03,
         2.5908e-05,  5.4241e-05, -1.2009e-02,  9.4510e-06,  6.5300e-05,
        -1.5780e-04,  2.2345e-05, -2.5315e-04,  3.2753e-05,  6.6722e-05,
        -8.0491e-06, -9.9526e-07,  6.4808e-06, -5.0474e-05, -1.7237e-03,
        -1.0663e-04, -1.7879e-04,  4.0699e-05,  1.4867e-05,  3.4135e-05,
         2.0271e-05,  9.7055e-05,  4.8209e-05, -1.5816e-03, -1.5822e-04,
         7.4108e-05,  1.0470e-04, -1.6638e-06,  7.6141e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3993e-04, -3.3348e-02, -1.4450e-05, -4.1703e-05, -4.0097e-04,
        -1.2379e-02, -1.7327e-04,  9.1184e-06,  2.0936e-05, -1.0105e-02,
         5.9424e-05, -3.7884e-05, -9.8510e-03,  1.4333e-04,  3.7790e-05,
        -2.5594e-04, -1.8671e-04, -4.2600e-04,  1.7831e-05,  3.4378e-05,
        -1.0556e-04, -1.0245e-05, -8.3598e-05,  2.9802e-05,  7.5119e-05,
        -4.3812e-06,  8.4457e-05,  3.9818e-05,  6.2888e-05, -1.1033e-04,
        -4.3275e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3881e-04, -3.5543e-02,  9.8359e-05, -2.8587e-05, -1.7625e-04,
        -1.7396e-02, -7.9765e-05,  3.3190e-05,  1.0431e-05, -1.1011e-02,
         9.1654e-05,  9.8389e-05, -8.6527e-03,  1.8777e-05,  5.0390e-05,
        -2.3956e-04, -2.3254e-04, -2.1864e-04,  1.3744e-05,  2.4207e-05,
        -8.5983e-05, -3.6291e-05,  5.4540e-05,  1.1168e-04, -6.2387e-05,
         3.1622e-05, -6.1403e-05,  5.6151e-05, -6.9775e-05,  5.8858e-05,
         1.9179e-05,  9.3827e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2611e-03, -3.0619e-02,  6.7092e-05, -6.3868e-05, -3.0733e-04,
        -1.7175e-02, -4.6759e-05,  9.0796e-05,  1.6542e-05, -1.1941e-02,
         8.6032e-05, -8.5798e-06, -1.2220e-02,  4.0411e-05,  5.8448e-06,
        -3.0112e-04, -1.3058e-04, -2.4987e-04,  1.0265e-04,  3.2625e-05,
        -1.1632e-04, -7.8324e-05, -3.8295e-05,  3.4103e-05,  2.2869e-04,
         5.0844e-05, -8.0415e-05, -2.3686e-05,  8.8147e-06, -6.6137e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3178e-04, -2.8588e-02,  9.0200e-05, -4.3331e-05, -2.1164e-04,
        -1.3664e-02, -5.8566e-05,  1.2458e-06,  5.7310e-05, -1.0452e-02,
         1.9169e-05,  1.5878e-04, -7.5089e-03,  4.6069e-05,  5.0872e-05,
        -3.0348e-04, -2.0375e-04, -2.9642e-04,  5.8662e-06, -1.0250e-05,
        -5.2899e-05, -8.0041e-05, -6.4716e-05, -6.5578e-06,  9.7603e-05,
        -3.9992e-05,  4.1570e-05,  8.4940e-05,  6.3419e-05, -6.2893e-05,
        -6.5415e-06, -7.7084e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8857e-04,  6.4557e-05, -2.3765e-04,  1.4501e-04,  3.2351e-04,
         9.8178e-04,  4.6876e-04, -1.1565e-04,  1.5725e-03,  2.5385e-02,
        -9.6892e-05, -9.7081e-05,  6.6301e-06, -2.3606e-04, -5.2254e-05,
        -5.7071e-04, -8.2836e-06, -2.2515e-04,  6.8153e-04, -2.8780e-04,
         2.5852e-05, -1.7342e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4729e-04,  5.4769e-05, -1.3900e-04,  4.6638e-05,  1.1837e-04,
         1.0417e-03,  3.8278e-04, -1.3319e-04,  1.5222e-03,  3.2404e-02,
        -1.6133e-04,  3.6615e-05,  1.0574e-05, -1.2947e-04, -1.8076e-04,
        -3.8624e-04, -2.5036e-04, -3.7933e-05, -1.7435e-04,  1.5495e-04,
         1.3726e-04, -2.9827e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0277e-05,  1.7948e-04, -8.9204e-05,  5.4613e-06,  1.7886e-04,
         1.3021e-03,  3.0825e-04, -1.8502e-04,  1.6561e-03,  2.5488e-02,
        -2.6252e-04, -5.6922e-05, -8.4868e-05,  2.9354e-05, -5.5643e-06,
        -2.8780e-04, -1.9075e-04,  2.5741e-04, -5.6552e-05, -1.7900e-04,
         3.2320e-03, -5.7135e-04,  1.7564e-05,  1.0329e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 5.2724e-04,  6.7099e-05,  1.8781e-04, -1.0947e-04, -5.6589e-04,
        -2.3350e-05,  8.3347e-02,  2.7360e-04, -1.3854e-04, -1.2929e-05,
         5.0641e-04,  3.2069e-04,  4.1009e-05, -3.3799e-05, -1.1449e-04,
        -3.4244e-05, -2.6319e-05, -3.9381e-04,  1.2970e-03,  2.5816e-04,
         3.9477e-04,  1.3135e-04, -1.3074e-04, -2.3710e-05, -1.6237e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4878e-03, -1.1369e-04, -1.6997e-04,  1.2326e-04,  5.3153e-04,
         4.7266e-05, -6.5526e-02, -7.9451e-05,  7.4922e-05,  3.8155e-05,
        -8.0031e-04, -2.0647e-04,  1.1779e-04,  5.3487e-05, -6.4152e-05,
         1.7588e-05, -1.2543e-05,  6.7329e-05,  2.9149e-04, -1.6823e-04,
         2.8889e-04, -1.0836e-04, -1.0772e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3853e-04, -6.7095e-02,  6.5545e-05, -1.9169e-04, -2.5767e-02,
         1.5088e-05, -1.0045e-04,  1.4478e-04, -3.1720e-05, -1.7067e-04,
        -1.1043e-04, -8.8629e-05,  1.6047e-04, -2.5413e-04, -1.1312e-04,
         8.7007e-05,  5.2793e-05, -3.2428e-05, -1.0531e-04,  7.8103e-05,
        -4.7306e-06, -6.3175e-05, -7.9596e-05,  1.8991e-06,  5.2541e-05,
        -1.3019e-04,  9.8517e-05,  2.8632e-05, -7.3075e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0232e-03, -8.2368e-02,  5.5171e-05, -3.9429e-04, -3.9748e-02,
         4.5549e-05, -5.8676e-07,  1.7243e-04, -1.7464e-04, -1.8455e-04,
        -1.3586e-04, -7.6952e-05,  2.1427e-04, -5.6256e-04, -2.4276e-05,
         5.5449e-05, -4.1140e-05, -5.5572e-06, -3.1772e-04,  3.4463e-06,
        -1.3436e-05,  1.1339e-04, -6.5424e-05, -5.7879e-04,  2.4352e-04,
        -1.1596e-04,  5.6950e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0942e-03, -7.0454e-02, -4.8255e-05, -5.8802e-04, -4.7263e-02,
        -7.7509e-05, -9.4310e-05,  5.9615e-05, -9.4645e-05, -1.2745e-04,
        -1.9562e-04, -1.0621e-04,  1.9739e-04, -6.3743e-04, -7.9526e-05,
         5.0581e-05, -1.1664e-04, -4.8603e-05, -2.2279e-04,  1.7070e-05,
         9.2408e-06, -4.0662e-05, -1.2258e-04, -1.4915e-06, -2.2370e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4441e-04, -8.0687e-02, -9.5202e-05,  1.2612e-04, -1.6697e-05,
        -5.2286e-04, -1.5308e-04, -1.8845e-04,  1.0509e-05, -3.0985e-02,
        -1.1872e-05,  1.1886e-04,  7.3745e-05, -8.2275e-05, -9.2359e-05,
        -9.2729e-05,  6.8567e-06,  5.7649e-05,  1.2361e-04,  2.4846e-04,
        -2.1730e-04, -9.4583e-05,  1.3284e-07, -4.0108e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2850e-04, -6.6558e-02, -6.6388e-05,  1.1588e-04, -4.0400e-05,
        -4.1806e-04,  1.6217e-04, -2.6872e-05,  3.2918e-05, -4.2917e-02,
         4.4533e-05,  1.2176e-04, -8.3914e-05, -1.1314e-05, -3.2554e-05,
        -2.0111e-04,  6.4221e-05, -6.2170e-06,  6.6738e-05, -6.6254e-05,
        -7.7986e-04, -2.9054e-05, -8.6091e-06,  5.3768e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9946e-04, -9.8779e-02,  3.1745e-05,  1.3055e-04, -9.2156e-05,
        -4.5507e-04, -7.4571e-05,  1.8522e-05, -2.6486e-05, -3.1671e-02,
         6.3285e-05, -8.7592e-05, -5.8637e-05, -9.8752e-05, -1.0244e-04,
        -6.1956e-05, -1.6587e-06,  7.1292e-05, -2.0689e-05,  1.6683e-04,
         2.6402e-06, -2.7010e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4971e-04, -4.4590e-02, -2.2851e-05, -4.2681e-04, -3.6791e-02,
        -3.9261e-05, -1.1616e-04,  1.0693e-04, -2.6052e-04, -2.1339e-02,
         2.4643e-04,  7.3551e-05,  4.0074e-05, -2.7771e-04,  2.3141e-05,
        -5.4958e-06,  1.4998e-04, -3.5436e-05, -2.4383e-04,  2.1722e-04,
         1.2983e-05,  4.2541e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4789e-04, -4.6782e-02,  8.2763e-05, -1.2451e-04, -1.7877e-02,
        -3.5956e-05,  1.4393e-06,  1.1829e-04, -1.9500e-04, -1.1757e-02,
         1.4086e-04, -5.4068e-05,  2.3340e-06, -1.5122e-04,  3.3034e-05,
        -1.2671e-05,  9.0203e-05,  1.4832e-05, -1.4840e-05,  1.0769e-04,
        -8.3751e-05, -4.2334e-05,  1.2997e-04, -9.6570e-06,  3.8363e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6847e-04, -3.8285e-02,  1.0024e-04, -1.3360e-04, -1.9344e-02,
        -4.8563e-05,  4.6969e-05,  8.3617e-05, -4.9516e-05, -2.0189e-02,
         1.3570e-04,  2.0317e-05, -1.1125e-04, -1.7517e-04,  5.5851e-05,
        -4.6324e-06,  7.4186e-05,  2.4146e-05, -1.0093e-04,  1.4833e-04,
         3.9040e-05, -7.0539e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0708e-03,  9.7849e-06,  1.9891e-04,  1.4926e-03,  2.2128e-04,
        -1.4245e-04, -5.7597e-04,  2.3590e-04, -3.2537e-04, -1.9153e-04,
        -6.7297e-05, -1.1057e-04,  2.2340e-04,  2.9515e-03, -7.1321e-05,
        -1.8897e-04, -2.0579e-05, -4.7759e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 4.8743e-04, -1.8956e-01, -3.3998e-05,  3.2268e-05,  4.0182e-05,
         1.2082e-05, -2.9458e-04, -1.7046e-04, -5.5017e-04, -1.6242e-05,
        -1.6069e-04, -1.7821e-04, -3.1039e-05,  7.1014e-05,  1.3652e-04,
        -2.3170e-05,  7.4813e-05, -1.7515e-04,  1.2825e-04, -4.0153e-04,
         9.1617e-05, -2.4930e-04,  1.0300e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9089e-04, -1.0659e-01,  1.8707e-05, -1.0269e-04, -5.0658e-04,
        -4.0153e-05, -2.8097e-04, -3.1844e-05, -1.6189e-03, -3.3094e-05,
        -1.1055e-04, -2.4336e-02, -3.5120e-06, -9.8476e-06,  1.2964e-05,
        -5.3884e-05,  9.9740e-05, -5.6635e-05, -1.0373e-04,  5.1850e-05,
        -6.5796e-05,  9.2937e-05, -2.1308e-05, -2.1980e-05, -1.1048e-05,
         7.9368e-05, -7.7877e-05, -5.0903e-05, -2.8684e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7214e-03,  1.7748e-01,  1.3702e-05, -3.7894e-05,  8.7414e-04,
         5.3067e-05,  1.0716e-04,  1.5086e-04,  4.2723e-03, -3.4464e-06,
         2.5360e-04,  2.3611e-02,  5.9974e-05,  2.5448e-05,  1.7397e-04,
         3.7744e-05, -3.0516e-05,  4.4486e-05,  1.1860e-05,  2.5582e-05,
         6.1369e-06,  8.0392e-05, -6.4027e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0941e-03, -4.9104e-02, -8.0946e-05, -9.7785e-05, -5.9636e-04,
        -3.1165e-05, -4.8433e-04, -9.1476e-05, -5.3623e-03, -6.2427e-05,
        -1.6875e-04, -4.9402e-02,  6.9237e-05, -4.0073e-05, -1.0828e-04,
        -1.7345e-04,  4.8666e-05,  1.0779e-05, -9.7517e-05,  3.9094e-05,
         3.0782e-05,  5.2465e-07,  4.3631e-05, -8.4888e-05, -2.2408e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7093e-03,  2.3483e-04, -6.6029e-07,  4.1195e-05, -2.1588e-04,
        -1.5493e-04,  4.7989e-04, -2.3891e-05, -1.5010e-03, -4.0514e-02,
        -2.8263e-04,  3.8975e-05,  1.1866e-04,  3.3715e-04,  1.0515e-04,
         1.2020e-04,  3.1933e-05,  2.7330e-05,  5.4746e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1980e-05,  2.8723e-04, -3.9310e-05, -1.0376e-04, -2.9151e-04,
        -2.8936e-04,  2.5142e-04, -8.6794e-06, -1.9751e-03, -1.1410e-01,
        -3.8958e-04, -3.6678e-06, -1.1120e-04,  1.0238e-04,  2.3219e-07,
         4.6672e-05, -1.5666e-04, -1.5399e-04, -7.3951e-05, -9.8566e-05,
         9.0751e-05, -9.3791e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7436e-05,  2.2152e-04,  3.7753e-05,  8.2631e-06, -2.9441e-04,
        -1.9749e-04,  2.6509e-04,  2.9580e-05, -4.6997e-04, -8.4283e-02,
        -1.6298e-04, -3.2747e-05,  7.7537e-05,  1.6724e-04,  1.0480e-04,
         2.4151e-05,  8.5637e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2736e-03, -7.7798e-02,  3.5523e-05, -1.0965e-04,  6.6285e-05,
         3.5424e-05, -8.9865e-05, -3.5372e-05, -2.1280e-02, -9.5196e-06,
        -1.8960e-04,  4.6254e-05,  9.0035e-05,  9.2055e-05, -4.8388e-05,
         2.7965e-05,  1.7398e-04, -5.1856e-05,  2.7448e-05,  3.5469e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8195e-04, -5.9826e-02, -1.6023e-05,  3.8954e-05,  5.7103e-05,
         5.5666e-05, -1.1127e-04, -6.5025e-05, -2.4681e-02, -6.8123e-06,
        -9.7690e-05,  4.1943e-05,  2.6514e-05,  2.4245e-04, -4.3841e-05,
        -4.3504e-05, -9.7814e-05,  1.4736e-05, -1.0465e-05,  1.4775e-05,
        -1.3331e-06, -4.0756e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1675e-04, -6.4478e-02, -4.7582e-05, -1.2366e-04,  4.8892e-05,
         9.9215e-05, -1.0756e-04,  2.0565e-05, -1.3672e-02,  1.9000e-05,
        -2.2098e-04,  4.8951e-05,  1.2129e-04,  3.1367e-05, -4.4368e-05,
        -8.9064e-05, -6.2337e-05,  2.7956e-05, -9.7426e-05,  1.0720e-04,
         2.2026e-05,  2.5270e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1186e-04, -3.3083e-05,  2.8043e-06, -3.5810e-06,  1.4003e-04,
        -2.4123e-04,  4.1000e-05, -2.0534e-04, -4.0154e-02, -2.6570e-05,
         9.1261e-05, -1.2113e-04, -1.4744e-06, -1.4885e-02,  4.0750e-05,
         3.7640e-06, -6.1797e-05, -1.3450e-06,  1.7134e-04, -3.1524e-05,
         5.6896e-06, -1.3655e-05,  1.3951e-05, -3.2183e-05, -1.4947e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1335e-04, -8.7628e-05, -3.8410e-06, -4.7829e-05,  3.0441e-04,
        -1.6045e-04, -3.0907e-05, -2.3121e-04, -6.8102e-02, -5.3122e-05,
         1.3027e-05, -9.3694e-05, -2.1138e-04, -1.2888e-02,  7.6543e-05,
        -1.0740e-05, -6.8734e-05,  3.0599e-05,  2.7443e-05, -1.2617e-05,
         5.1790e-05,  7.6965e-05,  4.1639e-05, -6.3728e-05,  1.9035e-04,
        -3.9788e-06,  2.4803e-05,  8.4230e-07, -6.1342e-05, -2.3996e-05,
         9.2289e-06], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-1.7730e-03,  2.1246e-01, -1.3274e-04, -3.6434e-04,  4.1898e-04,
         5.8236e-05,  1.1256e-04, -7.1511e-05, -1.0238e-04, -7.9611e-05,
        -1.4279e-04,  1.0668e-04,  1.1980e-04,  8.5683e-05, -2.0211e-04,
         1.0284e-04, -4.3307e-06, -2.6332e-04, -3.5141e-04,  6.0451e-05,
         1.0194e-04,  2.6491e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9634e-04, -1.2059e-01, -2.8807e-05,  1.5022e-04, -1.2775e-04,
         5.4925e-05, -9.2546e-06,  1.9290e-05,  7.4900e-05,  1.1323e-04,
        -2.5159e-05, -1.8428e-06,  8.9707e-06, -4.4098e-05,  1.1195e-04,
        -1.1952e-04,  3.4694e-05,  4.9950e-06, -4.5010e-05,  1.1154e-04,
        -1.6874e-05, -6.8473e-05,  4.2798e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1008e-04,  9.7423e-02,  6.6975e-05,  8.5244e-05,  2.4133e-04,
         2.9313e-04, -2.6763e-05,  2.5801e-03, -1.4230e-05, -1.6069e-04,
         1.0788e-04,  1.2226e-04,  7.2030e-05, -4.0538e-05,  1.7727e-04,
         7.2451e-05,  4.7932e-05, -3.5359e-05, -3.6012e-05,  1.3637e-04,
         1.2472e-04,  1.6781e-05, -6.9800e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0357e-04,  1.3100e-01, -7.1685e-07,  3.5948e-05,  4.4310e-04,
         7.8947e-05, -5.1063e-05,  1.6760e-03, -3.5527e-06,  4.1844e-05,
         1.0818e-04,  5.5691e-05,  5.0783e-05,  6.3625e-05,  7.5088e-05,
         7.0962e-05, -5.4793e-05, -3.8776e-05, -9.2780e-06, -4.4121e-05,
        -3.8700e-05,  2.7312e-06, -2.3224e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5910e-04, -1.8291e-01,  5.7063e-05,  1.3774e-04, -3.5180e-04,
         5.0985e-05, -1.1292e-04, -4.2605e-04,  1.1325e-04, -7.0459e-05,
         3.8639e-05,  5.5554e-05, -6.2476e-05, -1.8373e-05,  2.2596e-04,
        -1.2748e-04,  2.5140e-05, -1.4822e-05, -1.3761e-05,  1.8416e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9844e-04,  1.6098e-01,  6.9552e-05, -9.2740e-05,  2.2407e-04,
         1.2478e-04, -1.5877e-05,  1.0569e-03,  7.8725e-06, -4.5550e-05,
         1.5716e-04,  4.3482e-05, -2.2801e-05,  6.1049e-05,  3.0103e-05,
         3.8814e-05,  7.6847e-05, -8.1943e-05,  2.8837e-05,  9.1520e-05,
         5.4454e-05,  7.0904e-05, -2.4316e-06, -4.2741e-05,  1.9115e-05,
         4.7129e-05, -5.1886e-05,  8.2512e-05,  5.8320e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7477e-04, -7.7525e-02,  7.5695e-05,  1.1599e-04, -1.3500e-04,
         8.8581e-05,  7.0364e-05, -4.5641e-04,  9.1898e-05,  1.1983e-04,
        -1.5722e-05, -1.6941e-05,  1.2937e-05, -5.2268e-06,  1.3361e-04,
        -2.0442e-05,  3.6082e-06, -2.8223e-05, -3.7263e-05,  6.5899e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0098e-04,  1.3724e-01, -5.5928e-05,  1.4173e-04,  1.3810e-04,
         1.7542e-04,  4.7470e-05,  3.0847e-03, -4.4476e-05, -1.8342e-04,
         1.0302e-04,  7.9035e-05,  8.3866e-06,  6.2068e-07,  1.3400e-04,
         5.4989e-05, -5.7185e-06, -2.3158e-05,  2.0296e-05,  1.0526e-06,
         6.7413e-05,  3.9226e-05,  1.1663e-05,  1.6307e-05, -7.3145e-05,
         1.9947e-04, -5.8540e-05, -3.2535e-05,  6.2314e-05, -2.6003e-06,
        -7.5261e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3831e-04,  1.8358e-01, -2.0681e-04, -1.2047e-04,  1.9043e-04,
         1.4394e-04, -7.2740e-05,  1.1998e-03, -2.8916e-05, -3.9866e-05,
         1.4437e-04,  1.0981e-04, -4.7813e-05,  1.3743e-04, -4.6405e-05,
         5.7342e-05,  2.6367e-04,  3.5122e-05, -8.1356e-05,  1.1762e-04,
         1.6875e-04,  1.9579e-05,  3.6074e-05, -2.9357e-05, -9.7652e-05,
         2.7886e-04,  2.0405e-05, -3.9148e-05,  2.1062e-05, -3.6813e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0965e-04, -1.5733e-01,  1.1538e-04, -5.6706e-05, -3.2779e-04,
        -2.3745e-05,  3.5687e-05, -7.5586e-04,  6.9720e-06, -3.4711e-05,
        -1.0126e-04,  4.0776e-05, -2.5331e-05, -1.9348e-05,  1.2522e-04,
        -1.3324e-04, -1.8307e-05,  1.1887e-05, -4.1037e-05, -5.4456e-05,
         3.5067e-05, -1.2385e-04,  7.1246e-06,  4.0573e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9286e-04,  1.7347e-01, -6.8283e-05,  1.1036e-04,  3.4428e-04,
         1.2142e-04,  2.8660e-04,  2.5675e-03, -1.6813e-04,  6.7375e-05,
         1.5213e-04,  2.2764e-05, -5.8048e-05, -2.3084e-05, -1.9490e-05,
         1.3155e-04, -1.1582e-04, -5.9694e-05,  6.3516e-05,  1.6338e-04,
        -2.2178e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1299e-04,  1.9033e-01, -8.9525e-05, -1.4490e-04,  2.9474e-04,
        -1.2136e-04, -6.5690e-05, -2.1572e-04, -1.0380e-04, -7.4965e-05,
         9.0947e-05,  3.9253e-05, -1.3249e-04, -2.0280e-05,  4.2431e-05,
        -5.0564e-05, -3.7288e-05,  1.0927e-04, -2.8466e-05,  6.8758e-05,
        -8.7329e-05,  2.5748e-04, -5.0584e-06,  1.8798e-05, -4.8040e-05,
        -3.4211e-05, -1.3482e-04, -6.6651e-05,  1.2728e-04,  7.6993e-06,
         1.3951e-04,  7.7128e-05, -6.1134e-05,  4.4559e-05, -5.0741e-05,
         5.7372e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-2.8849e-05, -1.6122e-01,  7.8809e-05, -1.8512e-04, -4.1752e-05,
        -3.4780e-03, -4.1757e-06, -1.1557e-04, -3.3927e-03, -4.5081e-05,
        -1.4616e-04, -2.5054e-05,  2.5435e-05, -3.2196e-04,  2.7607e-05,
        -5.0109e-05,  6.2378e-05, -1.1889e-04, -1.0687e-04, -8.8897e-05,
         3.7302e-05, -2.0290e-05,  1.0470e-06,  6.0908e-06,  2.8382e-05,
        -1.1834e-04,  2.5706e-05,  2.4115e-05, -5.2004e-05, -1.1466e-04,
        -2.8111e-05,  1.7106e-05,  2.8222e-05, -5.1768e-05, -2.7422e-05,
         1.5212e-05, -2.3209e-05, -5.4199e-05, -2.4654e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5317e-04, -1.4046e-01,  2.1260e-05, -4.7201e-04, -2.0385e-05,
        -6.1177e-03, -7.9491e-05, -1.3557e-04, -9.6882e-03, -5.3906e-05,
        -2.2630e-04, -6.4568e-05,  1.0089e-04, -8.7346e-04,  1.4403e-05,
        -7.2688e-05,  1.1230e-04, -1.0344e-04, -1.5316e-04, -9.7102e-05,
        -1.4383e-04, -1.0678e-04, -1.9222e-06,  5.8412e-05, -1.5363e-05,
        -6.4171e-05,  1.3088e-05, -1.7037e-04, -1.7525e-04, -9.5205e-05,
         5.1629e-05, -5.0478e-05, -5.3107e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9216e-04, -6.9460e-02,  3.3805e-05,  5.0569e-05, -6.3319e-05,
         3.2828e-05,  6.6044e-05, -1.0999e-04,  2.3296e-05,  1.4796e-05,
         2.3614e-06, -4.1730e-05, -3.8986e-05,  1.9784e-05, -3.2200e-05,
        -2.4947e-05, -3.4359e-05,  3.6804e-05, -6.6141e-06, -1.1437e-05,
        -1.2647e-06,  1.6122e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5274e-04, -1.9150e-01, -1.2267e-04, -1.1218e-04, -3.3781e-04,
        -4.5906e-05, -1.9679e-04, -2.7846e-03,  9.6369e-05, -1.1981e-05,
        -2.2153e-04, -1.4717e-05, -1.0318e-04, -7.7054e-05, -1.2240e-04,
        -6.3543e-06, -2.5419e-04,  1.2217e-04, -1.3990e-04, -1.9540e-05,
        -3.3072e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5435e-04, -1.4797e-01,  3.3167e-06, -5.6637e-05, -9.6799e-05,
         1.3061e-04,  1.3441e-04, -1.1515e-03, -6.0551e-05,  2.1865e-05,
         5.7948e-05,  2.5460e-05,  4.7162e-05,  4.3181e-05, -1.0084e-04,
        -1.6205e-05,  9.0101e-05, -2.3902e-05, -2.2345e-05, -4.3090e-05,
        -9.1971e-05, -8.4093e-05, -3.0787e-05, -4.2065e-05, -2.4292e-05,
         8.5229e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5154e-04,  2.3116e-05, -7.8564e-06, -6.6905e-05,  9.4476e-03,
         1.9541e-04, -1.5862e-05,  5.9911e-05,  2.8573e-05, -9.4739e-05,
        -2.0710e-04,  9.1532e-02,  5.8774e-05,  1.3660e-04,  3.5321e-05,
         2.8228e-05, -1.0189e-05, -1.2068e-04, -4.5369e-07, -6.0705e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9276e-04, -1.2931e-05, -9.4799e-05, -5.1831e-05, -7.2580e-03,
        -1.2647e-04, -7.7504e-05, -8.4323e-05, -2.1700e-04,  1.8434e-04,
        -9.5459e-07, -1.1962e-01, -1.9375e-04, -1.0733e-04, -9.3181e-05,
        -4.7735e-05, -5.5598e-05,  5.8152e-06, -7.7217e-05,  3.8934e-05,
        -4.8950e-05,  3.6040e-05, -4.2452e-05, -3.6151e-06, -2.0418e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2787e-04,  8.4180e-05,  9.6163e-05,  5.1668e-05,  1.3825e-02,
         2.3821e-04,  1.0823e-04,  1.5708e-04,  2.3145e-04,  3.3218e-06,
        -1.5427e-04,  1.3522e-01, -1.5197e-05,  7.4674e-05, -7.2442e-05,
         3.5977e-05,  1.2514e-04, -8.5071e-07, -2.3477e-05,  2.8369e-05,
        -6.9516e-05, -1.8136e-05, -2.0670e-06,  7.5028e-05, -6.1797e-06,
         2.7772e-05, -1.8845e-05,  8.1051e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8110e-04, -4.9469e-02, -4.2840e-05, -9.3039e-03, -6.0363e-02,
         1.0424e-05,  4.2805e-05, -3.2182e-05, -2.7997e-05,  6.5094e-05,
        -2.3973e-05, -2.9170e-05, -1.3672e-02, -5.9940e-05, -2.2253e-05,
        -1.3250e-05, -6.6490e-05,  1.1351e-04, -7.2233e-05, -7.5017e-03,
         6.9804e-06, -6.6861e-05, -1.8407e-05, -3.6130e-05,  4.5529e-06,
        -1.3541e-05, -2.9297e-05,  5.6530e-05,  2.4264e-05, -2.1264e-05,
         4.1437e-06,  6.4691e-05, -9.8140e-05,  2.6660e-05,  1.3399e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6398e-04, -3.0249e-02, -1.2150e-05, -1.6706e-03, -2.9356e-02,
         3.7428e-05, -9.0387e-06, -3.1807e-05, -9.5694e-06,  6.4442e-06,
        -3.9410e-05,  3.9398e-05, -4.8815e-03, -4.0410e-05, -3.9762e-05,
        -2.6553e-05, -6.1682e-05,  3.5043e-05, -3.9048e-05, -8.5122e-03,
         2.3314e-05, -5.9409e-05,  4.0035e-05,  1.8329e-05,  3.8450e-05,
        -2.7441e-05,  1.4101e-05,  2.4562e-05, -2.7963e-06, -4.5502e-05,
        -2.3734e-05,  1.7686e-05,  7.9271e-07, -2.0247e-05,  2.4938e-06,
        -3.2036e-05,  4.0357e-06, -1.9007e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8106e-04, -4.8425e-02, -2.0063e-05, -3.3442e-03, -2.7553e-02,
        -1.4420e-05, -5.4520e-05, -3.9194e-05,  1.7115e-05,  1.1556e-05,
        -6.1274e-06,  1.1184e-05, -5.9324e-03, -4.3133e-05, -4.6220e-05,
        -5.8853e-05, -4.6675e-05,  6.5676e-05, -5.8413e-05, -1.2698e-02,
        -2.5342e-05, -1.9396e-05,  1.0590e-04,  4.6129e-05,  7.3563e-05,
        -5.6537e-05,  7.4183e-06, -6.3693e-06,  3.8854e-06, -9.6653e-05,
        -6.8572e-06, -1.2099e-05, -3.8059e-05, -1.2324e-05,  1.7905e-05,
        -8.6640e-06,  3.2178e-07, -4.4836e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5990e-04, -7.5531e-02,  3.3587e-05, -1.5327e-04, -4.8332e-05,
        -6.6797e-04,  4.4559e-05, -2.9950e-04, -3.0099e-02, -1.1442e-04,
         7.5349e-05, -4.2139e-06,  3.0742e-06,  3.1598e-05,  5.7948e-05,
         1.2237e-05, -1.3374e-04,  3.0062e-05, -9.3171e-05, -2.0474e-05,
        -2.5080e-05, -5.3867e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.5323e-03,  6.4463e-05, -5.5317e-05, -6.1777e-05,  3.3548e-05,
        -2.1621e-05,  9.9980e-02,  1.8008e-04,  5.8728e-05,  5.9290e-03,
         4.4548e-05,  9.7835e-05, -3.2591e-04, -3.7055e-05, -1.1402e-04,
         9.1311e-05,  6.3011e-05, -4.6583e-05,  9.6233e-06,  6.0747e-05,
        -4.7812e-05,  9.1760e-05, -8.9550e-05, -7.7069e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9592e-04,  4.9893e-05, -8.2928e-05,  2.3237e-05,  1.9782e-05,
         9.3060e-05,  3.2779e-02,  6.9090e-05,  1.7218e-04,  2.1909e-02,
        -6.4523e-05,  2.1263e-04, -2.5659e-04,  3.0801e-05, -4.6393e-05,
         5.6582e-05,  1.5052e-05, -3.7622e-05, -2.6335e-05, -1.6694e-06,
         2.1024e-05,  1.6603e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9349e-04, -1.2260e-05, -5.7616e-06,  1.7437e-05, -2.5770e-02,
        -6.2369e-02, -4.5353e-05,  5.4676e-05,  1.4739e-04,  5.7771e-05,
        -5.0253e-05, -1.3359e-04,  3.8066e-05, -1.9699e-05,  1.3477e-05,
        -7.5845e-05,  1.0255e-04, -7.5769e-05, -5.9076e-05,  2.0922e-05,
         5.6488e-05,  2.0138e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1881e-05,  4.4858e-06,  3.4556e-05,  9.3897e-05, -1.4670e-02,
        -6.2748e-02, -1.7055e-05,  1.3333e-04,  1.6047e-04,  7.2882e-05,
        -3.1191e-05,  1.3153e-05,  1.0870e-04, -1.1827e-05,  4.0915e-05,
        -5.0724e-05, -1.4703e-05,  4.6166e-05,  1.4826e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4787e-05,  5.6035e-05, -3.6764e-05,  3.8994e-05, -1.3429e-02,
        -6.1923e-02, -3.4796e-05,  9.0563e-05,  1.1785e-04,  4.0139e-05,
         5.9288e-05, -6.9078e-05, -4.9185e-05, -2.8232e-05,  2.7174e-05,
        -3.5428e-05,  3.1778e-06, -2.2013e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0276e-05, -1.8787e-06,  5.1863e-02,  1.0861e-05, -2.9858e-04,
        -2.2928e-04,  2.5278e-05, -7.7625e-05,  2.9417e-05,  1.1683e-04,
         9.2736e-05, -2.0350e-04, -1.0400e-04, -5.3743e-05,  1.6307e-04,
        -4.1844e-05,  1.5827e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.0032e-04,  1.2440e-04,  3.9507e-02,  8.0925e-05, -5.3492e-04,
        -3.8453e-05,  8.7552e-05, -1.9977e-05,  5.0678e-05, -6.8848e-05,
         1.1215e-05,  6.4311e-05, -1.9688e-04, -3.4482e-05, -1.5319e-04,
         4.4470e-05,  3.8622e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0738e-05, -5.3502e-06,  4.3712e-02,  1.2802e-04, -3.6868e-04,
        -8.5559e-05,  2.0778e-05, -3.4127e-05,  1.6694e-04, -2.1131e-05,
         8.7452e-05, -6.2528e-05,  1.1300e-04,  4.5643e-06,  1.0255e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9129e-04,  1.8054e-01,  1.3379e-05,  6.9187e-04,  2.1572e-04,
         5.0164e-03, -2.8232e-04, -2.2100e-04, -1.0815e-04, -1.0799e-04,
         1.7840e-05,  7.1700e-05,  1.4205e-05, -1.1981e-04, -1.2311e-04,
         5.1772e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.8582e-04,  2.5214e-01,  6.9776e-05, -2.1451e-04, -9.7203e-05,
         2.8553e-03, -4.5806e-04, -1.2496e-04, -2.9880e-05,  3.3403e-05,
        -1.2583e-04,  2.7272e-05, -2.4537e-04, -1.5883e-04,  5.2165e-05,
        -4.4017e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3208e-04,  1.7808e-01, -2.4096e-05,  1.9429e-04, -4.2571e-05,
         2.8829e-02, -3.1555e-04, -3.1521e-04, -3.8627e-05,  7.2234e-05,
         7.1212e-07, -4.5909e-05, -4.7788e-05,  7.5821e-05,  6.2132e-05,
        -1.0718e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8196e-04,  1.8700e-05, -4.5890e-05,  1.5022e-04, -5.3835e-05,
         5.2478e-04,  1.7745e-02, -5.1122e-05, -1.3653e-04,  1.7993e-03,
         4.1189e-05, -8.1835e-05, -1.5174e-06,  2.8405e-05, -7.5578e-06,
        -4.2212e-05,  7.3331e-05, -6.4764e-05,  6.3826e-06,  2.1000e-05,
        -1.7317e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-7.4546e-04, -1.3965e-01, -7.3285e-06, -1.0459e-05,  8.6911e-05,
         4.1813e-05,  1.1216e-04, -2.6124e-05,  8.7906e-05, -3.5273e-05,
         8.3102e-05,  4.3099e-04,  4.4384e-05,  5.7875e-05, -4.3844e-03,
         9.2598e-06, -1.2590e-05,  6.4338e-05,  2.3935e-05, -4.9893e-08,
        -1.2743e-04, -8.5279e-06, -9.1181e-05,  4.4148e-05,  9.2479e-05,
         8.4380e-05, -1.5728e-05,  5.9664e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7609e-04, -1.6693e-01, -5.7677e-06, -3.6676e-05,  6.7880e-06,
         1.7423e-04,  8.7710e-05, -1.0528e-04, -3.6392e-05, -4.0029e-05,
         1.7024e-04,  8.2765e-05,  1.9823e-04, -1.3430e-05, -3.9221e-03,
         2.4277e-05, -1.7514e-05, -7.1174e-05, -1.9376e-05, -2.4525e-05,
        -4.6393e-05, -7.5492e-07, -5.1847e-05, -1.4201e-05, -5.9431e-06,
        -2.4882e-05, -2.6631e-05, -2.9645e-05, -6.0769e-06,  1.2420e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9385e-04, -6.3059e-02, -1.3731e-05,  2.4035e-05,  7.0067e-05,
        -1.9201e-05,  1.8023e-05,  4.6860e-05,  8.9872e-05, -3.0142e-06,
         1.0187e-04,  2.6084e-04,  3.3055e-05, -1.3478e-05, -1.9079e-03,
        -1.2266e-05, -2.0953e-05, -9.7504e-06,  3.5122e-05,  3.0300e-05,
        -1.7785e-06, -2.1275e-05,  3.2722e-05,  3.5403e-05,  3.0366e-06,
         3.9021e-05,  1.6535e-06, -4.1054e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8512e-04,  1.4861e-03,  2.2544e-06,  5.2307e-04,  2.9156e-04,
        -1.3138e-04,  1.0973e-04,  2.5704e-04, -2.8764e-04, -5.7468e-05,
         1.0171e-03,  3.8629e-04,  6.3360e-05, -4.7947e-05, -2.2996e-05,
         6.1434e-05,  1.9440e-04,  1.0087e-05, -3.7387e-07,  5.8212e-06,
        -1.7049e-05, -1.5394e-05,  1.8482e-05,  2.5286e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8036e-06,  9.7448e-04, -2.9764e-05,  4.4759e-04,  1.8984e-04,
         7.0314e-05,  1.8832e-05,  6.6603e-05, -2.9048e-04, -2.9208e-05,
         1.5003e-03,  5.8115e-04, -1.0109e-05, -4.6499e-05, -5.3458e-06,
        -2.7540e-05,  8.9064e-05,  1.2680e-05, -1.3123e-05,  2.4886e-05,
        -2.0133e-05,  1.4549e-06, -1.0623e-05,  9.4429e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8714e-04,  8.5046e-04,  9.7064e-06,  1.5005e-03,  1.1024e-04,
         1.4521e-04,  6.5737e-05,  4.9419e-05, -1.7708e-04,  1.5768e-05,
         9.1838e-04,  1.9372e-04,  4.7175e-04, -4.6057e-05, -5.3370e-05,
        -3.0464e-05,  7.8023e-05,  1.6568e-05,  7.2637e-06, -4.5967e-06,
        -1.2910e-05, -7.8813e-06,  1.3848e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6200e-05, -7.2289e-02,  2.2340e-05, -2.4501e-03, -2.3475e-02,
        -1.8306e-05, -2.1488e-05,  1.7154e-04,  1.3380e-05,  3.1707e-07,
        -5.5354e-06,  2.0261e-05,  7.8536e-05,  5.1144e-05,  8.0551e-05,
         2.1531e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7500e-04, -4.9869e-02,  8.8526e-05, -2.0869e-02, -5.8428e-02,
         3.2682e-05, -3.8975e-05,  2.3883e-04,  2.7621e-05, -9.4374e-05,
        -4.6961e-06,  4.2846e-06, -5.8417e-05,  6.9552e-05,  9.9761e-05,
        -1.0541e-04,  8.2441e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9019e-05, -2.1256e-03,  5.3735e-05, -1.1035e-02, -7.1962e-02,
        -3.9272e-05,  1.1638e-05,  7.9048e-05, -7.6952e-06,  4.4929e-08,
         4.3183e-05, -5.2309e-06, -8.1948e-05, -9.1886e-06, -3.0326e-05,
        -1.2027e-04, -2.1840e-05, -6.4874e-05,  2.5529e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3708e-03, -1.0442e-03, -5.6824e-05, -1.4044e-04, -3.6757e-04,
        -8.1038e-05, -6.6548e-05, -6.9101e-05,  1.6158e-01,  2.4174e-05,
         1.9454e-05, -5.8296e-05,  4.4254e-05, -1.4257e-04,  7.1674e-05,
        -1.3866e-04, -5.0671e-05, -4.6287e-05,  1.6951e-04, -2.0232e-05,
        -5.8089e-05, -1.1813e-04,  4.3770e-05,  4.2262e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4559e-05, -4.2403e-04,  3.5547e-05,  1.6312e-05, -3.9780e-04,
         5.7299e-06, -1.4440e-04,  2.0248e-04,  8.3609e-02, -5.9758e-05,
        -1.1975e-04,  7.5809e-05, -2.7360e-05,  3.2975e-05,  2.3348e-05,
        -1.1159e-04,  1.7828e-05,  8.7470e-06,  6.2076e-05,  1.2927e-04,
         1.1620e-05,  5.3095e-05, -5.8520e-05, -5.6913e-05,  9.5315e-05,
        -5.7710e-06, -4.5291e-05,  2.6040e-05, -2.0199e-05,  1.1860e-04,
        -2.4702e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3558e-04,  9.5539e-04,  1.4456e-04,  3.1003e-05,  1.5165e-04,
        -1.1746e-04,  2.5212e-04, -2.7818e-04, -1.4073e-01,  7.3552e-05,
         9.1806e-06,  9.5231e-05, -3.4942e-05, -8.3119e-05, -2.4910e-05,
         2.1106e-05, -1.3337e-04, -1.3892e-04, -1.5654e-04,  3.1520e-05,
         2.6709e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 4.4939e-04,  2.4584e-01,  2.1669e-05, -5.0271e-05, -6.2595e-05,
        -3.2953e-04, -2.1027e-04, -8.1624e-05, -1.0527e-04,  1.5811e-04,
        -3.2723e-06, -4.6334e-05, -1.0132e-04,  9.1525e-05,  6.8859e-05,
         2.1132e-05,  3.0535e-05,  4.9868e-05, -6.2957e-05, -7.4107e-06,
        -9.2504e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0818e-04, -1.2246e-01,  4.8166e-05,  1.7035e-05,  1.3028e-04,
         1.1523e-05,  7.9184e-05,  2.6046e-04, -6.6906e-05, -3.7321e-05,
        -3.8631e-05,  5.9536e-07, -3.2076e-05, -6.0958e-05, -2.3800e-05,
         2.1811e-05, -3.2858e-05,  1.7294e-05, -2.2944e-05, -4.3412e-07,
         7.7433e-06,  2.4487e-05, -6.5522e-05,  6.2059e-05,  2.3350e-05,
         3.7703e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0327e-04,  2.2312e-01, -8.9725e-05,  3.1316e-05, -2.7932e-04,
        -1.4091e-04, -1.9277e-04,  9.9942e-05, -8.4869e-05,  2.2666e-04,
        -6.7365e-05, -3.0010e-05, -4.4605e-04, -3.8347e-04, -7.9739e-05,
        -1.3853e-04, -1.5928e-04,  3.5600e-05, -4.8315e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5973e-04, -2.7776e-01, -1.7392e-04,  6.1002e-05,  8.0655e-05,
        -1.9743e-04, -2.5033e-04, -2.4724e-04, -1.3287e-04, -3.0631e-04,
        -2.5490e-04,  4.1114e-06, -2.6166e-04, -2.1520e-04,  4.1321e-06,
        -1.9718e-04,  1.9104e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2161e-04, -1.7284e-01,  6.7082e-05,  2.9878e-04,  1.8158e-04,
         7.7509e-05,  1.0696e-04, -1.5639e-05, -6.9008e-05,  1.2638e-05,
         7.1202e-05,  1.3824e-04,  7.8110e-05,  7.6221e-05,  2.9073e-05,
         1.7508e-05,  1.2895e-05,  3.0367e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0939e-04, -1.6288e-01,  9.1461e-05,  2.0404e-04, -9.3604e-05,
        -1.9863e-05,  8.0958e-04, -2.1055e-05,  7.5063e-05,  1.4232e-04,
        -1.2383e-05,  3.3423e-05, -4.7505e-06,  6.0284e-05,  5.6687e-05,
         9.7845e-05, -9.4810e-05, -5.5082e-05,  9.2843e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6233e-04, -2.0993e-01,  1.2507e-05,  2.6540e-04,  2.0257e-05,
        -4.6755e-07,  6.0506e-04,  5.5328e-04,  1.3972e-04,  2.7250e-04,
         1.7633e-04, -1.4850e-04,  8.8933e-05,  1.1869e-04, -4.4937e-05,
         1.0721e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0059e-03, -2.9856e-01, -1.1291e-04,  1.7717e-04, -1.6590e-04,
        -4.2436e-04, -8.4687e-04, -2.8481e-05,  2.4642e-04,  1.4455e-05,
        -2.0633e-05,  9.9502e-05,  4.7553e-05,  2.1046e-04,  9.0381e-05,
        -1.4726e-04,  1.4157e-04,  1.9044e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2950e-04, -1.1008e-01, -8.5086e-05, -6.8108e-05, -1.2869e-04,
        -4.4239e-05,  5.3370e-05, -6.4405e-02, -6.3878e-05, -1.1764e-03,
        -1.0762e-04,  3.1128e-05, -6.9302e-05, -4.6758e-05, -1.4087e-05,
        -8.2701e-05, -1.7824e-05,  2.6333e-05,  5.1779e-05, -9.7417e-06,
         1.1197e-05, -1.0596e-04,  7.9538e-05,  1.1330e-04,  3.3429e-05,
         2.1995e-05, -4.9404e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3860e-04,  1.5838e-01, -8.4396e-05,  4.2298e-06, -5.6502e-05,
        -1.8529e-04, -2.3392e-04,  2.4324e-02, -7.0750e-05,  2.7247e-04,
        -3.0136e-05, -1.3186e-04,  1.0837e-06,  1.1584e-05, -2.5089e-05,
        -2.0924e-04, -9.2509e-05, -1.7954e-04, -7.5602e-05, -1.6790e-04,
         5.7935e-06,  4.9885e-05, -4.0497e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9342e-04, -7.8726e-02,  5.0222e-05,  2.1419e-05, -1.0663e-04,
         3.9320e-05,  1.1488e-04, -3.6364e-02,  6.9037e-06,  2.6929e-04,
        -1.7744e-05,  4.1968e-07, -3.9821e-05, -1.0370e-04, -5.3731e-05,
        -5.5421e-05,  9.1986e-06, -1.4033e-04, -3.6121e-06,  6.5190e-05,
         4.2216e-05, -4.5520e-05, -2.1384e-05,  1.7019e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4248e-04, -2.0143e-01,  8.9464e-05, -3.6051e-05, -5.2745e-05,
        -6.6068e-05,  8.6697e-05, -2.7997e-02,  1.1560e-05, -2.0213e-03,
         7.6694e-05,  1.2283e-04,  8.5341e-05, -1.6446e-05, -3.1310e-05,
        -1.4426e-04,  1.3463e-05, -5.8848e-06,  1.1015e-04,  1.2473e-04,
        -7.7208e-05, -8.3895e-05, -6.0137e-05, -7.4592e-05, -6.0300e-05,
         1.4773e-04,  2.8510e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-2.4472e-04, -2.6625e-04, -1.4771e-04, -9.6568e-05, -1.2635e-04,
        -2.5667e-05, -7.3837e-05,  3.8366e-06,  3.7663e-04, -3.3135e-05,
        -1.3099e-04,  3.3793e-04,  8.2701e-06,  1.1092e-05, -1.4409e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1967e-04, -5.6109e-02,  2.1968e-05, -8.9512e-03, -3.4265e-02,
        -5.9251e-05, -3.2236e-05, -1.7453e-02,  2.4293e-05,  4.5894e-05,
         8.5854e-06,  4.8857e-06,  2.5254e-05, -2.9827e-05, -3.5093e-05,
        -2.5816e-05, -1.6594e-05, -5.7795e-06, -7.6128e-05,  1.5919e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5427e-04, -1.0895e-01,  5.7165e-06, -2.0661e-02, -2.2089e-02,
         1.8201e-04, -6.3558e-05, -1.6798e-02,  1.8136e-04,  5.3170e-07,
         5.6155e-05,  6.5642e-05, -3.6262e-05,  4.9322e-05,  3.9020e-06,
        -6.0975e-05, -1.8134e-06, -2.3772e-04, -4.1164e-05,  2.3375e-05,
        -3.5439e-06,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0197e-04, -1.2015e-01, -6.5567e-06, -7.6665e-03, -2.1229e-02,
         2.8172e-05, -1.4314e-04, -3.5896e-02,  3.2165e-05, -4.3681e-07,
         4.9096e-05, -9.2886e-05, -5.0903e-08, -1.9782e-05, -3.5657e-05,
        -2.9520e-05,  2.7253e-05, -8.1947e-05, -4.7598e-05,  2.0480e-06,
        -4.4696e-05, -3.2223e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9050e-04,  6.6590e-02,  1.3375e-04, -5.8310e-05,  1.6649e-01,
        -1.3017e-04,  4.8784e-05,  3.1802e-04,  5.8619e-05,  1.2035e-05,
         1.6422e-04, -1.4364e-04,  2.1403e-05, -9.6735e-06,  9.0779e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4517e-04, -2.9777e-02,  2.6018e-04, -2.2114e-05, -1.2455e-01,
         1.5815e-04, -6.1171e-05, -1.0757e-04,  3.5743e-04,  4.0090e-05,
         7.6893e-05, -2.3683e-04, -1.2992e-04, -4.6073e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2684e-04, -6.4458e-02,  2.2217e-05,  1.0220e-04, -4.7391e-02,
         1.4346e-04,  1.3985e-06,  3.2724e-05, -6.0329e-05,  8.6681e-05,
         4.3912e-05, -7.9400e-05, -1.8912e-05, -7.1888e-05,  2.7107e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5288e-04,  2.1287e-01,  8.8132e-05, -5.0156e-04, -1.9393e-05,
         2.6914e-04, -7.4524e-05, -1.0883e-04,  9.3010e-05, -1.8272e-05,
        -4.2951e-04,  6.2170e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4212e-04,  2.2424e-01, -1.7029e-04, -1.2201e-03,  1.6942e-04,
        -1.8764e-04, -2.1208e-04, -1.6879e-04,  2.7757e-05,  9.9058e-05,
        -6.7349e-05, -1.3408e-05, -3.0167e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4514e-04, -1.0631e-01,  2.2064e-05,  2.6240e-04,  7.7638e-05,
         1.5525e-04,  4.1216e-05, -3.1408e-05, -3.7281e-05,  1.0422e-05,
        -4.9746e-05, -1.2243e-04,  5.5126e-05, -1.1268e-05, -2.2658e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2618e-04,  3.2581e-04, -9.7581e-05, -8.1149e-05, -1.0587e-04,
         3.9154e-02, -1.1100e-04, -1.0724e-04, -1.4508e-04,  2.1144e-06,
        -2.9982e-05, -5.2428e-05, -8.3514e-05, -7.9336e-05, -1.5704e-04,
         1.2913e-05,  2.6362e-05,  3.8556e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0794e-04,  1.3314e-04,  2.7515e-05,  8.3542e-05,  1.4002e-05,
         3.4741e-02, -4.0055e-05,  9.5349e-06,  4.9895e-05, -3.2245e-05,
        -1.4324e-04, -3.3786e-05, -6.7678e-05,  2.6201e-08, -4.5649e-05,
         5.9758e-05,  1.0260e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #600: [tensor([ 2.1551e-04,  4.9719e-05, -1.4720e-04,  5.0977e-05,  6.2706e-05,
         8.0236e-02,  8.1979e-03,  8.9533e-05, -6.1839e-05,  7.1321e-05,
        -8.0911e-05, -7.4886e-05, -9.5428e-05, -1.2485e-04,  5.0230e-05,
         1.8394e-05,  8.2983e-05, -2.3172e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6356e-04, -1.0128e-04, -5.2188e-05,  8.7223e-05, -9.7510e-05,
         8.0423e-02,  1.0733e-03, -1.0026e-04,  8.7139e-05,  2.9184e-07,
        -3.7588e-05,  7.2469e-05,  4.5502e-05, -2.9744e-05, -2.6576e-05,
        -7.0987e-05, -5.2101e-05,  4.2079e-05, -5.1329e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9666e-04, -1.1454e-04,  7.2359e-06,  4.0035e-05, -3.4605e-05,
         8.5235e-02,  7.5942e-03, -1.4783e-05,  1.1690e-04, -9.7263e-05,
         4.6630e-05, -7.8399e-06,  3.9420e-05,  3.7847e-05, -1.5595e-05,
        -9.6754e-05, -4.2571e-05, -3.3855e-05,  3.1481e-05, -2.7245e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4677e-04,  2.2619e-01,  5.4977e-05,  6.5780e-06, -1.0030e-04,
        -2.2481e-04,  1.2108e-02,  6.3415e-03, -2.4115e-04,  5.4040e-05,
         2.9602e-04,  8.3446e-05,  1.3703e-04,  1.3883e-04,  1.4148e-04,
         4.2359e-05, -4.2022e-05, -1.1742e-04, -6.9854e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0994e-04, -1.6345e-01, -1.6096e-04,  1.4185e-04,  1.9721e-04,
        -2.0675e-05,  1.3033e-03,  1.2493e-03, -4.2896e-05, -9.0935e-05,
        -4.9647e-05,  1.4173e-04, -3.5757e-05,  9.1055e-05, -3.8360e-05,
         6.7882e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2310e-04,  1.3043e-01,  9.2882e-05, -2.9031e-05,  1.8756e-04,
        -1.2133e-04,  3.3870e-03, -9.2969e-04,  1.4618e-05,  9.2083e-05,
        -3.5163e-04, -8.7332e-05, -7.6935e-05, -1.0900e-05,  3.6798e-05,
         5.1352e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3673e-03, -1.2531e-04,  1.6260e-05, -1.3855e-05, -3.3916e-05,
        -9.3134e-05, -1.6991e-05, -7.6739e-05,  1.3559e-01, -2.7406e-04,
        -9.7686e-05, -1.0912e-04, -2.3584e-05, -1.0623e-04, -1.3598e-04,
        -1.9555e-03, -3.8993e-05, -8.6279e-05, -9.6231e-05,  7.0769e-05,
        -7.8265e-05, -9.4963e-05, -1.9868e-05,  4.9290e-05, -2.0324e-04,
         5.0594e-05, -9.6242e-05, -4.4906e-05,  2.3389e-06, -4.4913e-05,
        -5.3936e-05, -4.0220e-05, -7.0628e-05,  4.8863e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3558e-04, -1.6479e-04, -4.8812e-06, -3.7809e-05,  8.7332e-05,
         3.9586e-05,  1.2221e-04,  1.1855e-04, -2.3075e-01,  3.5017e-05,
        -6.0140e-05,  5.8889e-05, -6.5879e-05,  3.3658e-05, -6.6338e-06,
        -5.5904e-03, -6.7180e-05, -5.3957e-06, -1.2052e-05, -1.2146e-04,
         3.6741e-05, -5.7601e-05,  4.9008e-05, -3.4338e-06,  3.7455e-05,
        -1.0276e-05, -4.8336e-05, -3.1902e-05, -5.6455e-05, -2.8147e-05,
         2.5831e-05, -1.4684e-04, -7.1198e-05, -6.7114e-06, -1.0774e-04,
        -2.1726e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7386e-04,  4.5923e-05, -1.3423e-04, -1.5414e-04, -2.1276e-04,
        -5.5489e-05, -7.1862e-06, -5.3909e-05,  2.0200e-01, -4.3681e-05,
         5.7601e-05, -4.6173e-05, -3.5998e-05, -1.2958e-04,  9.1544e-05,
        -9.8582e-04,  1.6380e-05,  1.8386e-05, -1.0523e-04,  1.1752e-04,
         6.4346e-05,  4.0624e-05, -1.4194e-05, -4.7540e-05,  8.9658e-05,
         8.6907e-05,  2.4867e-05,  4.8258e-05,  7.8969e-05,  4.5967e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1205e-04,  1.8254e-02, -2.3920e-05,  4.0635e-05,  1.5188e-05,
         1.4440e-03, -5.9941e-07, -9.3359e-06,  2.6527e-02, -1.1309e-04,
         2.3434e-02,  6.0262e-05,  1.6652e-05, -8.7610e-06,  6.9734e-03,
         4.0416e-03,  4.2186e-05, -6.6612e-06,  5.8554e-04, -2.7219e-05,
        -5.0643e-05,  3.7210e-03,  7.3023e-05,  3.3736e-04,  8.7374e-06,
        -2.0195e-05, -3.0455e-05,  2.0797e-05, -1.1061e-05,  5.7549e-06,
         5.0114e-05, -7.6193e-07,  3.9100e-06,  3.4361e-06, -3.0012e-06,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3192e-05,  1.4670e-02,  1.3073e-05, -4.5350e-05,  3.3683e-07,
         5.9443e-04, -3.2388e-06,  2.2021e-06,  3.5969e-02, -1.1360e-04,
         2.5507e-02,  2.2005e-05,  2.0769e-05, -4.7007e-06,  7.1333e-03,
         2.4993e-03, -1.6278e-05, -4.8538e-05,  4.9265e-04, -2.7189e-05,
        -9.2566e-05,  1.4183e-03, -8.0141e-05,  1.1166e-05,  5.4636e-05,
        -9.7380e-06,  4.1397e-06, -4.7047e-05,  1.0851e-05, -2.5216e-05,
        -2.2799e-05,  6.9548e-06,  1.1271e-06, -1.4669e-06, -1.5366e-05,
         5.2084e-05, -2.5838e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0708e-04,  6.9946e-03, -6.8182e-06, -5.4808e-05, -2.1138e-05,
         2.6737e-04,  2.5394e-07,  3.8610e-06,  1.7408e-02, -6.8502e-05,
         3.9465e-02,  2.9846e-05, -3.7490e-05, -2.6671e-06,  2.3819e-03,
         6.3551e-03,  2.4786e-05, -9.2529e-06,  4.6954e-04, -3.8191e-05,
        -3.9227e-05,  3.6385e-03,  2.8049e-05,  1.0692e-04,  1.3076e-05,
         1.1693e-05, -6.6464e-05,  3.1938e-05, -2.0066e-05, -1.1381e-05,
         6.1075e-06, -2.1117e-05, -1.6806e-05, -2.8537e-06,  2.2013e-05,
         2.3407e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #650: [tensor([ 1.6858e-04, -4.7127e-05,  5.1211e-05,  6.4602e-05,  2.1393e-02,
        -1.0797e-05,  3.4930e-05,  1.6610e-05, -2.6625e-05,  4.8045e-06,
        -4.7804e-05, -6.3428e-05, -3.9169e-05,  5.9958e-06, -1.2739e-04,
         2.2369e-05,  1.2976e-01, -5.9410e-05,  1.8395e-05,  1.2165e-04,
        -3.8659e-05, -6.6649e-05, -5.4396e-05,  8.8834e-05,  7.3147e-05,
        -5.4819e-05, -4.3208e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3228e-05,  1.3401e-04,  4.6862e-02,  2.9642e-05, -1.8359e-05,
         2.5552e-05, -1.5048e-04,  8.7102e-02,  7.2351e-05,  3.9598e-05,
        -1.1378e-05, -2.6268e-05, -7.8380e-05, -4.0856e-05, -6.2532e-05,
        -1.1852e-04,  3.2178e-05,  6.3225e-05, -8.4110e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0510e-04, -6.4960e-05, -1.0124e-02, -2.1667e-04,  8.9710e-05,
         2.8107e-05,  5.8467e-05, -1.8748e-01,  8.6977e-05, -2.6775e-05,
        -9.7278e-06,  1.0550e-04, -1.8286e-04, -7.5533e-05, -6.9017e-05,
        -1.9281e-05,  2.6663e-05,  1.8221e-05, -5.5530e-05, -6.4292e-05,
        -2.7359e-04, -1.7048e-04, -1.8318e-04, -6.2335e-05,  5.0408e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2893e-03,  1.2464e-05,  2.3591e-02,  9.6138e-05,  6.3538e-05,
         4.7684e-05, -1.5124e-04,  1.1828e-01, -6.7194e-05,  1.8493e-05,
         4.0010e-05, -2.2405e-06,  5.7095e-06, -4.3220e-05, -1.1561e-04,
        -8.4469e-05,  7.1815e-05,  1.9856e-05, -2.4487e-05, -3.5543e-05,
        -5.2169e-05,  1.1614e-04, -6.0329e-05, -3.8861e-06, -3.4812e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4757e-04, -1.1228e-01,  4.5325e-05, -5.5343e-02, -2.7365e-02,
         1.1415e-05, -6.7303e-06,  5.0886e-05, -5.2501e-05,  7.7260e-05,
        -5.5450e-04,  1.0804e-05,  2.1678e-04, -2.9730e-02, -4.1720e-05,
        -2.0989e-05, -3.5700e-05, -3.0579e-05, -1.0184e-04, -4.3392e-06,
        -2.7330e-05,  5.1377e-06,  9.4970e-06,  2.8135e-05, -5.6066e-05,
        -6.2223e-06, -2.2029e-05, -1.2498e-05, -4.6480e-05, -5.3718e-05,
        -4.1476e-05,  1.0933e-05,  1.3388e-05, -3.0119e-05, -2.7966e-05,
         3.2014e-05, -5.8725e-05, -4.6701e-06, -4.3877e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1631e-04, -7.2571e-02,  3.8928e-06, -1.6478e-02, -2.5787e-02,
         3.0343e-05, -1.2901e-05,  6.5745e-05, -5.7431e-06, -1.7199e-05,
        -3.3868e-02, -1.5402e-05,  1.3775e-04, -6.8438e-03, -2.2090e-05,
        -6.6249e-05,  9.8223e-06, -2.7024e-07,  4.4196e-05, -7.6539e-06,
         1.8951e-05, -4.8969e-05,  5.2469e-05, -4.1633e-05,  2.7241e-05,
        -1.1017e-05, -2.1064e-05, -1.0089e-05, -6.1860e-05, -6.0571e-07,
        -3.4974e-05, -2.2427e-05,  3.8675e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6164e-04, -1.4432e-01,  4.6047e-05, -5.9551e-03, -3.4430e-02,
         2.4373e-05,  4.4656e-05,  1.5879e-05,  9.5640e-05,  1.2006e-05,
        -6.9102e-03, -1.9525e-05,  5.0490e-04, -2.0304e-03, -8.4090e-05,
         1.9855e-05, -7.1372e-06, -2.2628e-08,  1.9793e-05,  2.3340e-06,
         5.2172e-05,  1.2586e-04,  3.1803e-05,  9.7694e-05, -6.8933e-06,
         4.4507e-05, -4.6174e-05, -1.4688e-05, -2.6017e-05,  3.0250e-05,
        -4.7347e-05,  1.1045e-05,  4.8140e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8899e-04, -2.3932e-02, -7.5845e-05,  5.7308e-05, -2.4991e-02,
        -2.2318e-05, -6.1247e-05,  7.7856e-05, -4.4443e-05, -1.3170e-01,
         8.4545e-05, -1.5365e-04, -3.1661e-05, -8.4581e-05,  6.6397e-06,
        -4.3081e-05, -1.2487e-04, -1.2234e-05,  1.9053e-05,  6.9013e-05,
        -1.7279e-05,  5.9912e-05, -2.9835e-05,  9.0635e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6448e-04, -1.3032e-01, -9.2155e-05, -3.6619e-05,  3.9898e-04,
         2.7298e-05, -2.9091e-06,  2.0418e-04,  6.7742e-05, -5.0538e-02,
         2.6323e-05, -5.9313e-05, -7.8317e-05, -4.1266e-05, -4.0017e-05,
         6.8242e-05,  8.3620e-05,  9.5624e-05, -1.6105e-05, -3.8479e-05,
        -2.5742e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3746e-04, -2.2277e-02, -4.1060e-05,  9.9484e-05, -5.1909e-02,
         6.1222e-05,  7.9912e-05,  9.9920e-05, -1.7451e-05, -5.9225e-02,
         3.5004e-05, -2.4704e-05, -1.6402e-05, -4.8705e-05,  1.3826e-05,
        -1.1017e-05,  9.6978e-05, -4.1912e-05, -3.6623e-05, -4.8730e-06,
        -1.0520e-06,  1.6618e-05,  1.0334e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0960e-04,  1.1732e-01, -3.6344e-05, -4.5948e-05, -3.6517e-04,
         1.4416e-03,  2.9161e-05, -2.9227e-03,  4.3134e-05,  8.3983e-04,
        -8.8448e-05, -2.0184e-03, -1.9374e-03, -1.7832e-05, -4.1864e-05,
        -1.1168e-05, -3.8391e-05, -3.1630e-05, -3.2078e-05, -4.5255e-05,
        -4.0374e-05, -7.2802e-05, -3.5853e-05, -6.7498e-05, -4.2469e-05,
        -1.0280e-05, -7.9485e-06, -1.0248e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4485e-05, -1.5656e-01, -1.4362e-04, -8.0922e-06,  5.1174e-05,
        -5.2661e-02,  2.2779e-05, -2.8942e-03, -1.7270e-05, -2.1675e-03,
        -1.8531e-05,  8.9537e-06, -1.1202e-03,  9.2084e-05,  9.0953e-05,
         5.1723e-05,  1.5260e-04,  3.5435e-05,  4.1723e-05, -5.4009e-05,
        -1.9004e-05,  8.0853e-06, -4.3960e-06,  1.0706e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #700: [tensor([ 4.4097e-04,  2.7155e-01, -6.5872e-05, -3.3909e-04, -7.2276e-05,
         2.0217e-05,  2.4694e-03, -2.0250e-04,  9.9634e-04,  1.8574e-04,
         5.8461e-05, -5.7014e-06,  1.5934e-05, -1.4254e-04,  2.3676e-04,
        -9.0570e-06, -3.9621e-05,  2.0468e-05,  1.2802e-04,  7.9051e-05,
        -1.5040e-04, -7.6856e-06, -8.0895e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3107e-03,  1.7259e-01, -1.8239e-04,  8.9669e-05,  9.7117e-05,
        -8.3904e-05,  1.5449e-02,  2.9510e-05,  2.0135e-02,  2.7635e-04,
         2.3080e-05,  3.1669e-05,  2.1909e-05, -3.5603e-05,  3.3624e-05,
         2.0359e-05,  1.1041e-04,  1.8744e-04, -1.2868e-04, -6.7521e-05,
         1.0564e-04, -1.6488e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2663e-04,  1.2302e-01, -4.5051e-05, -4.9579e-05,  1.3538e-04,
         2.8261e-05,  2.7095e-02, -7.4961e-07,  6.9307e-03,  2.6729e-04,
        -5.9330e-05, -7.6348e-05,  4.4983e-05,  8.7847e-06, -7.6961e-05,
        -1.6824e-05, -8.2758e-05, -4.7248e-05, -3.8143e-05,  1.6061e-05,
         8.6925e-06,  4.7546e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2765e-04,  2.2207e-04,  2.5573e-05,  1.5204e-04,  4.2781e-02,
         1.4938e-04,  5.2670e-06, -9.1803e-06,  4.7343e-05,  1.8365e-05,
         4.9922e-03,  1.2198e-05,  5.3655e-05,  3.4268e-05, -1.9047e-06,
        -4.2941e-06, -9.4653e-06, -8.0242e-06, -4.1653e-05, -1.5046e-05,
        -1.5378e-05, -4.5904e-06, -3.0025e-05,  9.1335e-06,  2.1013e-06,
        -8.4579e-06,  2.2912e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0575e-04,  8.1747e-05,  4.5161e-05,  1.7131e-04,  7.0245e-02,
         1.6653e-04,  7.8943e-05, -7.5942e-05,  3.1352e-05,  6.9268e-05,
         4.0915e-04, -6.3507e-05,  1.1620e-05,  2.0755e-05,  4.8135e-05,
         1.2563e-04,  4.8945e-06,  1.8308e-05,  8.1775e-05,  3.6565e-05,
         5.8232e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7629e-04, -9.3509e-05,  7.5691e-06,  8.2990e-05,  8.7325e-02,
         1.2186e-04,  7.3373e-05, -1.2686e-04, -7.1445e-05,  7.6419e-05,
         4.2997e-05,  1.6136e-06,  4.8764e-05, -2.8478e-05, -5.7344e-05,
        -6.3061e-06, -8.9337e-05,  4.8467e-05,  1.7831e-05, -7.6530e-05,
         8.9421e-05,  1.3615e-05, -5.5619e-05,  8.2865e-06,  4.4978e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0732e-04, -3.0761e-02, -1.8308e-05,  1.7943e-05,  6.2857e-05,
        -1.0563e-01,  9.4959e-05, -7.1297e-06, -1.9542e-02, -5.2587e-03,
         1.0605e-05,  7.3445e-05,  1.2945e-04, -1.6415e-05, -7.8710e-05,
        -3.8276e-05,  5.5563e-05,  7.8571e-05,  2.0307e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7509e-04, -2.1975e-03, -4.8238e-05,  1.0082e-04, -1.4721e-06,
        -1.4966e-01,  9.5485e-05,  2.4958e-05,  9.0354e-04, -1.9255e-04,
         2.7268e-05,  7.7625e-05,  3.2592e-05, -5.0926e-05, -5.3164e-05,
         8.3166e-05,  5.9803e-06,  2.0084e-05, -2.0683e-05,  7.0102e-05,
         3.3999e-05,  2.2167e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2618e-04, -1.1908e-01, -4.0745e-05,  7.4209e-05,  1.8055e-05,
        -4.3372e-02,  6.2544e-05,  6.3201e-05, -5.0096e-03, -3.5539e-04,
        -2.7930e-05, -6.6755e-05,  1.1513e-04,  1.4121e-05, -2.7765e-05,
        -6.2554e-05,  6.8925e-05,  4.2803e-05,  5.5081e-05, -2.0452e-06,
        -5.3719e-05,  2.0950e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1148e-04,  1.4245e-01, -1.0826e-04,  3.1785e-04,  4.4155e-06,
         4.0089e-03,  2.9659e-05, -1.5882e-06,  9.1623e-05, -9.5201e-06,
         3.6776e-05, -2.8546e-05, -7.0508e-05, -7.5259e-05,  5.0288e-05,
        -2.3829e-05,  8.9973e-05,  3.7507e-05, -8.9003e-05,  1.0868e-04,
         1.0519e-06,  4.9656e-05,  7.3114e-05,  2.0409e-05, -9.4200e-06,
        -8.9372e-06,  9.6308e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8299e-04,  2.1067e-01,  1.6886e-05,  7.8563e-05,  1.5376e-04,
         2.6553e-03, -1.5156e-04,  6.0329e-05, -8.6177e-05,  6.9910e-05,
         2.5147e-05,  7.6108e-05, -8.1910e-05, -6.4302e-05,  1.1235e-04,
         2.1197e-04, -1.9169e-04, -1.0802e-04,  6.9270e-05,  1.3769e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6418e-05, -2.8636e-01,  1.5356e-04, -1.1229e-04, -1.3852e-04,
         1.3362e-03, -4.0869e-05,  6.6151e-05,  5.5102e-05, -5.9201e-05,
         1.2166e-04, -9.1181e-05,  3.8088e-05, -5.6200e-05, -9.3841e-05,
        -7.4821e-05,  8.2187e-05, -1.3802e-04,  6.8197e-05,  4.9260e-05,
         3.9353e-05, -1.0846e-04, -6.5693e-06,  3.5488e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #750: [tensor([ 2.4504e-04, -2.4276e-04, -2.5588e-05, -1.6445e-03,  7.0764e-05,
        -1.5088e-04,  1.5617e-01, -5.9208e-05, -2.9889e-03, -2.7243e-04,
         4.2554e-05,  3.8976e-05, -1.8198e-05, -9.9302e-05, -2.3459e-05,
        -1.5734e-04,  1.1577e-04,  1.6001e-04, -1.1470e-04,  5.3601e-05,
         2.2534e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5362e-04,  5.8708e-06,  3.2802e-05, -2.3776e-04, -1.7086e-04,
         9.2557e-03,  1.1549e-02, -8.3282e-05,  3.0697e-06,  1.9613e-01,
         9.2105e-07, -7.2030e-05,  3.0824e-03, -2.6184e-05, -8.3844e-05,
        -1.3486e-04, -1.9396e-05, -1.2040e-04, -2.1442e-05, -8.8285e-05,
        -8.7686e-05,  5.6269e-05, -5.8096e-05, -1.2545e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1199e-04,  1.9590e-05,  2.1266e-06,  3.3478e-05, -2.4637e-05,
         4.6878e-03,  2.5162e-02,  4.8345e-05,  4.8830e-04,  4.2784e-02,
        -7.2166e-05, -2.7927e-05,  2.4427e-02,  1.9673e-05, -2.5028e-05,
        -3.0225e-05, -9.0631e-06,  2.6675e-05, -1.0653e-05, -4.8115e-05,
        -1.5337e-05,  1.2549e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5146e-04,  1.1256e-04, -1.7961e-05, -1.5368e-04,  1.0532e-04,
         7.9383e-03,  4.3419e-02, -7.4043e-05,  9.3029e-05,  1.0618e-01,
        -1.6607e-04, -7.7259e-05,  3.3985e-03,  5.6020e-05,  2.2672e-05,
         7.6748e-06, -2.0249e-05, -4.0913e-05, -1.3204e-05,  1.0709e-04,
         9.6570e-09, -3.4946e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3458e-04, -2.2016e-05, -1.7975e-04,  6.6300e-06,  8.7236e-05,
        -1.0439e-04, -1.6762e-01,  2.4815e-05,  7.1178e-05, -1.6265e-04,
         6.1036e-05, -5.6151e-05, -3.8940e-04,  1.0200e-05, -2.8533e-04,
        -2.5037e-04,  8.9194e-05, -1.5362e-07, -5.8985e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9301e-04,  6.7872e-05, -6.0163e-06,  1.1702e-04,  9.5545e-05,
         3.7000e-05, -1.9932e-01,  1.2252e-05, -2.0202e-05,  1.3888e-05,
        -2.8081e-05,  4.0203e-05,  9.0668e-05, -6.3355e-05,  6.0699e-05,
        -1.5458e-04,  7.1365e-05, -7.7779e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5882e-04, -5.1023e-05,  1.8958e-04,  2.4487e-05, -9.0619e-05,
        -6.4531e-05,  2.1547e-01, -3.8236e-05,  1.5762e-04,  4.0539e-05,
        -8.6034e-05, -3.8840e-05, -3.7509e-05, -5.2029e-05,  5.9989e-06,
        -8.7320e-05,  3.4520e-05, -2.3400e-05, -4.4394e-05,  8.8629e-05,
        -5.2380e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7839e-04, -1.7124e-01, -1.2826e-05,  1.7396e-05, -5.1090e-02,
        -2.6537e-02, -5.9137e-05,  1.1867e-05, -2.3041e-04, -2.0895e-04,
        -8.4438e-05,  1.7344e-05,  5.1106e-05, -8.0322e-05,  2.9913e-05,
         6.9707e-05, -1.6745e-04,  3.2651e-05,  8.9445e-05, -1.3427e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2198e-04,  1.9634e-01, -6.3928e-05, -1.9828e-04,  4.4036e-02,
         2.0822e-02, -8.1025e-05,  3.7337e-05,  6.9346e-05,  4.8833e-05,
        -9.1337e-05, -7.6459e-05, -1.0010e-04,  6.8686e-05,  8.7660e-05,
         8.5038e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9108e-04, -9.6602e-02,  5.7834e-05,  1.6826e-05, -7.2200e-02,
        -2.7458e-02,  8.8694e-05,  2.9302e-05,  3.7639e-05,  8.5432e-06,
        -2.2723e-05,  8.8442e-05,  1.0202e-04, -5.9626e-05,  1.2727e-05,
         2.0340e-05, -4.7965e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4172e-04, -8.2658e-02, -2.3734e-05,  3.9534e-04, -2.7790e-02,
         3.5394e-05, -3.0051e-05,  1.4531e-04, -3.6906e-05, -2.7890e-06,
         7.0944e-05,  1.6012e-05,  5.7963e-05,  1.5322e-05,  1.0877e-05,
        -4.7396e-05,  1.2084e-05,  3.9215e-05, -5.9639e-06,  2.2618e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7461e-04, -1.8401e-01, -7.5829e-05, -4.0983e-03, -4.7677e-02,
         2.3674e-05, -2.7208e-05,  3.8017e-05, -3.4728e-05,  6.1336e-05,
        -6.3432e-05, -3.2516e-03,  2.2331e-05, -1.0357e-04, -1.8420e-04,
         8.9496e-05,  6.6990e-06, -8.7419e-06,  3.4316e-05,  7.1994e-06,
         4.6580e-05, -4.5352e-05,  5.9707e-05,  3.6605e-05,  2.7713e-05,
        -3.0327e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #800: [tensor([ 8.3220e-05,  9.0511e-02,  1.0561e-05,  2.4969e-05, -1.4777e-04,
         2.0150e-03, -6.3668e-05, -3.7135e-05, -1.6064e-06,  9.9146e-05,
        -9.1853e-05,  3.4094e-05, -2.4473e-05,  3.5097e-02,  3.9188e-05,
         1.1288e-04,  2.5800e-05,  2.6391e-05,  3.6156e-05,  1.3649e-02,
        -7.7547e-05, -4.2395e-06,  8.1559e-02, -2.0801e-05, -3.7013e-05,
        -8.0502e-05, -1.1827e-05,  2.0141e-05, -5.9574e-05,  2.2607e-05,
        -1.6240e-05,  1.7650e-05, -4.5265e-05,  7.9002e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2093e-05,  1.4905e-01, -4.8344e-05,  2.2327e-05, -9.5492e-05,
         1.3651e-03, -7.7522e-05, -2.4350e-05, -6.6423e-05,  4.2892e-05,
        -5.7095e-05,  6.5629e-06, -7.6733e-05,  3.5330e-03,  3.4474e-05,
         6.0698e-05, -5.0370e-05, -3.9682e-05,  3.5513e-05,  2.1121e-03,
        -5.4897e-05, -2.9226e-05,  5.7002e-02, -2.1708e-05,  9.6465e-05,
         2.7998e-05, -2.2208e-06, -2.1646e-05, -3.2961e-05, -1.0308e-05,
         2.1086e-05, -4.1727e-05, -2.1283e-05, -4.6209e-06, -2.6458e-05,
        -9.3724e-05,  1.5237e-05, -5.5628e-05,  5.2799e-06, -2.5319e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1277e-04, -9.6010e-02, -1.2977e-04, -2.4261e-05, -5.6350e-04,
         3.1545e-05, -2.3993e-01, -1.0900e-05, -1.0749e-04,  9.8093e-05,
        -1.0207e-05, -5.3527e-05, -4.8477e-05,  1.0262e-04,  1.3168e-05,
        -3.4778e-04, -9.9560e-05, -1.6818e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9412e-04,  5.4227e-02, -2.7888e-04, -3.4529e-06,  1.4363e-04,
        -1.8126e-04,  2.4060e-01,  1.0426e-04, -1.5844e-04, -1.7559e-04,
        -8.9721e-05,  7.5156e-05,  2.6911e-05, -6.0221e-05,  8.1559e-05,
        -3.8973e-06, -2.6332e-04,  4.5565e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7336e-04,  2.1745e-01, -1.2806e-04, -3.9372e-05, -4.7211e-06,
         9.1123e-05,  5.5879e-02,  1.9660e-04,  1.4642e-04,  8.5765e-05,
        -1.7211e-04, -8.4524e-05,  3.2126e-05,  1.6055e-04, -1.2095e-04,
         9.7401e-05,  5.0271e-06,  8.6566e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2550e-04, -1.2202e-01,  9.1605e-05,  3.4751e-05,  3.1412e-06,
         4.6139e-05, -3.1476e-03, -2.6689e-03, -6.9206e-05,  8.4676e-05,
        -1.4090e-03,  2.8932e-05, -3.7466e-02, -1.1215e-01,  7.3073e-05,
        -1.0187e-04, -5.7066e-05,  1.7286e-05,  4.7854e-05,  3.6716e-05,
         1.4616e-05,  6.4904e-06,  3.0016e-06,  3.8316e-06,  1.4713e-05,
        -4.8706e-05,  1.8040e-05, -8.1877e-05,  4.2802e-05, -3.6527e-05,
        -1.6379e-05, -1.1349e-05, -1.1975e-05,  7.7617e-06, -3.6320e-05,
        -4.2325e-05, -6.0480e-05,  1.6446e-05, -3.3640e-05, -2.1570e-05,
        -3.8682e-05,  1.7363e-05,  3.4911e-05, -4.0646e-05, -2.1126e-05,
        -5.0119e-05,  1.7948e-05,  7.0390e-06,  7.4537e-05,  4.8271e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4683e-05, -6.8627e-02,  1.1699e-04, -5.1519e-05,  7.5114e-06,
         7.2481e-05, -1.3836e-03, -2.7657e-02, -2.1008e-05,  5.5080e-05,
        -2.3682e-04,  7.6859e-05, -1.8395e-02, -8.7038e-02,  4.0710e-05,
         1.0019e-06, -1.6635e-05, -3.5834e-05, -3.6274e-05,  5.6500e-05,
         1.1048e-05,  3.5184e-05, -5.4163e-06,  2.5021e-05, -2.3682e-05,
         1.7712e-06, -1.0960e-05, -5.3242e-05,  1.9823e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6819e-04, -1.3166e-01,  9.0100e-06, -6.5838e-06,  6.1588e-06,
        -3.2546e-05, -1.8850e-02, -1.8647e-02,  1.3757e-05,  6.5042e-05,
        -7.7847e-03,  5.5388e-06, -1.3423e-02, -6.9435e-02,  2.9127e-05,
         2.2821e-06, -7.8613e-06,  2.4189e-05, -2.5255e-05,  1.6009e-05,
         2.8942e-05,  3.8043e-05, -7.9321e-05, -5.2060e-05,  2.0629e-05,
        -1.1598e-05,  2.2849e-05, -1.4655e-05,  2.2967e-05,  2.9552e-05,
        -2.9389e-05, -2.5639e-05,  3.8541e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4754e-04,  9.8037e-06,  2.1376e-04,  7.4203e-05, -1.0723e-04,
        -2.4583e-06, -4.8833e-02, -1.2511e-01,  2.2836e-05, -5.9254e-05,
         6.9513e-05,  4.0294e-05,  2.6598e-04,  3.4552e-05, -1.1046e-04,
         1.5868e-05,  7.6015e-05, -7.7943e-05, -1.4855e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5078e-04, -2.9550e-04,  1.5036e-04, -8.1808e-05, -3.0265e-04,
        -3.5071e-05,  9.4960e-03, -2.2653e-01, -4.5587e-05, -1.4569e-04,
        -1.2667e-04,  1.0008e-04,  1.0092e-04, -1.5003e-04,  1.6378e-04,
        -1.1288e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8054e-04, -1.0515e-04,  4.7762e-05,  1.7917e-04,  4.6268e-05,
         1.1821e-04,  5.2264e-04, -1.5098e-01,  7.4761e-05,  2.4088e-05,
         8.2298e-05, -2.1530e-05,  1.2859e-04, -1.1299e-04, -8.5161e-06,
        -5.1142e-05, -6.4099e-05,  1.6997e-05, -7.5025e-05, -3.4574e-05,
        -7.7436e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6698e-04,  1.0340e-05, -1.8635e-04, -7.1422e-06,  1.5157e-04,
         1.4647e-04,  1.0463e-01,  3.0257e-02, -4.9038e-05,  6.0370e-06,
         1.6771e-05, -6.3604e-05, -1.8630e-04, -1.4309e-04,  5.6984e-05,
         9.4524e-05,  1.6031e-04,  7.4797e-06, -1.3625e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #850: [tensor([ 1.0762e-03, -2.3824e-04, -2.9839e-05, -9.1736e-05, -1.6818e-05,
        -1.3472e-04, -1.6783e-04,  5.1474e-05, -1.7581e-05,  1.5518e-05,
         5.5228e-05,  1.1308e-05,  2.1929e-05,  1.4775e-03,  2.3352e-01,
         2.1177e-06, -1.5932e-05, -2.9803e-05,  5.9224e-04,  3.0102e-05,
        -1.0273e-04,  3.2102e-05, -2.5506e-05,  1.4377e-05, -7.1007e-05,
        -3.8643e-05, -8.1114e-05,  6.2833e-05, -7.7916e-05, -1.6206e-05,
        -4.8994e-05,  2.9006e-05, -1.1785e-04, -2.5781e-05,  5.9160e-07,
         1.0816e-04,  2.7915e-05,  2.4816e-05,  2.9907e-05, -2.9983e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7455e-04,  7.5606e-03,  7.2877e-06,  6.3881e-05,  4.6977e-05,
        -1.1165e-04,  1.7876e-02,  2.3902e-05,  6.9280e-05,  2.3644e-05,
         1.0295e-05, -1.0898e-06, -3.3984e-06,  2.0395e-02,  1.9829e-02,
        -2.1050e-05,  1.9704e-05,  1.6783e-05,  8.9744e-04, -5.6861e-06,
         3.7689e-04,  3.0237e-05,  5.4287e-06,  3.1355e-05, -1.4917e-05,
         3.3941e-05, -3.3067e-06, -1.5810e-05, -2.3449e-05,  9.8836e-06,
         6.3426e-06, -6.6106e-06,  3.4834e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4163e-04,  2.3890e-03, -3.1157e-05,  1.9929e-05,  1.3931e-05,
        -9.5564e-05,  3.4664e-02, -2.2193e-05,  1.2603e-05,  3.2767e-05,
         1.8270e-05, -1.0070e-06,  1.7058e-06,  1.8866e-02,  6.4457e-03,
        -1.6620e-05, -1.1823e-05, -7.1533e-05,  5.8566e-03,  1.3041e-05,
        -1.4380e-05, -2.2502e-05, -2.4052e-05,  3.1276e-05, -2.1958e-05,
        -1.9264e-05,  6.5823e-06, -3.4115e-06, -3.1771e-05,  2.3158e-06,
         1.4370e-05, -1.2462e-05, -2.5187e-05,  7.2913e-06,  1.6987e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6062e-04, -1.6509e-01, -4.2784e-05, -1.5929e-02, -2.1442e-02,
        -6.2879e-06,  2.8052e-04, -2.5478e-05, -4.4641e-05, -1.4905e-05,
        -5.0024e-03, -3.9123e-05,  7.7694e-06,  3.9047e-05, -8.8694e-05,
        -3.6248e-05, -4.6673e-05, -2.3534e-04, -3.4140e-05, -2.4664e-03,
         2.1072e-05, -5.2763e-05, -7.7322e-03, -3.3625e-06, -2.3716e-05,
         3.7401e-05, -4.4976e-06, -2.8363e-07, -1.9374e-05, -1.3062e-05,
        -4.5943e-05,  1.7583e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2655e-04, -5.4557e-04,  2.4806e-05, -6.6371e-04, -9.6444e-02,
        -3.7369e-05, -3.9288e-03,  5.4020e-06,  1.0911e-06, -1.1005e-04,
        -2.9718e-03,  1.2619e-05, -8.2557e-05, -7.1151e-05, -3.0913e-05,
        -1.6708e-05, -1.0103e-07,  7.8795e-05, -1.4447e-05, -2.8803e-02,
         3.3808e-06, -9.2507e-05, -2.1549e-02, -1.0328e-05, -2.7156e-05,
        -5.2572e-06, -5.9268e-05, -4.3624e-06, -3.2965e-05,  5.0387e-05,
        -9.5039e-05, -5.0836e-05,  9.4189e-06, -2.5968e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1181e-04,  2.4311e-02, -2.2921e-05,  7.2287e-02,  2.2555e-02,
         9.8208e-06,  1.4832e-03,  1.2739e-05,  3.2205e-06,  3.2491e-05,
        -4.6423e-05, -7.8543e-05,  9.5589e-05, -5.5960e-05, -7.6646e-05,
        -3.1516e-05,  1.0524e-04, -2.8092e-04, -9.1746e-06,  1.5304e-03,
         8.1958e-06,  8.8337e-05,  9.2659e-03, -2.9339e-05,  1.5387e-05,
        -2.3023e-06, -9.3697e-06,  8.0866e-06,  7.2521e-07,  4.1036e-06,
        -8.3697e-06, -8.4268e-06, -9.2080e-06, -3.1189e-05, -1.5260e-05,
        -2.0424e-05,  1.2434e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0354e-05, -1.3787e-01,  3.5718e-05,  7.3147e-06,  4.0488e-03,
        -5.2133e-05,  4.9361e-04, -5.7143e-05, -2.7266e-05,  6.0221e-06,
        -5.4358e-07,  2.6015e-05, -7.2626e-05, -4.6578e-02,  1.5795e-05,
        -2.5598e-04, -9.1557e-06, -1.3664e-02, -1.0573e-04, -1.2294e-04,
        -4.3663e-05, -3.6736e-06, -1.3954e-05, -3.1844e-05, -3.8086e-05,
        -2.2659e-05,  3.5195e-05, -1.5540e-05,  1.5461e-05, -3.0800e-06,
         2.2894e-05,  2.4475e-05,  1.3432e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3294e-04, -5.6252e-02, -4.2247e-05, -1.2197e-05, -1.1401e-02,
         5.0309e-05, -9.2889e-03, -4.9984e-05,  2.5730e-05,  2.8543e-05,
         7.7001e-06,  1.0623e-04, -3.8471e-05, -8.8590e-02,  2.1121e-05,
        -5.9591e-05,  4.5471e-05, -4.6568e-02, -2.7859e-05, -2.8076e-04,
         5.0172e-06, -9.1207e-05, -3.8166e-05,  3.9604e-05,  2.7627e-05,
         6.1012e-06, -8.8136e-06, -2.4688e-05,  1.2178e-06,  4.4254e-05,
        -7.7978e-06, -3.6121e-05,  6.8331e-05, -2.2522e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4639e-04,  2.0035e-04, -4.5261e-06, -4.2420e-05, -1.3846e-01,
        -2.7586e-05, -3.5214e-03, -2.5913e-05,  1.5461e-05, -7.9416e-05,
         4.1673e-06, -4.1652e-06, -6.5234e-05, -8.1123e-03,  2.6143e-05,
        -4.2198e-04, -3.1133e-05, -2.2704e-02, -2.8899e-05, -2.3957e-04,
        -5.1100e-05, -4.1955e-05,  2.8666e-05, -3.0761e-05, -1.8935e-05,
         2.0133e-05, -7.9324e-05,  5.0904e-05, -8.2059e-06, -5.5174e-05,
        -3.2645e-05,  5.7500e-05, -3.9325e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2114e-04, -8.0496e-02,  9.0887e-07, -7.8290e-05, -4.5735e-02,
        -5.6945e-05, -3.3207e-05,  9.3408e-05, -7.2275e-05,  3.0448e-04,
        -8.3772e-05, -3.6092e-05, -2.3785e-05,  3.3547e-05, -4.8592e-05,
         2.3098e-05,  2.8061e-05, -7.4926e-06,  1.3604e-05,  6.1911e-06,
         5.3665e-05, -4.2971e-06,  4.3089e-05, -2.0050e-05,  3.4366e-05,
         1.3941e-05,  1.4264e-05,  1.0919e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7692e-04,  6.0194e-02, -1.1937e-04,  5.2304e-04,  1.7330e-01,
         1.2275e-04,  1.6824e-04,  4.7526e-05, -3.6159e-05, -5.2985e-04,
         6.6822e-06,  6.9069e-03,  1.1603e-04,  2.6541e-05,  4.9882e-05,
        -1.8796e-05, -9.0344e-05, -5.9275e-05,  4.4214e-06, -1.2163e-05,
        -8.2606e-05, -1.3251e-04, -4.1422e-05,  2.8924e-05,  5.3470e-05,
         1.0858e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0473e-04, -1.6189e-01, -1.9493e-06, -3.4158e-03, -9.1440e-02,
        -1.1989e-04, -8.0879e-06,  7.5465e-05,  9.7718e-05, -6.6744e-04,
         6.0110e-05, -1.3590e-02, -4.7991e-05, -3.7681e-05,  2.3610e-05,
        -1.8722e-05, -2.9908e-05, -4.5915e-05,  2.9351e-05,  4.4804e-05,
        -6.6199e-05,  1.3065e-04, -3.2777e-06, -1.4337e-06,  4.5124e-06,
         4.3191e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #900: [tensor([-5.4103e-05, -2.1837e-06,  3.6881e-05,  9.3202e-02, -2.6102e-05,
         3.2282e-03, -3.8199e-05, -6.8915e-06,  9.6909e-03,  3.5232e-05,
        -5.4054e-05, -8.3176e-05,  1.1418e-05,  8.1327e-03, -1.7159e-04,
        -3.2095e-06, -1.2591e-05, -1.6515e-05,  2.4490e-05,  1.5750e-05,
         4.3310e-05, -2.7222e-05, -4.1351e-05, -6.8787e-05, -2.4617e-05,
        -3.2199e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7030e-05, -1.8814e-01, -1.4390e-04, -2.2598e-04, -8.5431e-05,
        -1.0277e-02, -3.8175e-05, -1.4501e-04, -4.1666e-03, -8.2462e-05,
        -3.6253e-05, -2.3243e-04,  3.5345e-05, -1.3218e-04, -1.4799e-04,
        -1.3555e-05, -3.0499e-05, -4.8956e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0620e-04, -1.8873e-01, -1.3238e-04, -1.5221e-04, -3.2830e-05,
        -3.4481e-03, -1.1866e-04,  4.7844e-05, -9.2561e-03, -5.2060e-05,
        -8.6419e-05, -8.7851e-05, -2.5699e-05, -5.6082e-05,  1.1978e-05,
        -6.2881e-05,  5.5814e-05, -9.0838e-05,  2.7475e-05, -3.4768e-05,
        -9.5712e-05,  3.6969e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5507e-04, -2.6553e-01,  1.1456e-04, -7.3291e-05, -2.0420e-04,
        -1.7360e-02,  5.4241e-05, -1.7664e-04, -5.8921e-04, -1.2369e-04,
         5.4069e-05,  5.6826e-05,  6.6983e-05,  9.2293e-05, -9.1943e-05,
        -8.1099e-05, -5.0815e-05, -1.3109e-05,  6.6154e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5577e-04, -7.8189e-02, -1.2805e-05, -2.3937e-02,  2.5103e-05,
         1.0344e-04, -5.7432e-05, -1.4827e-01, -2.2210e-05, -1.7106e-05,
        -1.8394e-06,  8.7907e-06, -1.4879e-04, -2.1488e-02, -2.8328e-05,
         2.7437e-05, -1.6199e-05,  5.9517e-05,  2.1189e-06, -1.1394e-04,
         5.5433e-05, -8.3718e-05, -6.1458e-05,  1.1769e-04,  3.1289e-05,
        -3.5240e-05,  1.7418e-05,  3.1577e-05, -3.8729e-05, -1.9953e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6716e-04, -2.9841e-01,  2.6806e-05, -8.3600e-03, -1.1663e-04,
        -2.5917e-05,  1.5529e-04, -4.1545e-03, -1.5531e-04, -1.9188e-05,
         2.3381e-05, -1.6255e-04, -9.2959e-05, -5.9386e-03, -7.4454e-05,
        -1.2559e-04,  4.1233e-06, -5.3496e-05, -1.8924e-04,  2.3090e-05,
         1.2190e-05, -9.3383e-05, -7.0065e-07,  3.4185e-05, -2.9225e-05,
        -1.0259e-04,  6.5041e-05, -5.7521e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6944e-05, -1.2166e-01,  1.7030e-05, -5.0066e-03,  2.9262e-05,
         1.4071e-05,  9.2884e-05,  7.7688e-03,  4.3691e-05, -2.7299e-05,
         2.4832e-05,  7.8307e-05, -4.8539e-06, -1.5077e-02, -3.0734e-05,
        -1.1608e-05,  5.7356e-05, -4.1296e-05,  2.1942e-05,  2.2188e-05,
         3.9823e-05,  2.2812e-05,  2.4052e-05,  2.9493e-05,  2.7489e-05,
         5.5290e-06,  7.5827e-06,  2.2337e-05,  2.8965e-05, -8.6933e-06,
        -2.2429e-05,  9.0089e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9798e-04, -5.2869e-05,  1.5488e-04,  2.9710e-04,  1.9371e-05,
         5.3946e-04,  2.0821e-06,  1.2421e-03, -4.6166e-05, -4.7072e-06,
         1.1660e-05, -3.1325e-06, -8.0546e-06,  1.8868e-06, -6.3467e-06,
        -1.8643e-05, -2.3390e-05, -6.8907e-05, -1.6713e-05,  2.0927e-05,
         1.7706e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0652e-05, -4.3616e-05,  2.0027e-04,  3.6468e-04,  2.0073e-04,
         6.2021e-04, -6.0032e-05,  7.7675e-04, -3.3492e-05,  9.8370e-07,
         2.5869e-05, -1.0969e-05, -1.9389e-05, -6.5093e-05, -1.1139e-05,
        -7.3228e-06,  3.1851e-05,  1.9996e-05, -2.1230e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1184e-05, -5.0992e-05,  1.1366e-04,  4.2534e-04, -2.3610e-05,
         8.4343e-04, -5.4060e-05,  5.9601e-04, -4.6953e-05,  1.9856e-05,
         2.6470e-06,  1.2468e-05, -1.1561e-05,  5.3806e-05,  6.6481e-06,
         1.2114e-05,  1.0572e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5019e-05, -7.5066e-02,  2.8944e-05,  6.5597e-06, -6.4618e-04,
        -4.2696e-02,  1.9090e-04,  8.9148e-05, -2.2364e-05, -3.6752e-04,
        -6.0155e-02,  2.3967e-04,  1.5206e-04,  3.7351e-05,  5.3624e-05,
        -7.8332e-07, -3.0494e-07, -2.9949e-05, -1.3171e-04,  9.6493e-05,
         3.3124e-06, -7.6064e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2936e-04, -2.7105e-02,  7.0857e-05,  8.0801e-05, -8.4836e-03,
        -1.2414e-01, -3.0370e-05, -1.1472e-04,  5.4030e-05, -6.5956e-04,
        -8.9187e-02,  1.0861e-04,  1.0225e-04, -9.7371e-05, -4.9419e-05,
        -6.1619e-06, -3.1918e-05, -1.9384e-06, -5.4300e-05, -2.8587e-05,
         2.1795e-05, -4.6674e-05, -8.0792e-05,  1.7402e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #950: [tensor([-1.1298e-05, -4.1189e-02, -1.4880e-05,  4.6630e-05,  2.3033e-05,
         4.0351e-05,  1.0976e-05,  1.9194e-05,  1.4064e-03, -3.1927e-02,
         1.4209e-05,  4.0631e-05, -2.9731e-05,  4.5716e-05,  8.2793e-04,
        -1.8235e-04,  5.9051e-06,  1.5390e-05, -4.5713e-06,  2.7144e-05,
         6.4792e-06, -1.9348e-05, -1.1324e-05,  9.9526e-06, -1.1366e-05,
         3.4350e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6671e-04, -6.2137e-02, -2.6118e-05,  1.9967e-05,  5.5389e-05,
         1.0602e-04,  5.0250e-05, -3.3620e-06, -1.8020e-02, -5.1419e-02,
        -9.0793e-06,  3.8331e-05,  7.1182e-05,  2.4902e-05,  1.4569e-04,
        -2.8056e-03,  4.0303e-05,  6.3608e-06,  2.3832e-05, -3.6063e-05,
         2.6193e-05,  1.5535e-05, -4.0409e-05, -2.4532e-05,  1.1858e-05,
        -5.4298e-06, -6.2529e-06, -1.0714e-05, -1.0704e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6915e-05, -1.3237e-01, -2.2181e-05, -6.2060e-06, -8.1331e-06,
         2.9623e-05,  3.8472e-05, -3.4056e-06, -1.3436e-03, -5.9107e-02,
         7.7368e-05,  4.1470e-05, -4.0077e-05, -8.1146e-05,  5.6078e-04,
        -2.5846e-04, -9.5841e-05,  2.6342e-05,  9.3922e-06, -1.9533e-05,
        -1.5906e-05,  1.2226e-05, -1.8300e-06, -6.3613e-06,  5.1723e-05,
         1.2796e-06,  4.4586e-06, -8.9522e-06,  6.5248e-05,  1.3567e-05,
         7.9624e-05, -1.5463e-05,  2.1288e-06, -1.5597e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2009e-05, -1.3463e-02, -2.0882e-05, -9.8559e-07,  4.9528e-05,
         8.1497e-05, -6.8417e-05,  6.9097e-05, -2.8429e-03, -1.5439e-01,
         6.8113e-05,  2.7886e-05,  1.6077e-05, -1.5092e-05,  3.5479e-04,
        -1.4465e-03, -9.2971e-05, -6.2873e-05, -5.4005e-05, -2.7795e-05,
        -2.8222e-05, -4.8441e-05,  4.4993e-06, -3.6172e-05,  1.9988e-06,
        -1.5990e-05,  6.6444e-05, -6.1425e-05,  1.8640e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9741e-04, -8.6442e-02, -9.2186e-06,  2.9304e-05, -4.4330e-06,
         7.2090e-06, -3.2459e-05,  5.6248e-05, -6.6478e-03, -2.4076e-02,
         9.8380e-05, -3.5833e-06, -1.6195e-05,  3.1118e-05,  6.9099e-04,
        -9.8715e-03,  4.8156e-05, -6.3998e-06,  2.8621e-05,  4.9937e-05,
        -1.6628e-05, -2.4935e-05,  4.9587e-06,  2.2392e-05, -5.5835e-05,
         4.8241e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3765e-04, -1.9418e-01, -1.7211e-05, -3.9885e-05, -1.6335e-05,
         6.1618e-05, -1.5059e-05,  8.1786e-05, -1.8383e-03, -2.5131e-02,
         1.2177e-04,  1.3774e-04,  8.4756e-05,  5.9297e-05, -3.2864e-04,
        -1.1610e-02,  2.3671e-05,  1.0947e-05,  9.9685e-06,  3.5808e-05,
        -2.3809e-05, -3.3828e-05, -8.5718e-05, -3.0706e-05, -4.2020e-05,
         2.7915e-05,  1.1607e-05,  2.0028e-05, -3.2005e-08, -7.3310e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8294e-04,  3.0089e-02,  2.7546e-05,  3.1216e-05,  4.3453e-05,
        -1.4420e-04, -6.1307e-06,  6.4966e-05, -2.3970e-05,  2.8250e-05,
         1.3963e-05,  1.1771e-04,  1.0365e-04,  9.7995e-02,  4.1447e-03,
        -9.0662e-05, -3.2599e-05, -3.0064e-05,  2.0625e-05, -8.6975e-05,
         2.5663e-06,  7.4292e-06,  6.3698e-07,  2.0617e-05, -1.6923e-05,
        -2.1029e-05, -7.6094e-05,  1.0248e-05, -2.8387e-05, -9.8202e-05,
         1.5871e-05, -4.7399e-05,  8.3320e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4288e-04,  4.1762e-03,  1.5636e-05, -1.9219e-05, -1.4487e-04,
        -7.3183e-05, -1.0408e-04, -1.7890e-05, -5.5062e-05,  4.7231e-05,
        -1.3130e-04, -8.9726e-05,  1.6300e-04,  1.6451e-01,  1.8302e-03,
        -1.8103e-04,  1.1981e-04, -8.4569e-06,  4.1651e-05,  4.5291e-05,
         9.8600e-05,  4.6589e-05, -3.4346e-05,  7.0252e-05, -5.8965e-05,
         7.8755e-05, -6.5934e-05,  5.4157e-06,  4.1580e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3369e-05,  7.0784e-04, -1.1480e-05,  2.5666e-05,  4.3916e-05,
        -7.0522e-05, -7.0045e-05, -2.1830e-05, -5.9617e-05,  2.1326e-05,
        -9.0115e-05, -8.2471e-05, -9.7216e-06,  1.6545e-01, -1.3292e-05,
        -2.3537e-04, -1.5675e-04, -3.6664e-05, -7.8202e-05, -5.3424e-05,
        -3.0260e-05, -7.6946e-05, -7.5803e-05, -3.6872e-05, -1.8982e-05,
        -9.3073e-05, -7.6254e-05, -7.1214e-05, -6.2262e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4095e-04, -8.5615e-02, -1.4769e-05,  5.1617e-05, -2.1658e-05,
        -1.6759e-03,  3.1776e-05,  2.6380e-04,  2.9745e-06,  1.4504e-05,
        -2.4607e-02,  1.4828e-06,  3.1771e-04, -3.8801e-05,  6.3091e-06,
        -1.9816e-05, -3.2462e-04, -3.5358e-05, -3.3671e-03, -1.5856e-03,
        -9.3360e-06, -1.2993e-05, -6.9362e-03, -1.2600e-05,  4.2771e-05,
         5.0234e-05,  2.3261e-05,  2.6448e-05,  1.0941e-05,  1.6999e-05,
        -7.8825e-06,  1.7520e-05,  8.1398e-06, -2.0922e-05,  3.0022e-06,
        -1.7800e-05,  1.1012e-05, -3.6805e-06,  8.4449e-06, -2.3973e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7905e-04, -1.0875e-01,  3.2536e-05,  6.0792e-05,  3.4401e-06,
        -6.1422e-02,  1.4468e-05,  1.3418e-04,  2.9604e-05, -2.3622e-05,
        -1.3444e-03,  3.8481e-05, -3.8813e-04,  1.0552e-05, -1.4882e-05,
         9.4659e-06, -2.2995e-02, -3.5827e-05, -3.4951e-04, -7.6445e-03,
        -4.9515e-05, -6.1104e-05, -2.0109e-02,  1.1462e-06,  2.0505e-05,
         5.3123e-05, -1.6426e-05, -2.7363e-05,  1.7318e-06, -1.9096e-06,
        -2.9134e-05,  1.5529e-05, -2.2367e-05, -9.9689e-06,  7.7841e-06,
         2.0097e-05, -1.4661e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2607e-04, -3.2186e-02, -3.5740e-05,  3.1722e-05,  3.0240e-05,
        -2.5141e-02,  5.5855e-05,  4.1030e-04,  2.7167e-05,  1.7944e-05,
        -9.7812e-03,  1.2469e-05,  2.2277e-04, -1.2405e-05,  1.1175e-05,
         8.6917e-07, -3.0443e-02,  2.7839e-05, -4.7624e-04, -7.1060e-03,
        -6.0390e-06,  9.5718e-05, -3.4809e-02,  1.6306e-05,  3.6192e-05,
         5.0136e-05,  1.6541e-05,  1.9972e-05,  5.8035e-06, -1.7832e-05,
        -3.9883e-05, -9.8379e-06, -2.0106e-06, -1.9063e-05, -2.5901e-05,
        -4.8403e-06,  1.7252e-05, -6.6676e-06,  5.8054e-06,  2.0600e-05,
        -2.5705e-05,  1.4531e-05,  1.5099e-05,  7.1147e-06,  1.4657e-05,
        -7.0146e-06], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1000: [tensor([-2.7282e-04,  2.0684e-01,  2.3001e-06,  3.6440e-03, -4.8050e-06,
         1.2536e-03,  4.3439e-03, -2.9795e-05,  5.3426e-05,  1.3706e-03,
        -2.4049e-05, -8.6348e-05,  3.0704e-05, -1.5308e-05,  6.9089e-02,
         1.5727e-05, -4.6082e-06,  1.6060e-05, -9.2202e-06,  7.2694e-06,
         5.8871e-05, -3.0588e-06, -8.2888e-05,  3.9736e-05, -2.0555e-05,
        -8.9500e-06,  1.2419e-05, -4.7481e-05,  2.4887e-05,  1.3254e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1812e-04,  2.8032e-02, -5.0587e-05,  1.0737e-03,  3.5232e-05,
        -4.1294e-04,  1.4379e-01, -7.9490e-06,  1.1135e-05, -9.0532e-04,
        -4.0177e-05, -4.9631e-05, -9.1502e-06,  1.9761e-05,  7.6846e-03,
         1.6958e-05, -1.2547e-04, -3.8401e-06,  4.3837e-05, -7.0479e-05,
        -3.6256e-05, -4.3489e-06,  2.3065e-05,  1.9619e-05, -1.0196e-05,
        -8.6831e-05, -2.1231e-05, -4.9211e-05, -5.1770e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1297e-04,  7.7247e-03,  3.8827e-06,  5.7322e-05,  1.7176e-02,
         2.2039e-02,  2.6600e-05, -1.4240e-05,  1.7989e-06,  6.0306e-05,
         1.0686e-01, -1.8547e-06,  1.1203e-04,  7.4695e-03,  1.4081e-05,
        -7.8760e-05,  3.3808e-05, -9.6096e-05,  7.0646e-05, -6.1337e-05,
         7.2510e-06,  1.6775e-05,  6.3154e-05, -2.7166e-05, -4.7568e-05,
         3.1686e-05, -3.5224e-05,  2.2704e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2670e-05,  2.4034e-03, -3.8913e-05,  5.0008e-05,  1.7150e-02,
         2.9702e-02, -1.8188e-05,  1.0801e-04, -8.0555e-06,  8.3876e-05,
         2.3832e-03, -1.1217e-05,  1.1628e-04,  1.1227e-01, -3.3766e-05,
        -3.9437e-05,  3.1879e-06, -2.6726e-05,  3.2646e-05,  3.7703e-05,
         1.7164e-05, -4.9617e-05,  2.5601e-05, -9.0542e-06, -2.7202e-05,
        -2.2996e-05,  2.5646e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1663e-04,  2.7070e-03, -5.1560e-05, -2.9668e-05,  1.5515e-02,
         2.5007e-02,  4.0133e-05, -8.4058e-07,  1.0357e-05,  1.4322e-05,
         2.5612e-02, -5.1667e-05,  8.7686e-05,  5.4513e-02, -2.2253e-05,
        -6.1558e-05, -4.9825e-05, -2.4310e-05, -1.4434e-05, -7.5830e-06,
        -1.9939e-05,  5.7090e-06, -1.8741e-05,  4.7717e-05, -2.6385e-07,
         6.8036e-06, -3.2539e-05, -6.5935e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9993e-04,  1.8193e-03, -7.8878e-06,  1.4812e-05,  2.4650e-02,
         6.4594e-05, -5.1668e-05,  3.4293e-05,  1.3298e-03,  1.6209e-01,
        -6.3162e-05,  9.8994e-05, -9.3598e-05, -1.9270e-05, -2.4222e-05,
        -2.2848e-06, -5.0020e-05, -7.6495e-05,  6.5679e-06, -4.5637e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4963e-04,  7.0909e-03,  1.3129e-05, -1.0877e-04,  1.1040e-01,
        -2.7449e-05, -1.5374e-05,  1.6459e-05,  6.9053e-03,  3.3495e-02,
        -5.6801e-05, -6.6659e-05, -5.1350e-05, -5.6840e-06,  5.2575e-06,
         3.7535e-05, -3.8398e-05, -2.3717e-05,  2.7321e-06,  7.9007e-06,
         2.5592e-05, -1.1275e-05, -7.2541e-05, -4.2875e-06,  2.4596e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8608e-05,  9.9736e-03,  7.4646e-05, -4.4071e-05,  6.3007e-03,
         1.3571e-04,  3.6933e-08, -3.6798e-05,  3.4583e-04,  2.7345e-01,
        -4.8958e-05, -2.1137e-04, -5.0206e-05,  6.7787e-05, -3.3963e-06,
        -6.8093e-05,  1.0685e-05,  1.1389e-04,  2.9999e-05, -4.8241e-05,
        -3.7910e-05, -4.4755e-05, -9.6149e-05, -1.4475e-04, -8.5836e-05,
         3.8803e-06, -3.4286e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5142e-05,  1.8726e-01,  4.4010e-05, -9.5541e-05,  8.8646e-03,
        -6.8534e-05, -2.7841e-06,  9.1926e-06,  6.0908e-04,  4.7491e-02,
         4.8711e-05, -6.8146e-05, -1.6005e-04, -2.1002e-05,  6.4722e-05,
         9.9374e-07,  1.0985e-05, -5.9260e-05, -5.5914e-05, -4.7890e-05,
         1.1988e-05,  3.2605e-06, -1.3456e-04, -7.4379e-05, -4.4477e-05,
        -1.8917e-05, -4.2886e-05, -9.1172e-05, -7.9358e-05, -4.3197e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0726e-04,  1.0710e-03,  2.3078e-05, -1.0109e-04,  1.5008e-02,
         6.3167e-05,  4.3262e-05, -1.3134e-05,  1.0680e-03,  2.0204e-01,
        -4.0172e-05, -1.2215e-04, -1.0022e-04,  3.3833e-05, -4.2732e-05,
         4.4918e-05, -5.2808e-05, -1.6752e-04, -7.5424e-05,  4.4842e-06,
        -9.8598e-05,  1.0267e-04, -4.9967e-05, -9.0931e-05, -8.8943e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4780e-04, -3.2711e-03, -4.4153e-06, -5.7339e-05,  2.3114e-01,
         1.6115e-04, -8.6274e-05, -3.5683e-05, -1.0401e-04,  8.8146e-03,
         4.4045e-05, -1.9093e-04,  2.4661e-05, -6.1027e-05, -1.4603e-04,
         1.3925e-05, -5.3944e-05, -5.6452e-05,  1.0953e-04,  6.0211e-05,
         1.0959e-04,  1.6902e-06, -1.0710e-04, -3.6044e-05,  9.4782e-05,
         4.0148e-05, -3.8519e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3260e-04,  3.6388e-02,  3.6093e-06, -1.7647e-04,  2.4922e-02,
        -9.2890e-06, -5.7496e-06,  1.2523e-04,  6.0732e-04,  2.2222e-01,
        -3.8042e-05, -1.1581e-04, -1.2067e-04,  3.3760e-05, -7.7832e-05,
         1.3110e-05,  3.4228e-05, -2.5688e-06,  1.3709e-04,  2.5461e-05,
         1.1507e-04, -5.3770e-05,  9.3786e-06, -4.8359e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1050: [tensor([-7.4851e-05, -9.2917e-06, -1.0435e-05,  7.1427e-05, -6.1013e-02,
         2.9978e-05,  3.2496e-05, -4.4746e-02,  4.7827e-05,  7.8823e-05,
         6.9951e-04, -7.0577e-05, -1.1756e-05, -4.3602e-06,  1.8905e-05,
         6.8094e-07,  4.6730e-05,  1.1633e-05,  1.7433e-05, -6.4785e-05,
         1.9531e-05,  2.3151e-05, -8.3839e-06, -5.1799e-06, -1.1258e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3754e-04,  1.2832e-04, -8.1144e-06,  5.3549e-06, -1.4941e-01,
         9.0464e-05, -9.1803e-05, -3.5041e-02,  5.7929e-05,  1.3271e-04,
         3.2124e-04,  8.9317e-05,  7.5594e-05,  1.4615e-04, -5.7781e-05,
        -1.0056e-05,  8.2941e-05, -1.5491e-05, -3.2380e-06,  5.5027e-07,
         9.4329e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5249e-04,  3.7313e-07,  5.7657e-05, -1.3199e-04, -1.4470e-02,
        -3.7290e-05, -4.8508e-05, -1.8390e-01,  5.0213e-05,  1.2630e-04,
         9.3630e-04,  5.6801e-06,  4.9780e-05, -3.1385e-05,  1.2066e-04,
        -3.1418e-05,  1.2355e-04,  1.2050e-05, -4.5339e-05,  3.6630e-05,
        -9.8731e-06, -1.7091e-05,  1.1916e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2836e-04,  2.6214e-05,  6.6342e-05,  2.3435e-05, -1.9808e-03,
        -9.0084e-05, -5.3893e-05, -1.6280e-01,  1.0669e-04,  1.4988e-04,
         2.1406e-04, -4.9419e-05, -5.0668e-06,  2.5178e-05,  1.0854e-04,
        -3.5303e-05,  4.2908e-05, -7.2187e-05,  9.2835e-05,  3.7798e-05,
         5.5585e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8373e-05,  4.7412e-05,  1.6793e-05,  2.0304e-06,  2.9858e-03,
        -1.2413e-05, -1.0572e-05, -1.0765e-01,  2.4654e-05, -9.0109e-05,
        -2.6446e-03,  2.6348e-06, -4.5369e-05, -1.5034e-05, -2.7191e-05,
         1.6063e-05, -3.0363e-06,  5.3480e-05, -2.2943e-05,  1.2053e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8538e-05, -4.1427e-05, -5.0229e-05, -4.0819e-05, -4.3640e-02,
        -3.3954e-05, -1.6760e-05, -8.3882e-02,  7.4134e-05,  3.0154e-05,
         4.8535e-04,  1.9549e-05,  1.5906e-06, -1.8393e-05, -4.3521e-05,
         2.7980e-05,  5.0618e-06,  5.1563e-05,  2.2034e-05, -2.7572e-06,
         2.9564e-05,  6.8765e-05, -1.5147e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2366e-04, -8.2683e-06, -2.9161e-05, -4.1231e-05,  2.4138e-04,
         1.2346e-05,  3.4004e-05, -7.0858e-02, -1.0704e-05,  1.0950e-04,
         1.1638e-03,  2.6526e-05, -1.8552e-05, -4.0364e-05, -4.5650e-05,
         6.4362e-07,  4.1552e-05, -9.6045e-06,  2.5009e-05,  4.5728e-06,
        -4.6205e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4162e-05, -9.1782e-05,  5.1207e-05, -2.9245e-06, -1.4370e-02,
         4.6108e-05, -2.4660e-04, -1.9679e-01,  1.2106e-04, -1.1588e-04,
        -1.4925e-03, -8.3293e-05, -1.0550e-04, -7.1422e-05,  1.1349e-04,
         6.1260e-06,  7.0869e-05, -1.7989e-04,  3.0809e-05, -4.0823e-05,
        -2.0239e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7802e-04,  1.3943e-04,  1.2059e-04, -4.3754e-05, -1.2580e-01,
         7.8977e-05,  1.0127e-04, -1.5465e-01,  7.3501e-05,  3.0256e-05,
        -1.1458e-02, -2.4773e-05, -1.5736e-05,  8.1614e-05, -1.6781e-05,
         2.5218e-06,  6.4344e-05, -3.0215e-05,  9.7171e-06,  3.9060e-05,
        -2.2509e-05,  2.1426e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9431e-05, -6.8385e-05, -1.6712e-05,  5.1017e-05, -5.4960e-02,
         2.0187e-05, -1.7413e-05, -1.1049e-01,  1.1790e-05,  9.4118e-05,
         1.3547e-03, -1.6664e-05, -2.9983e-05, -4.2478e-05, -2.0262e-06,
         3.6269e-05,  1.4475e-05, -7.9184e-05, -4.0179e-06,  1.3458e-05,
        -1.0982e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2078e-05,  4.4519e-05,  2.9595e-05,  7.7268e-05, -4.2733e-03,
        -7.9337e-06, -7.9636e-05, -1.9479e-01, -1.0238e-04, -1.0404e-04,
         6.8504e-04,  5.9570e-05, -1.5618e-04, -1.0594e-04, -1.3114e-04,
        -1.3661e-05, -7.0342e-05,  3.5314e-05, -9.8032e-05,  3.4085e-05,
        -5.6340e-05, -2.8519e-05, -7.9932e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2286e-04, -9.2752e-05, -4.4297e-05, -1.3298e-04,  3.2481e-03,
        -3.1757e-05, -4.7715e-05, -1.7281e-01,  9.3465e-05,  1.2530e-04,
         8.0507e-04, -1.8037e-05, -8.8602e-05, -5.1163e-05, -7.6854e-05,
        -6.5110e-05, -8.4823e-05, -1.0742e-04, -3.6097e-06, -1.6060e-05,
        -1.5575e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1100: [tensor([-1.2773e-04,  2.2988e-04, -1.0062e-04, -7.2018e-05,  4.2170e-05,
        -6.4559e-05,  1.0787e-03,  2.0640e-05,  1.3279e-04, -7.0755e-05,
         4.4838e-05,  3.6346e-05, -1.1130e-01, -1.3716e-01,  2.0040e-05,
         1.1866e-05,  4.6154e-05, -7.0604e-06,  6.7918e-05, -8.3617e-05,
         8.8262e-05, -2.4078e-05, -1.2367e-05, -7.8018e-05,  3.7119e-05,
        -5.3313e-05,  6.0264e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8365e-04, -3.2677e-04, -6.4571e-06,  3.2846e-05, -7.9028e-05,
         1.7823e-05, -5.7261e-05,  1.7854e-06,  4.9496e-06,  1.6273e-05,
         1.8983e-05,  1.0571e-05,  2.9182e-02,  5.1735e-02, -1.5125e-06,
         1.2262e-05, -1.5083e-05,  5.6142e-07,  2.8909e-05, -2.8356e-05,
        -1.6602e-05, -1.2054e-06, -1.5730e-05, -1.7875e-05,  4.3471e-05,
        -2.4697e-06, -2.1870e-05,  6.5713e-08, -2.1436e-06, -1.6318e-05,
         2.2660e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7585e-04,  1.3692e-03,  6.5304e-05,  3.5134e-05, -3.6847e-05,
         3.0462e-05,  9.8952e-04,  1.6479e-05, -1.0222e-06,  5.5000e-06,
         6.2761e-05,  1.8968e-05, -4.4077e-02, -1.3273e-01,  9.6888e-05,
         8.9646e-05,  1.3317e-04, -6.4425e-05,  3.2519e-06,  7.5613e-05,
         4.2434e-05, -2.3060e-05,  1.6159e-05,  6.3045e-05, -6.2144e-05,
         5.8341e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1705e-05,  9.5493e-04,  4.6117e-05,  9.4112e-05,  3.9061e-04,
         1.3710e-04, -4.3010e-02, -1.4904e-05,  3.7555e-05, -3.0891e-02,
        -8.4400e-03,  4.1173e-05,  2.7428e-05,  1.1453e-07, -3.7893e-06,
         2.6465e-05,  1.0254e-04, -2.2030e-05,  4.7510e-05, -8.8943e-06,
        -2.5704e-05, -3.7222e-06,  1.8170e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0346e-05,  1.0734e-03,  3.2160e-05,  3.1310e-05,  6.3550e-04,
         2.0678e-04,  8.1974e-04,  1.0166e-04,  5.3283e-06, -3.9750e-02,
        -3.7452e-02,  2.4817e-05, -2.6694e-05,  9.5952e-05,  4.5303e-05,
         3.9695e-05,  1.0224e-04,  1.7467e-05,  3.9170e-05,  5.5192e-05,
         8.1802e-05, -1.3438e-06,  2.2429e-05, -1.1690e-05,  1.3994e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6674e-04,  1.1734e-03,  6.4800e-05,  2.1133e-05,  2.8187e-04,
         1.3582e-04,  4.4567e-03,  1.0538e-05, -5.6366e-06, -1.0678e-01,
         6.3075e-03, -5.4283e-06, -2.6014e-05, -1.1615e-05,  4.1170e-05,
         4.6213e-05,  1.5086e-04,  6.8030e-05,  5.9126e-05,  1.0503e-04,
         1.9247e-05, -6.0038e-06,  5.1727e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2041e-04, -3.2253e-01,  6.7555e-05,  7.2961e-05, -2.0677e-05,
        -6.2720e-05,  1.0412e-05,  2.8080e-04,  1.7940e-04, -6.8696e-05,
         2.9116e-05, -2.4284e-05,  7.5220e-05,  3.2613e-05, -7.6989e-06,
         1.6504e-05,  1.1381e-04, -1.7634e-05, -4.1062e-05,  1.2167e-04,
        -1.1761e-05, -2.1085e-05,  1.1021e-04, -7.4247e-05, -3.1077e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9886e-04,  3.3228e-01, -1.8829e-04, -8.1694e-05, -2.0925e-04,
        -8.5102e-06,  2.6196e-04, -4.8107e-05, -8.2876e-05, -1.0889e-04,
         4.1973e-05, -2.6222e-04, -8.0292e-06, -4.3205e-05,  3.6556e-05,
        -2.0153e-04, -7.4303e-05,  3.5980e-05,  8.7450e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4763e-04, -5.1020e-01,  1.4704e-04,  2.3580e-04,  5.1841e-04,
         1.7424e-04,  3.4531e-04,  4.0839e-05,  1.0566e-04,  8.3468e-05,
         2.9499e-04, -2.2309e-04,  3.1282e-04,  1.0858e-04, -3.5905e-05,
         2.8126e-04,  1.7456e-04, -1.3418e-04,  9.4149e-05,  6.7276e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7019e-05,  1.2404e-02,  1.7608e-05,  3.3807e-05,  2.3740e-02,
        -4.1252e-05, -2.5191e-05,  2.0524e-04,  2.6301e-05,  8.2912e-03,
        -4.6347e-05,  3.8805e-03, -8.6861e-06,  1.2673e-05,  2.2207e-05,
        -1.6216e-05, -1.9230e-05,  1.1765e-05,  1.2415e-05, -1.5902e-05,
         2.0918e-05,  8.3739e-06, -6.3530e-06,  5.8879e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0291e-04,  4.3513e-02, -4.2394e-05,  3.0259e-05,  6.9773e-03,
        -1.8356e-05, -7.4678e-05,  1.0328e-04, -3.5570e-06,  9.1837e-03,
        -3.9987e-05,  4.2373e-03,  4.5222e-05,  2.5194e-05, -8.1445e-05,
        -7.1044e-05,  7.3337e-06,  3.0491e-05,  8.1705e-06, -1.3675e-05,
         1.3021e-05, -6.0464e-06,  5.9386e-05,  1.3763e-05,  9.6212e-06,
         3.8305e-05,  1.2273e-05, -1.1953e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1596e-05,  1.4566e-02,  5.6763e-06, -2.9319e-05,  1.4171e-02,
        -6.0129e-05, -1.0216e-04,  1.8533e-04, -5.0412e-05,  4.2690e-02,
        -4.4577e-07,  3.5128e-03, -1.6201e-05,  5.6589e-05, -6.9184e-05,
        -2.4216e-05, -7.0438e-05, -1.8454e-05, -1.1253e-05, -1.4170e-05,
        -3.2673e-05,  1.5494e-05, -3.3883e-05, -9.3521e-05,  5.0498e-05,
         5.6132e-06,  8.7547e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1150: [tensor([ 7.7957e-06, -4.3791e-02, -7.4648e-06,  1.6850e-05, -4.8033e-05,
        -1.1435e-02, -8.1086e-02, -3.3748e-05, -8.1924e-05, -2.5988e-02,
         1.1309e-05, -7.7280e-05,  1.4813e-05, -3.3653e-02,  3.1875e-05,
        -5.4875e-05,  1.3982e-05, -1.3884e-05, -1.3375e-05,  1.3338e-08,
        -2.0649e-05, -3.2601e-05, -3.2170e-05, -7.8510e-05,  3.3294e-05,
        -9.7803e-06, -2.3731e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4381e-04,  6.7760e-03, -2.9351e-05,  4.0555e-04, -1.4752e-05,
        -2.3134e-02, -6.0995e-02,  1.1206e-05,  1.0991e-03, -6.9784e-02,
         1.1280e-05, -1.2101e-04,  1.0691e-04, -1.8047e-02,  9.7038e-06,
        -4.5394e-05, -1.2051e-05,  4.5196e-05,  2.1762e-05,  4.3824e-06,
        -4.2486e-05, -1.4184e-05, -3.1876e-05, -1.4194e-05,  5.4070e-05,
         6.6246e-05,  8.4554e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0414e-05, -5.2664e-04, -1.3093e-05,  4.1565e-05,  4.2162e-06,
        -1.5155e-02,  1.3198e-03, -4.8768e-05,  2.3187e-04, -2.0608e-02,
        -4.5163e-06, -8.4020e-05, -2.4700e-05, -3.2995e-02,  2.0470e-05,
        -6.1413e-05, -2.3595e-05, -1.8547e-05, -1.8586e-05, -1.4659e-05,
         2.2320e-06,  1.0979e-05,  5.1323e-06, -2.3447e-06, -7.0035e-06,
         1.0213e-05,  5.4123e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7546e-04,  1.5533e-05,  7.0088e-06, -2.5368e-05,  7.6966e-03,
        -2.6650e-06,  5.3416e-05,  3.4564e-02,  3.6955e-05,  1.1835e-05,
         1.3651e-03,  3.9115e-05, -1.1600e-05,  7.0970e-06,  9.0331e-06,
        -1.1987e-05, -2.2231e-05,  3.7403e-05,  8.0865e-06, -6.5000e-07,
        -2.2439e-05,  2.7543e-05, -1.1791e-05, -1.4762e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8308e-04, -2.0978e-06,  8.9218e-06,  5.2355e-05,  2.1986e-02,
        -8.7797e-07,  2.0988e-05,  2.8971e-02,  1.1016e-05, -1.2522e-05,
         9.6484e-04,  7.1900e-05,  9.1500e-06,  3.0838e-05,  3.8318e-05,
        -2.7129e-05,  2.9337e-06, -2.5297e-05, -1.0344e-05, -1.1312e-05,
         2.6715e-05,  7.7547e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1677e-04,  1.7926e-05, -1.0851e-05,  2.1849e-05,  2.3964e-02,
        -2.1178e-05,  2.6092e-05,  2.9067e-02,  1.9291e-05,  1.1890e-05,
         1.8972e-03,  3.0465e-05,  4.0346e-07,  3.7512e-05,  1.9144e-05,
         8.7232e-06, -9.2211e-06,  2.4891e-06,  2.9485e-05,  2.2174e-05,
        -2.4211e-05, -2.0047e-05,  1.8081e-05,  2.7588e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5628e-04,  4.3425e-05,  1.5099e-05,  1.0977e-01,  8.2011e-05,
        -6.6174e-07,  8.3106e-06,  3.4714e-06,  1.7420e-05,  1.0476e-04,
         7.9249e-05,  7.4078e-04, -1.1771e-06,  4.5823e-03,  7.0466e-06,
        -9.5397e-05,  6.3813e-03,  2.7470e-06,  1.0778e-04,  2.6503e-05,
         2.8809e-05, -4.5132e-05,  3.8379e-03,  8.0091e-05,  3.8624e-05,
        -3.6049e-05, -2.8316e-06, -1.4396e-05,  2.7539e-05,  7.2873e-05,
         5.3504e-05, -1.7253e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4553e-04, -1.5585e-05,  2.5737e-05, -6.9643e-02,  3.2690e-05,
        -7.4847e-06, -1.3063e-05,  6.0679e-05, -2.0480e-06,  1.8538e-04,
        -1.9928e-05,  7.8998e-04,  3.1509e-05, -3.1496e-03,  4.2160e-05,
         9.1840e-05,  1.8210e-04, -3.2884e-05,  2.3640e-05, -3.1423e-05,
         1.9710e-04, -4.0891e-05,  5.1518e-04, -3.5935e-05, -8.0836e-07,
         2.9726e-05,  1.0723e-05,  6.3485e-07, -3.5411e-06,  2.2925e-06,
        -3.2411e-05, -2.3931e-06,  1.2260e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8575e-05,  4.6988e-05,  7.7937e-06,  1.4602e-01,  5.2929e-05,
        -1.1944e-05,  5.1691e-05, -4.1686e-05,  2.8526e-05,  5.7713e-04,
         6.2538e-06,  2.6509e-03, -5.5452e-05, -1.8179e-04,  3.5130e-05,
        -8.0380e-05,  3.1681e-05,  9.0525e-06,  1.0542e-05,  7.8358e-05,
        -6.5043e-05, -7.2972e-05,  1.7237e-03, -2.2795e-07, -1.0539e-05,
         1.8911e-05,  1.3748e-05, -2.3088e-05,  5.9452e-06, -1.9394e-05,
        -2.6006e-07,  5.5424e-05, -4.4763e-05, -2.0010e-05, -2.4272e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5691e-04, -1.7551e-01, -1.9807e-05, -3.3427e-05,  5.1665e-05,
        -3.9800e-05, -2.9378e-02, -3.0230e-03, -3.8085e-05, -6.4669e-05,
        -7.1438e-05, -6.7932e-05, -1.0273e-04, -6.0267e-05, -5.1185e-05,
         3.8390e-06,  1.9427e-05, -1.1160e-04, -8.6198e-05, -8.6748e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0867e-04, -7.7078e-02,  6.1173e-07,  3.5458e-05,  3.7949e-04,
        -1.2960e-04, -1.5737e-01, -1.2992e-01, -8.8577e-05, -4.0342e-05,
         1.2994e-04, -4.8845e-05, -5.5734e-05,  8.2980e-05, -1.2458e-05,
         1.3470e-05,  1.8199e-04,  1.0382e-05,  2.4443e-05,  9.0126e-05,
        -1.1413e-04,  2.3124e-05, -5.0895e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5215e-05, -1.2563e-01, -1.5368e-05,  9.8108e-06, -2.5682e-05,
         9.0318e-06, -3.1783e-02, -2.5362e-02, -1.1454e-05,  1.6180e-05,
        -7.4466e-06,  3.0361e-05, -2.6329e-05,  4.6726e-05,  4.3809e-05,
        -3.4978e-05,  7.7143e-05, -3.6454e-05,  6.3365e-05, -1.3579e-05,
        -8.9117e-05, -1.1817e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1200: [tensor([ 7.2684e-04, -6.7267e-02, -2.9082e-05,  1.2352e-04, -1.7263e-02,
        -1.0199e-05,  8.5238e-04, -2.8990e-05, -1.0494e-01, -4.0644e-05,
        -2.8824e-05,  6.0947e-07,  3.3955e-06, -2.6506e-05,  5.7003e-05,
         4.8337e-05,  2.2653e-04, -1.4201e-04, -4.6893e-05,  1.1628e-05,
        -3.3754e-02,  2.4744e-05, -2.5793e-05, -2.7719e-05,  4.8712e-05,
         3.2795e-05, -2.3137e-07,  7.0121e-06,  5.1267e-05,  4.7085e-05,
         1.2460e-05, -3.8003e-05, -1.4876e-05,  3.8553e-05,  3.9056e-05,
         1.8051e-05,  1.0439e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6806e-04, -3.7069e-02,  2.7756e-05,  1.5467e-04, -2.2461e-01,
        -1.2902e-04,  4.8640e-04, -1.9018e-04, -1.4068e-01,  5.4413e-05,
        -6.5391e-06,  9.9413e-07, -2.3494e-07, -1.7785e-04, -6.8727e-05,
         4.2170e-05,  1.1202e-04,  1.3696e-04,  1.0673e-05,  2.2418e-05,
        -8.4415e-02,  1.0154e-04, -2.9648e-05, -3.7681e-05,  4.3281e-05,
        -1.1685e-05,  4.4410e-05,  8.5163e-05,  5.1925e-05, -2.7491e-05,
        -2.8015e-05, -1.5215e-05, -3.8608e-05,  2.4257e-05,  2.2366e-05,
         7.2638e-05,  1.0365e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8603e-04,  9.6307e-02, -7.2189e-06, -2.1793e-05,  4.8163e-03,
        -7.5940e-06, -4.7832e-04,  3.4057e-05,  4.8683e-02,  3.3212e-05,
        -2.9188e-05, -3.2134e-05,  4.1084e-05, -5.0199e-05,  1.5230e-05,
         1.5431e-05, -1.1858e-06,  1.9888e-05, -3.0913e-05,  5.6897e-06,
         1.1603e-02,  1.5008e-05,  6.9884e-07, -3.4757e-05, -4.3708e-05,
         1.0763e-05,  3.2171e-06, -6.6093e-06,  2.4410e-05, -2.4132e-05,
         3.9742e-06,  1.1141e-05,  6.9640e-06,  2.1164e-06, -9.2036e-06,
         8.2039e-06,  6.6225e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0024e-05, -1.6093e-01,  1.1478e-05,  1.7840e-04, -6.7295e-02,
        -1.0843e-04,  2.5402e-04, -3.5905e-05, -6.1091e-02,  2.6325e-05,
         6.5271e-05, -1.8425e-05,  7.7513e-05, -6.3602e-05, -4.6380e-05,
        -5.8966e-05,  1.7062e-04, -1.6325e-05,  4.7628e-05, -2.3789e-05,
        -1.2912e-01, -2.1809e-05, -4.8126e-05, -5.1055e-05,  1.1084e-05,
         4.3704e-05,  9.9876e-05,  3.6752e-06, -1.9304e-05, -5.0786e-06,
         2.7560e-05,  7.3064e-05, -2.5386e-05, -2.2377e-06,  5.4534e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2314e-04, -1.9283e-01, -5.5678e-05,  9.3438e-05,  4.0347e-03,
        -5.6759e-05,  1.5364e-03, -3.9496e-05, -6.6715e-02,  6.1515e-06,
        -7.2104e-05,  5.3728e-05, -8.8975e-05, -3.7467e-05, -5.7874e-05,
        -6.3150e-05,  5.4553e-04, -4.1082e-05, -6.6464e-05, -3.2612e-05,
        -9.7148e-02,  3.5505e-05, -2.7067e-05, -3.0684e-05, -3.0542e-05,
        -2.5341e-05,  5.3334e-05, -1.7589e-05,  8.2180e-05,  5.0180e-05,
         2.0429e-05, -5.4117e-05,  2.3017e-05, -7.6146e-05, -2.4556e-05,
        -2.3863e-05, -1.0579e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7283e-05, -1.6804e-01, -9.4199e-05,  2.5119e-05, -4.0935e-02,
        -7.5690e-05,  6.3282e-04, -4.3581e-05, -9.9940e-03,  5.3005e-06,
        -2.2185e-05,  4.0592e-05, -2.2008e-05,  6.3056e-06, -3.2449e-05,
        -3.7937e-06,  3.7360e-04, -9.1949e-05, -7.7503e-05,  1.2855e-05,
        -1.7068e-01, -8.7850e-05, -2.5277e-05, -1.0517e-04,  6.7074e-05,
        -1.3125e-05,  8.7686e-05, -4.0550e-07,  6.4383e-05,  3.9510e-05,
        -4.5615e-05, -3.0338e-05, -8.1469e-05,  1.4650e-05, -2.9801e-05,
         5.7223e-05, -6.1347e-05,  4.2958e-05,  7.8332e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7384e-04, -8.1508e-04,  1.1804e-05, -1.0049e-04,  1.0716e-02,
        -1.0562e-05, -1.1625e-04,  4.2963e-05,  7.3719e-02,  9.7792e-06,
         3.0253e-06, -4.0834e-05,  1.4201e-06, -4.3172e-05, -1.4853e-05,
        -1.6821e-05, -1.0953e-04,  3.4234e-05,  7.8578e-06,  4.3350e-05,
         3.9585e-02,  9.2575e-06, -9.4447e-06, -1.6782e-05, -1.2016e-05,
         1.2329e-05,  1.3929e-05, -3.4776e-05,  2.0416e-05, -5.2952e-05,
         7.8542e-06,  9.2944e-06, -2.2527e-05,  1.0144e-05,  1.8753e-05,
         3.3160e-05, -4.5345e-05,  1.3586e-05,  2.8518e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2316e-04, -7.9587e-02,  3.7059e-05,  5.9322e-04, -2.7060e-02,
        -9.7533e-05,  6.5141e-04, -2.7993e-05, -1.3210e-01, -6.5679e-06,
         1.6402e-05,  6.2790e-05,  6.5819e-06,  3.4542e-05, -2.3799e-05,
        -1.4806e-05,  2.2728e-04, -8.1997e-05, -2.2053e-05, -2.3118e-05,
        -1.2321e-01,  4.2126e-05, -6.8000e-05, -7.2604e-05, -1.2531e-05,
         8.5560e-05,  5.6277e-05, -1.8128e-05,  6.3301e-05, -1.6325e-05,
        -2.6852e-07, -8.5190e-05, -3.9174e-06, -2.0528e-05,  4.2865e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1263e-04,  4.4603e-02,  8.8602e-07, -7.6404e-05, -8.1517e-04,
        -2.3713e-05,  1.9051e-05,  2.6063e-06,  1.9431e-02, -5.0630e-06,
        -1.0758e-05, -2.8868e-06, -1.4668e-05,  1.4226e-05,  8.3026e-07,
        -4.4615e-06,  2.1487e-04,  1.5811e-06,  1.1348e-05,  4.9881e-05,
         3.4260e-02,  2.2225e-05,  1.0633e-05,  3.0641e-05, -2.2867e-06,
        -1.2583e-05, -1.9870e-05,  6.8331e-06, -2.7090e-06, -2.7888e-05,
        -1.4511e-05, -4.9197e-06, -2.4059e-06, -1.9569e-05, -8.5626e-06,
        -1.4614e-05, -2.7817e-06, -8.0576e-06, -1.0208e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4622e-04, -1.1858e-01, -9.1501e-06,  1.0986e-04,  1.6726e-03,
        -4.5452e-05,  1.9316e-04, -5.2562e-05, -4.4672e-03,  1.5526e-06,
        -2.7516e-05,  4.2468e-05, -2.6079e-05,  4.0603e-05,  3.6263e-05,
        -2.2143e-05,  5.5243e-04, -7.0459e-06, -1.1771e-05,  3.6244e-05,
        -2.4467e-02, -8.7569e-06, -5.1097e-06, -2.7486e-05, -2.7039e-06,
         1.3382e-05,  1.6591e-05,  3.1029e-05, -2.1468e-05, -4.4243e-05,
         1.7352e-05, -2.6535e-05, -4.3370e-05, -8.0278e-06,  9.9904e-06,
         3.3204e-05,  2.4115e-05, -5.0542e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2025e-05, -6.8101e-02,  2.3073e-05,  5.2409e-04, -2.8301e-02,
        -3.7431e-05,  5.4176e-04, -6.2555e-05, -2.2117e-01, -6.1759e-06,
        -1.6646e-05,  2.5109e-05,  4.3493e-05, -6.0349e-05,  1.8735e-05,
         7.3034e-05,  2.7511e-05, -3.9385e-05, -5.9136e-05,  1.6393e-04,
        -2.9009e-02, -2.5211e-05, -5.3588e-05, -1.0119e-04,  1.1462e-04,
         5.9090e-05,  1.6292e-05, -3.0141e-05, -1.0361e-05, -1.9126e-05,
        -1.9897e-06, -3.6242e-05,  5.3828e-05, -7.6954e-05,  5.7821e-05,
         3.3787e-06, -5.7042e-06,  1.6271e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0407e-04, -1.0775e-01, -1.4300e-05, -1.5733e-04, -5.7951e-02,
        -4.0151e-05,  5.7176e-04, -7.6027e-05, -1.5437e-01, -1.1739e-04,
        -6.2933e-05, -2.7709e-05, -5.3735e-05, -5.0485e-05, -5.8930e-05,
        -2.6427e-06,  2.6116e-04, -8.2934e-05,  2.0925e-05, -2.6914e-05,
        -9.6671e-02, -1.5150e-06, -5.4807e-05, -4.8213e-05,  8.5220e-05,
        -1.0918e-05,  5.2621e-06, -1.7754e-05,  3.0152e-05, -6.5791e-05,
         2.5359e-05, -2.2664e-05, -2.1900e-05, -2.3105e-05, -1.6792e-05,
         6.7500e-06,  2.8744e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1250: [tensor([ 1.1983e-04,  3.1002e-04,  3.6948e-05,  2.2716e-05,  2.2739e-05,
         2.7856e-03,  5.4044e-02, -4.6241e-06,  2.8106e-03,  2.8842e-05,
        -2.4686e-05,  1.3733e-02,  2.2827e-05,  1.7677e-05,  2.4470e-05,
         1.6297e-05,  1.0708e-05, -5.7981e-06,  1.5359e-05, -1.7360e-05,
         7.4639e-06,  3.1252e-05,  2.1514e-05,  2.3652e-06,  2.0726e-05,
         1.6600e-05, -1.2334e-06, -1.3350e-05,  1.8811e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7006e-05,  7.0240e-04, -7.1599e-06,  7.2561e-06,  1.7176e-05,
         1.0538e-02,  2.3851e-03, -6.2725e-06,  2.1749e-03,  1.1306e-05,
         3.7467e-04,  6.5213e-03,  6.1224e-07, -2.5396e-06,  8.7289e-07,
         1.8793e-04,  1.6977e-05, -7.6400e-06, -1.1955e-05, -3.8567e-05,
        -1.7792e-05,  1.1978e-05, -2.6692e-05,  1.0946e-06,  9.7043e-07,
        -4.4801e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2514e-05,  1.8019e-05,  4.8943e-05, -4.6302e-06,  2.4365e-05,
         8.4171e-05, -1.9142e-07, -9.1361e-06, -5.4130e-06, -5.4049e-07,
         5.4434e-06,  2.4094e-05, -6.2746e-06,  1.6437e-05, -3.5211e-06,
        -1.8124e-05,  1.4710e-05,  1.3385e-06, -7.9184e-07,  6.7226e-06,
        -6.9236e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9629e-05, -4.7184e-06,  3.4967e-05, -2.4770e-05, -1.6741e-05,
         3.2654e-04, -2.8728e-05, -3.1126e-06,  5.9412e-05, -1.0736e-06,
         7.7780e-06, -1.2134e-05, -1.6236e-05,  5.9294e-05, -1.2369e-05,
        -3.2490e-06,  2.3882e-05,  8.3875e-06,  3.1543e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3093e-04,  2.4620e-05,  4.4926e-05, -1.0851e-05,  1.7509e-05,
         1.0921e-04, -4.0898e-05, -8.7390e-06,  1.9339e-04, -4.1464e-08,
        -1.1648e-05, -8.3843e-06,  1.2675e-05,  7.9935e-06,  4.9228e-05,
         1.3962e-05, -3.3552e-06,  1.0854e-06,  3.6665e-06,  6.7297e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2615e-05,  6.5562e-04, -1.0762e-05, -9.0451e-06,  1.4931e-04,
         4.1763e-05,  1.4674e-05,  8.7274e-03,  3.2096e-06, -2.2049e-06,
        -3.2030e-06, -1.6887e-05, -1.6753e-06,  2.3048e-06,  4.5617e-06,
         1.3509e-05, -2.3473e-06, -1.9010e-06, -7.8747e-06,  3.7595e-06,
         3.6744e-06,  4.5143e-07,  3.5537e-06, -6.3167e-06,  7.5823e-06,
         4.9158e-05,  3.1015e-06, -8.8936e-06,  4.1562e-06,  3.1024e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5654e-06,  9.2501e-04, -3.8684e-05,  1.8572e-05,  4.6067e-04,
         3.3531e-05,  7.6547e-06,  2.0086e-03,  3.3900e-05, -6.6002e-06,
        -1.1859e-05, -4.9812e-05, -4.6347e-06,  1.6048e-05,  9.6171e-06,
         2.1526e-06, -5.2513e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5639e-04,  4.4366e-03, -1.1770e-05,  1.9628e-06,  6.0281e-05,
         3.7885e-05,  4.1325e-05,  2.8068e-03,  1.1678e-05, -3.6448e-06,
        -1.2473e-05, -4.6505e-05, -1.1454e-06, -3.0392e-06,  1.4888e-05,
         4.4642e-07, -3.2334e-05,  1.9768e-06,  4.0630e-06,  4.5478e-06,
        -1.2933e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9590e-05,  2.2643e-02, -6.1003e-06,  1.7807e-05, -3.7338e-05,
         5.2419e-06,  2.2657e-04,  9.8762e-06, -1.1482e-05, -2.5572e-05,
        -1.3170e-05,  5.2444e-06, -4.0285e-06,  6.5862e-06,  2.7681e-05,
         2.1685e-05,  1.1019e-06,  5.5095e-07, -2.6263e-05, -6.4440e-06,
        -1.6737e-06, -9.5445e-06, -1.5365e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5837e-05,  1.2885e-03, -2.5685e-05,  2.5109e-04, -3.3969e-05,
        -1.0307e-05,  2.3450e-03,  3.6162e-05, -1.3713e-05, -5.9875e-06,
        -1.6760e-05,  5.4254e-06,  1.1904e-05, -2.4823e-06, -4.9909e-06,
         7.4099e-07,  7.2939e-06, -1.4270e-05,  2.6391e-06,  5.7035e-07,
        -1.0777e-06,  1.4758e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2851e-05,  1.2589e-03, -9.5942e-06,  6.8891e-07, -4.3847e-05,
        -2.0320e-05,  1.5439e-03, -7.8979e-07, -1.3427e-05, -5.5824e-06,
        -1.0106e-07,  1.3635e-06, -3.7964e-06,  1.9847e-05, -2.6345e-05,
        -1.1561e-05,  1.1301e-06, -4.6194e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0145e-05,  4.8988e-03,  2.6270e-06, -1.7353e-05, -7.1249e-06,
         2.2424e-05,  1.3461e-05,  4.0954e-03, -8.0013e-06,  4.0934e-06,
        -6.9907e-06, -1.3598e-05,  4.7951e-06,  1.3606e-07,  3.2824e-06,
         7.0838e-06, -7.2292e-06, -3.1395e-05,  1.6070e-06,  7.9927e-07,
        -2.5565e-05, -6.1731e-06,  1.8398e-06,  5.2854e-06, -2.7811e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1300: [tensor([ 1.7707e-04, -9.7056e-03, -1.8166e-05,  1.3832e-05, -1.1228e-02,
         6.4413e-06,  2.1903e-05, -5.2061e-03,  1.1398e-05,  2.1714e-05,
         4.9666e-05,  1.8672e-05,  4.0994e-05,  1.9602e-05,  2.1132e-05,
         1.8798e-06,  2.1340e-05, -1.2270e-05,  2.6968e-06,  5.0437e-06,
         1.4996e-05,  1.0389e-05,  5.5079e-06, -1.0515e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2013e-04, -6.9598e-03, -1.9830e-05,  2.2397e-06, -1.9934e-02,
         2.9449e-05,  3.1555e-05, -4.3009e-03, -1.8634e-07,  2.5289e-05,
         7.1476e-05,  8.1977e-06,  1.7348e-05,  3.8495e-05, -9.6795e-06,
        -3.9262e-06,  1.8033e-05, -5.7946e-06, -4.0989e-06,  6.6992e-06,
        -9.1431e-06,  2.7589e-05, -8.4254e-06, -1.5773e-05, -1.1761e-05,
        -5.8281e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0705e-04,  6.0628e-04,  1.2827e-04,  4.5446e-04, -6.1295e-02,
        -2.9377e-01,  1.6349e-04,  6.5843e-04,  2.5278e-04,  2.3378e-04,
         3.3974e-04,  2.1642e-04, -6.5722e-06,  3.1391e-04,  5.2450e-05,
         5.4151e-05,  1.9271e-04,  2.3053e-04,  7.2825e-05,  3.7868e-05,
         1.0990e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8760e-04, -4.3886e-06,  3.3701e-05,  7.6526e-05,  3.0809e-03,
        -7.7504e-02,  6.5414e-05,  1.8495e-04,  9.3445e-06,  3.4075e-05,
         8.7807e-05, -1.1728e-05,  4.7016e-05,  3.8273e-05,  3.8826e-05,
         2.2829e-05, -1.8053e-05,  3.9338e-05, -1.5308e-05, -1.1360e-05,
         1.4293e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5067e-04, -1.0369e-04, -4.3790e-05, -1.2355e-04, -2.6278e-03,
         8.6861e-02, -5.3182e-05, -5.0650e-05,  4.7943e-06, -3.9212e-05,
        -2.0131e-05,  3.2418e-06, -1.3318e-04,  1.8701e-05, -1.5146e-04,
        -7.0338e-05, -1.5466e-06, -5.0360e-06, -3.7645e-05,  1.3474e-05,
         2.9558e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9325e-04,  1.1639e-05,  2.6075e-05,  4.2798e-05,  6.8764e-07,
        -9.4937e-03, -1.0611e-05, -2.1167e-05, -1.7431e-04,  2.3092e-05,
        -1.6173e-04,  2.7044e-05,  1.4668e-05, -3.1536e-06,  5.4403e-06,
         9.2626e-06,  2.7773e-06,  3.3470e-06,  2.4806e-05,  8.4920e-06,
         1.1206e-05,  1.5902e-05,  4.6224e-06,  1.0537e-05,  8.2841e-06,
        -5.5954e-06, -7.2830e-06, -4.1236e-06, -4.1948e-06, -1.8850e-07,
        -7.8894e-06,  3.9462e-06,  3.0274e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1557e-04,  2.9116e-05,  5.7919e-06,  7.9900e-05,  1.7156e-05,
        -1.0889e-02,  1.2741e-05,  1.6478e-05,  6.4851e-04,  2.0431e-05,
         9.1002e-04,  7.7356e-06,  2.3351e-05,  1.1761e-05, -2.9772e-06,
         1.8182e-05,  4.8484e-06,  5.5921e-06, -1.6314e-05,  2.2422e-06,
         7.9900e-06, -3.6867e-06, -7.6007e-06, -3.5874e-06,  3.0551e-05,
         2.3779e-06,  4.0596e-06,  3.7011e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7723e-04,  2.9403e-05,  1.5094e-05,  2.8755e-05, -9.2457e-06,
        -6.8616e-03,  2.0577e-05,  1.5813e-05,  9.3177e-05,  3.1233e-06,
         6.7784e-04, -1.1570e-05,  1.0209e-05,  1.1968e-05, -5.6211e-07,
         1.4025e-05, -4.6011e-07,  9.4747e-06,  1.3073e-05,  1.0627e-05,
         5.6131e-06,  3.4516e-06,  3.1140e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1476e-05, -5.2282e-05, -4.4863e-05,  7.8198e-02, -1.0054e-04,
        -9.4533e-04, -1.0120e-04, -1.0584e-04, -1.2560e-04, -9.1567e-05,
         3.5309e-05, -1.9731e-07, -3.1082e-05, -9.6297e-06, -3.5986e-05,
        -6.0278e-05,  2.2771e-05, -6.3827e-06, -5.4418e-05, -4.3249e-05,
        -7.1241e-05, -5.4671e-05,  4.6452e-05,  2.5033e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4502e-06, -3.7268e-05, -3.1904e-05,  3.9870e-02, -5.1919e-05,
         6.4219e-03, -9.3353e-05, -8.8389e-06, -6.7445e-05, -9.7935e-05,
        -5.0576e-05,  6.1761e-06,  1.1364e-05, -1.9117e-06, -1.8989e-05,
        -5.7230e-05, -5.9370e-05, -5.7089e-05,  1.3122e-05,  1.0773e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2299e-04, -1.1880e-04, -3.2657e-05,  5.2089e-02, -1.9490e-05,
        -1.8516e-04, -9.5303e-05, -8.1917e-05, -1.2427e-04, -1.6076e-04,
        -1.8674e-05, -7.4671e-05, -1.2225e-05, -2.7585e-05, -1.9537e-05,
        -9.5749e-06, -7.6891e-05, -1.0377e-05, -1.5895e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9967e-04, -8.4502e-05, -1.1201e-04,  7.0726e-02, -1.7441e-05,
        -1.0501e-04, -1.1678e-04, -1.3912e-04, -1.6012e-04, -8.8586e-05,
        -5.4156e-05,  4.5326e-05, -1.9369e-05, -1.1644e-05, -2.9322e-05,
        -4.3629e-05, -4.4089e-05, -1.9663e-05, -1.4203e-05,  9.7523e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1350: [tensor([-3.4778e-05,  3.6886e-06,  2.9398e-06,  3.6845e-07,  1.8976e-05,
        -7.7730e-06,  5.0244e-03,  1.9483e-05,  6.6411e-06,  5.8071e-08,
        -5.6958e-04, -2.3081e-06,  7.1732e-06, -4.8970e-06,  2.0821e-06,
        -1.5560e-06,  1.3206e-05,  8.7968e-08, -2.1038e-05,  5.8245e-06,
         1.2882e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3302e-05,  7.6710e-06,  1.6494e-05, -5.1537e-05,  1.4854e-05,
        -3.2362e-06,  1.3355e-04,  5.6924e-06,  9.9422e-06,  6.2507e-06,
        -9.3529e-03,  3.6254e-05,  8.1428e-06,  1.3062e-05,  3.5134e-05,
         1.3717e-05,  9.0504e-08,  1.2062e-05,  7.5739e-06,  4.5508e-05,
         5.8968e-06, -6.7509e-07, -4.3026e-06, -8.4451e-07,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4089e-05,  1.1605e-02,  2.1454e-06,  1.0622e-04,  1.7151e-05,
         3.2599e-05, -6.4603e-04, -1.3108e-02,  1.5709e-06,  3.4538e-04,
         2.1027e-05,  3.2543e-06,  1.0593e-05,  2.4161e-05,  2.0335e-05,
         2.4490e-05,  8.8182e-06,  9.3815e-06,  5.8307e-07,  1.2387e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9204e-04,  1.0552e-02,  1.7595e-05, -2.6915e-04,  4.7964e-05,
         1.6340e-05, -2.5020e-03, -1.2836e-02,  6.8669e-07,  4.7731e-04,
         1.5731e-05,  8.5800e-06, -1.0942e-05,  2.3982e-05,  1.4508e-05,
        -6.4240e-06,  3.2451e-05,  2.1812e-05,  1.3857e-05,  1.1220e-05,
        -2.7611e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0909e-04,  3.0785e-03,  2.0972e-05,  4.2588e-06,  5.3005e-05,
        -4.5060e-06, -4.3797e-05, -9.6910e-03,  1.5901e-05,  4.2325e-06,
         1.9283e-05, -3.2706e-06,  1.2138e-06,  3.0642e-05,  1.2057e-05,
         3.2382e-06, -6.5802e-06,  2.3805e-05, -1.2575e-06,  2.5485e-05,
         4.0783e-05,  1.3820e-05,  1.4540e-06,  1.3545e-06, -5.5881e-07],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5000e-05,  1.5356e-04, -2.5901e-05,  1.3150e-04, -5.8484e-05,
        -9.7720e-03,  6.5900e-05, -3.1837e-02,  1.8162e-05, -7.1119e-05,
        -1.0069e-02,  7.9233e-05,  4.4080e-06, -4.0091e-05,  5.5337e-05,
         8.5748e-06,  1.1687e-05,  3.3802e-05,  8.2353e-05,  2.3137e-05,
        -1.0321e-05, -2.9398e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4253e-05,  5.9173e-05, -1.4619e-05,  3.1520e-05, -1.8766e-03,
        -1.1240e-02, -1.8801e-05, -2.6542e-02,  9.5788e-05,  5.0448e-05,
        -7.0807e-03,  2.1869e-05, -1.2758e-05, -1.5458e-06,  7.5713e-05,
         2.7279e-05,  2.3895e-05, -2.8686e-05,  8.7277e-05, -5.5615e-06,
         7.6763e-05, -2.5217e-05, -1.5239e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1655e-05,  6.1024e-05, -1.4562e-04,  8.8089e-06, -9.4521e-04,
        -2.1989e-02,  1.1105e-05, -3.0507e-02,  1.3334e-04,  7.6971e-05,
        -4.9506e-03,  6.4109e-05,  2.8855e-06, -4.2778e-06,  1.1116e-04,
        -2.9986e-06,  5.6999e-05,  2.3942e-05,  6.2838e-05, -7.3911e-06,
        -4.5602e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6695e-04,  3.7763e-02,  2.0834e-05,  5.3406e-06,  4.1205e-05,
        -2.3759e-05, -1.8316e-04,  5.2587e-05, -2.0109e-05, -1.1122e-05,
        -1.8087e-06,  3.4153e-05,  1.9391e-05, -3.5376e-05,  1.7999e-05,
         1.3625e-05,  1.1746e-05,  8.1035e-06,  9.4851e-06, -9.5240e-06,
         1.0540e-06,  3.0545e-06, -1.1195e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9389e-05,  3.4395e-02,  7.0692e-06,  2.6537e-04,  3.0062e-05,
        -2.8187e-05,  2.4657e-05,  3.7646e-05, -9.1693e-06, -1.9568e-05,
         4.6971e-05,  3.2662e-04, -3.7202e-06, -2.3652e-05, -7.7271e-06,
        -2.3953e-05,  1.7242e-05, -5.2359e-06,  2.0483e-05, -1.6864e-05,
        -1.1771e-05, -1.0148e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3313e-05,  1.7740e-02,  5.5069e-05,  1.5585e-04, -1.6452e-07,
         1.6349e-05,  7.8887e-05,  4.7640e-05, -1.5105e-05,  6.9528e-05,
         6.5336e-05, -1.7078e-02,  1.4337e-05, -1.2437e-05, -1.7412e-05,
         2.7496e-05,  8.2153e-06,  9.5167e-07, -1.9443e-05, -8.5187e-06,
         1.9944e-06,  7.6312e-06, -6.8199e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7985e-04, -1.6744e-02,  2.8143e-05,  5.3310e-05,  6.3691e-05,
         7.4751e-05, -3.8410e-03, -1.4689e-05,  7.9282e-06,  3.6872e-05,
         9.3463e-07,  5.6944e-06,  3.8553e-05,  2.3534e-05, -8.2161e-06,
         3.4087e-05,  4.8209e-05,  1.2479e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1400: [tensor([-2.9447e-05,  1.3995e-02, -1.2245e-05, -4.2793e-03,  7.2144e-03,
        -1.5948e-05,  3.8003e-06, -7.1144e-06,  2.0732e-03,  7.7509e-06,
         3.8959e-04, -1.6828e-06, -3.6209e-05,  8.6862e-03,  2.8576e-07,
        -1.4755e-05,  1.7887e-06, -1.9575e-06,  5.3741e-06, -2.4105e-06,
         6.0489e-06,  1.2066e-06, -5.6978e-06,  1.8508e-06,  8.4697e-06,
        -6.9951e-06, -8.8225e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9578e-05,  1.7957e-02, -4.1635e-06, -1.3167e-03,  9.2015e-03,
        -1.6936e-05, -7.0904e-06,  8.0747e-06,  1.4587e-03, -2.9198e-06,
         2.4365e-05, -5.0664e-06, -1.3110e-05,  3.5144e-03,  1.1084e-05,
        -5.9647e-06, -5.1479e-06,  7.1132e-09,  4.7639e-06,  2.1748e-06,
        -6.5532e-06,  8.5488e-06,  8.2882e-06, -1.2269e-05, -3.8144e-06,
        -1.7231e-05,  6.6058e-06, -1.3152e-05, -3.2824e-06,  7.0737e-06,
         6.2151e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3393e-05,  1.9289e-02,  1.8423e-06,  1.9250e-03,  1.8759e-02,
         2.9453e-06, -9.7413e-06,  1.8815e-06,  1.2223e-03,  8.1268e-07,
         1.4726e-04, -3.0290e-06, -2.2293e-05, -5.5656e-03,  1.7911e-05,
        -3.3199e-05,  2.8947e-06, -7.9139e-06, -7.3017e-06,  2.4218e-05,
        -2.3529e-06, -1.0477e-06, -3.8430e-06,  2.9876e-05,  1.3901e-05,
        -2.4901e-05,  1.1072e-05,  3.6825e-06, -1.1437e-05,  6.1298e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6240e-05,  4.3008e-05,  4.1398e-06, -1.9537e-02, -4.1196e-05,
         1.6425e-03,  2.1512e-02, -1.8497e-05,  2.7634e-05, -2.3026e-05,
         1.4825e-02,  4.0249e-05, -4.0524e-06,  1.6475e-05,  2.8510e-07,
        -3.4075e-07, -1.4003e-05, -1.8201e-05,  1.2383e-05,  3.0694e-06,
         9.0881e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6862e-05,  2.2712e-05, -3.9807e-06, -7.1740e-03, -2.4185e-05,
         1.7458e-04,  6.6251e-03, -1.6862e-05, -1.6798e-05, -2.9895e-05,
        -3.6213e-03,  6.0090e-06, -6.1820e-06,  6.1162e-06, -7.5896e-07,
        -1.6225e-05, -1.8301e-05, -2.5685e-06, -1.6078e-05,  4.9557e-06,
         1.5388e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0826e-05,  4.1812e-05,  2.1457e-05, -3.1209e-02, -4.6522e-05,
         2.7373e-04,  2.7273e-03, -7.7224e-05, -3.3027e-05, -2.7841e-05,
         1.1621e-02, -3.3163e-05,  3.5129e-06, -2.6432e-05, -5.0028e-06,
        -3.3750e-05,  9.1906e-06, -1.9914e-05, -9.1945e-06,  2.0286e-05,
        -1.8053e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1167e-05,  5.4470e-05,  1.2038e-05, -7.5777e-03, -5.9951e-03,
        -2.0532e-05,  6.9127e-06, -5.5430e-06,  2.8833e-05, -1.4464e-05,
         9.2522e-06,  2.6954e-05, -1.4675e-05, -3.3695e-06, -3.5618e-06,
        -2.7689e-06, -1.3089e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0276e-06,  3.0109e-05, -1.0140e-05, -9.7668e-03, -8.6879e-03,
        -1.1123e-05, -7.2331e-06, -2.0370e-06,  8.6001e-06, -5.3378e-06,
        -6.2762e-07, -1.0478e-05, -1.4586e-05, -1.9885e-05,  1.3433e-06,
        -3.8014e-06, -2.6950e-05, -1.3737e-06, -1.4042e-05, -1.7280e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2266e-04,  1.5940e-04, -4.8970e-06, -2.4536e-02, -4.6880e-02,
        -1.1265e-04,  5.3900e-05,  3.8068e-05, -3.7674e-05, -6.5777e-05,
        -1.2653e-04, -8.6122e-05,  3.7059e-05, -5.2938e-06, -5.0948e-06,
        -9.4767e-05, -3.6708e-05, -1.2544e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9197e-05,  4.0622e-03, -7.3866e-06, -3.5614e-03,  1.1581e-02,
        -4.8599e-06, -7.8296e-06,  9.1603e-06,  6.0387e-07, -2.2355e-06,
         6.4789e-04,  8.5454e-03,  6.9220e-07, -1.2705e-06,  1.4830e-05,
         4.6686e-03,  1.1372e-06,  4.3934e-05, -3.6992e-06, -1.3529e-06,
        -2.3632e-06,  5.6680e-03,  2.4761e-08, -1.7268e-06, -2.9205e-05,
         3.0324e-03, -3.7523e-06,  9.3992e-06, -3.0281e-06,  2.4945e-06,
         5.3640e-06,  7.4007e-06, -1.0413e-05, -1.3015e-05, -3.3436e-06,
        -6.0722e-07, -1.2510e-05, -3.6580e-06, -2.9835e-06,  1.3536e-06,
        -6.9677e-06,  4.3992e-06,  1.4939e-06,  2.3968e-06, -2.7447e-06,
        -6.4669e-07, -1.9994e-05, -6.0585e-06, -1.3238e-05, -6.6720e-06,
        -6.0886e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5616e-05,  1.7533e-03, -9.1953e-06,  6.6472e-04,  1.3545e-02,
        -9.4075e-06, -9.7955e-06,  8.2815e-05,  6.8756e-06, -1.5931e-06,
         4.7010e-04,  4.3122e-03, -9.4180e-06,  1.8652e-06,  2.0058e-05,
         2.4473e-03, -9.6400e-06, -5.6706e-05,  2.1469e-06,  6.4653e-06,
         4.9810e-06,  5.3508e-03,  6.0585e-06,  3.2129e-06,  5.5364e-04,
         4.9644e-03,  3.1881e-07,  1.5693e-05, -9.4028e-06, -6.3253e-06,
         2.5304e-06,  1.1429e-06, -9.7306e-06, -1.4046e-06, -2.9567e-06,
         9.2174e-06, -1.3684e-06, -4.8263e-06, -8.1530e-07, -2.7723e-06,
         3.2918e-06,  4.9346e-06, -6.6681e-06, -6.4040e-06,  5.4015e-09,
        -1.2063e-05, -1.1537e-05,  1.3073e-06, -9.0828e-06, -4.0008e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8658e-05,  6.4076e-03, -6.6505e-06, -4.9556e-04,  7.0551e-03,
        -6.4956e-07, -1.1583e-06,  3.3217e-05,  1.6056e-06,  5.9646e-06,
         3.6555e-04, -2.3556e-04,  8.3916e-06,  3.6914e-06,  1.1120e-05,
         3.8609e-03, -3.2561e-06,  1.2177e-05,  7.7178e-06,  7.3147e-06,
         4.8140e-06,  7.2612e-03,  4.2690e-06,  6.6852e-07,  4.1768e-04,
         5.8935e-03, -3.8022e-07,  8.7028e-06, -1.9816e-06,  2.6177e-06,
         7.0788e-06,  4.6928e-06, -3.5883e-06, -1.2089e-07,  2.8387e-06,
         3.9310e-06, -3.5995e-06, -2.2965e-06,  1.4825e-06,  5.4452e-06,
        -1.0218e-05, -8.5209e-06,  2.2972e-07,  2.6343e-06, -2.2883e-05,
        -1.2726e-05, -7.9472e-06, -8.2844e-06, -4.0399e-07, -1.5827e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1450: [tensor([ 1.5214e-05, -4.2356e-06,  9.2441e-06,  2.6648e-05,  1.5911e-02,
        -1.0019e-05,  6.3016e-06,  1.2591e-05, -8.5066e-06,  1.3779e-05,
         1.3267e-05, -2.1291e-05,  6.5551e-07, -6.1607e-06,  1.2427e-05,
         5.0133e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7233e-05,  6.3989e-06,  2.0277e-05,  5.3766e-05,  4.3458e-02,
        -2.6706e-05, -3.8976e-04, -2.7045e-05, -1.3354e-05,  9.9090e-06,
         2.5929e-05, -4.1240e-05,  2.4152e-05, -5.1381e-05, -1.0872e-05,
        -2.1402e-05,  7.2990e-06, -9.0437e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7449e-06, -6.2852e-05,  2.8975e-05,  1.9752e-05,  5.4992e-02,
        -5.2584e-05, -1.3395e-04,  2.8471e-06,  1.5002e-05,  1.8233e-05,
         5.8479e-05,  1.0470e-05,  2.5363e-05,  3.1108e-05,  1.3081e-05,
        -3.7024e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4292e-04,  2.3433e-04,  1.7246e-04, -1.5923e-01,  1.6131e-04,
        -1.3385e-03,  2.6382e-04,  2.2617e-04,  7.0703e-05,  7.1272e-05,
         2.5582e-05,  1.6353e-04, -1.3939e-04, -5.1324e-05,  2.0362e-04,
         1.0021e-04,  6.9816e-05, -5.0487e-05,  1.2608e-04, -1.1192e-04,
         1.1147e-05,  3.5090e-05, -8.4004e-05,  1.6149e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0287e-04, -8.4530e-05, -6.0051e-06,  1.3164e-01,  1.4380e-04,
         6.0999e-04, -8.4604e-05, -1.9132e-04, -2.8452e-05,  2.4373e-05,
         3.6569e-05,  9.5567e-05,  2.3389e-05, -8.2397e-05, -5.8035e-05,
        -1.3174e-04,  2.4344e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5682e-04, -7.5182e-05,  1.7607e-05,  5.7672e-02,  8.2342e-05,
         7.6550e-04, -3.1883e-05,  1.0518e-05, -4.7784e-05, -5.0231e-05,
        -1.8024e-05, -7.5214e-06, -1.8830e-05,  4.5729e-06, -1.7113e-05,
        -1.5648e-05, -6.8168e-05,  5.2367e-07, -9.6037e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2903e-04,  9.5590e-03, -7.8043e-07, -5.8121e-05,  1.3892e-07,
         7.0759e-06,  9.2720e-04, -1.3951e-07, -2.0313e-06,  1.2511e-02,
         5.4083e-06, -4.2787e-06, -2.2468e-05,  6.4382e-06,  1.2469e-05,
         7.4846e-04,  1.8265e-06, -8.4896e-05,  8.0462e-06, -5.0753e-06,
         6.1121e-06, -3.6129e-04,  5.0141e-06, -2.7354e-03, -1.3121e-05,
        -9.1687e-06, -1.8240e-02,  5.3507e-06,  2.1398e-05, -1.1131e-03,
         9.8903e-06, -9.7580e-06, -9.5338e-06,  3.5797e-03, -3.1688e-06,
         1.1388e-05,  5.3468e-06,  4.0721e-06, -6.2759e-06, -1.3520e-05,
        -1.8180e-05,  7.4974e-06, -1.4135e-05,  1.2490e-05, -5.2688e-06,
        -6.3798e-06, -4.9809e-07,  7.3829e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.7120e-05,  6.4370e-03, -1.2300e-05, -5.1293e-05,  4.0500e-06,
        -3.2287e-06, -6.9835e-03, -2.1366e-06, -9.6808e-06,  3.1339e-02,
        -3.7019e-06,  2.0335e-06, -3.1787e-05,  1.6412e-05,  6.8970e-06,
        -1.4190e-03, -2.0010e-05,  1.2252e-03,  8.1696e-07, -7.8302e-06,
        -5.2232e-06,  8.9975e-05, -5.8383e-07,  2.3475e-04, -1.0866e-05,
        -5.5200e-06, -1.8868e-03,  6.1108e-06,  6.2701e-06,  6.2344e-05,
         5.0763e-06, -7.7823e-06,  5.6733e-06,  1.3713e-02,  9.1381e-06,
        -2.6666e-06, -6.2033e-06, -1.3831e-06,  3.1732e-06, -1.8668e-05,
        -2.3376e-05,  1.5043e-05,  9.2886e-06,  1.4126e-06,  1.0557e-05,
         3.2858e-06,  7.7759e-06,  1.3636e-05,  1.2942e-05, -7.9344e-06,
        -8.2741e-06, -9.6886e-06, -1.1217e-05,  2.1899e-06, -5.5831e-06,
        -6.7300e-06, -1.3418e-05, -9.9331e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2392e-05,  9.2552e-03, -4.8755e-06, -8.1441e-06,  7.3020e-06,
        -4.1293e-06, -1.1836e-02, -2.4427e-07, -1.9981e-05,  1.2890e-02,
        -1.4356e-05,  3.7752e-06, -1.3086e-05,  1.2963e-05,  7.2833e-06,
        -6.2226e-03, -1.1186e-06,  4.1597e-04,  3.6419e-06, -1.5650e-06,
        -1.0893e-05, -1.8219e-03,  1.4911e-05,  3.4817e-04, -3.7207e-06,
        -3.1779e-06, -1.0671e-02,  6.5395e-06,  3.7796e-06,  9.7343e-05,
         1.5366e-05, -1.5149e-05, -1.9886e-06,  4.5784e-03, -8.5182e-06,
         5.4494e-06,  4.6310e-06,  6.4013e-06, -1.3746e-05,  1.8514e-05,
        -2.8579e-06, -8.1380e-06, -3.5770e-06,  6.2132e-06,  3.5180e-06,
         5.0085e-06,  6.7287e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5743e-04,  3.1594e-05, -1.3032e-01,  7.8337e-05, -1.2081e-04,
         1.2104e-04, -1.1620e-05, -2.2823e-04,  4.5611e-05,  2.0858e-04,
        -1.1001e-04, -1.1736e-04,  1.6870e-04, -6.2630e-06, -6.9688e-05,
        -6.4717e-05,  5.7001e-05,  5.1229e-06, -1.7387e-04,  8.9146e-05,
        -1.7457e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0454e-05,  5.0088e-05,  9.9789e-02, -1.7893e-04, -1.0558e-04,
         1.6483e-04, -2.0004e-05,  1.3427e-04, -4.3389e-06,  1.6149e-04,
         9.7822e-07, -2.1764e-05, -7.2606e-05,  7.8551e-05, -5.5514e-05,
         2.4326e-05,  6.8217e-05, -1.2058e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6935e-05,  7.8852e-05,  1.1622e-01,  2.2980e-05, -5.6755e-05,
        -7.1320e-05, -3.8437e-05,  2.9640e-04, -4.9372e-05, -9.8699e-05,
         1.5913e-06, -3.6977e-05,  9.7219e-05,  1.3575e-04,  1.8401e-04,
         8.7859e-05,  1.4960e-04,  1.4932e-04,  5.9036e-05,  9.2974e-05,
         5.0370e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1500: [tensor([ 2.3289e-06,  1.4808e-02, -1.1074e-06,  2.0767e-06,  8.9547e-06,
         8.1498e-06, -7.2722e-03, -1.9783e-06, -1.0264e-05, -7.7819e-06,
         1.4700e-05,  1.6960e-02, -1.0941e-05, -2.2267e-05,  1.3686e-06,
        -6.9443e-06, -6.4740e-06,  2.7910e-03, -1.9186e-05, -3.7642e-06,
        -3.3938e-06, -1.0181e-05,  1.3933e-05,  2.0782e-10,  1.5267e-05,
        -2.6715e-05,  5.9926e-06,  5.3550e-06, -8.1907e-06,  9.5546e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1458e-04,  3.7818e-02,  2.8092e-05, -7.3006e-06,  1.7028e-06,
         8.3088e-06,  1.2960e-02,  8.4137e-06, -7.7142e-06,  1.7439e-05,
         2.1184e-05,  1.5444e-02,  2.7667e-05, -1.8873e-05,  1.6126e-05,
        -1.5660e-06,  3.7238e-06,  6.3371e-03,  2.2173e-06,  1.3869e-05,
         2.7160e-05, -2.5091e-05,  1.6019e-05,  2.8551e-05,  1.9512e-05,
        -2.7027e-05,  1.4195e-05,  2.0536e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5628e-06,  2.7989e-02,  1.4775e-06,  2.3877e-05,  1.2792e-05,
        -1.6210e-05, -1.8510e-02, -3.0705e-07,  2.6582e-07, -9.2808e-06,
         2.7117e-05,  1.4999e-02,  1.2242e-05, -1.8305e-05,  1.5705e-05,
         1.2028e-05,  1.5952e-05,  1.7296e-03,  2.1204e-06,  1.0178e-05,
         1.8479e-05, -2.9186e-05,  2.6752e-06,  2.0828e-05, -1.1462e-05,
        -1.7891e-05,  4.4622e-06, -2.4158e-05,  1.0106e-05, -1.5140e-07,
         5.8780e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3454e-05, -9.0084e-07, -5.5936e-06,  1.4181e-03,  3.6386e-03,
         1.1693e-05, -2.3656e-06,  3.1700e-06,  1.3193e-06, -1.9628e-04,
        -5.4355e-06,  1.8975e-05, -8.1030e-05,  5.3179e-06,  6.5212e-06,
        -7.7093e-06,  1.9845e-06,  8.2804e-06,  5.0612e-06,  9.5122e-06,
         7.0518e-07,  5.8194e-06, -5.2629e-06,  3.4719e-06,  1.0670e-05,
         8.3510e-06,  8.5638e-06,  7.1538e-06, -6.8953e-06,  1.2468e-05,
         1.2729e-05, -1.2615e-06, -2.8184e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1500e-04,  3.3742e-05,  1.0155e-05, -2.5550e-03,  4.0417e-02,
         1.1760e-05,  1.8762e-05,  1.7920e-05,  1.6015e-05, -1.2963e-04,
         2.6365e-05,  1.7332e-05,  1.1788e-05,  1.5849e-05,  3.0063e-05,
         2.2200e-05,  1.3780e-05,  6.0729e-06, -2.4452e-06,  2.2468e-05,
         2.6302e-05, -9.8414e-07, -1.3802e-05,  1.9716e-05,  1.5596e-05,
         2.7057e-05,  1.6741e-05,  1.2290e-05,  2.4889e-05,  2.2479e-06,
         2.8728e-05, -9.8671e-06,  7.9890e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0972e-05,  1.8364e-05,  1.6071e-05, -1.5776e-02,  2.8969e-02,
        -8.2985e-06, -3.0173e-06,  1.3147e-05,  5.5528e-06, -6.1175e-05,
        -7.9713e-07,  1.8414e-05,  1.2141e-04, -2.7405e-06,  1.2736e-05,
        -5.1688e-06, -1.1178e-06,  1.6073e-05, -3.7351e-06,  1.0235e-05,
         5.5620e-06, -1.0186e-06, -4.1642e-06, -6.4636e-06,  1.8168e-05,
         2.2628e-05, -1.4568e-06,  1.3070e-05, -5.0898e-07, -2.4774e-05,
         9.6926e-06,  7.3676e-06, -9.1365e-06, -3.6361e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8791e-05,  5.8640e-02, -1.7235e-05,  8.9193e-06, -1.4840e-04,
         3.1442e-05, -4.1467e-03, -3.5649e-05, -3.2919e-05,  2.7148e-06,
         5.6074e-06, -1.8146e-02, -1.8021e-03,  2.2609e-06,  1.5044e-04,
         1.1350e-02,  3.4160e-05,  7.6483e-06, -2.7446e-05, -1.9835e-05,
        -6.6571e-06,  3.3814e-05, -5.0553e-06, -1.2216e-05,  1.3039e-06,
        -7.1119e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1646e-05,  8.7353e-02, -8.7606e-07, -1.1559e-05, -3.3742e-05,
        -5.0259e-06,  1.8508e-02, -1.1080e-06, -3.7751e-06,  1.8783e-05,
         6.0441e-06,  8.7122e-04,  1.3289e-02, -4.8482e-05,  1.3994e-04,
        -2.2410e-03,  5.7168e-05,  2.5958e-05,  8.3430e-06,  5.3672e-06,
         7.4846e-06,  3.2847e-05,  2.3166e-05, -1.5516e-05,  1.6003e-05,
        -1.1215e-05,  2.0862e-05,  2.0179e-05, -4.2656e-06, -4.0772e-05,
         4.1664e-05, -3.8775e-05,  2.2975e-05, -2.2801e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9003e-06,  8.9442e-02,  2.1886e-05,  4.7578e-05,  2.8347e-04,
        -2.3385e-05,  3.9466e-03, -4.9373e-06,  7.5425e-06,  8.7041e-06,
         7.2305e-05,  9.5004e-04, -2.9768e-03,  2.3979e-05,  1.2626e-03,
         1.9177e-02,  9.4318e-05, -5.6601e-06,  2.6251e-07, -4.3636e-05,
         4.0285e-05, -1.6613e-05,  9.2956e-06,  4.3453e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5474e-05,  1.7732e-05,  4.4897e-06,  2.2543e-05,  2.3895e-05,
         4.1381e-02, -1.3417e-05,  5.7882e-06, -3.4792e-05, -3.1121e-05,
        -6.0740e-05, -1.1459e-05, -8.0622e-06, -5.9712e-05, -2.5722e-05,
         8.7598e-06, -2.7427e-05, -7.5800e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2211e-05, -3.0624e-05,  2.3655e-05, -6.8224e-05,  3.6163e-05,
         3.7526e-02,  4.6334e-05, -6.9393e-06, -2.9390e-05, -1.9998e-05,
        -4.3320e-05,  6.2584e-05, -2.6534e-05,  1.0559e-06, -4.4142e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1350e-05,  2.2303e-05, -2.5315e-05, -4.4868e-05, -4.0378e-05,
         2.9213e-02, -4.1649e-05, -1.7794e-05, -5.5310e-05, -2.6927e-05,
        -4.1073e-06, -1.0161e-06, -4.3011e-05, -2.4180e-05,  8.2269e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1550: [tensor([ 7.3438e-05,  6.1668e-02,  3.9048e-05, -3.2255e-02,  3.2494e-05,
         8.4062e-04, -1.7085e-06,  4.2766e-02, -8.5298e-05, -1.3973e-03,
         6.2191e-05, -3.4712e-05,  4.2543e-05, -6.6204e-06, -8.6742e-06,
        -6.3366e-06,  6.1317e-06,  1.9061e-05, -2.5892e-05,  7.6857e-05,
        -4.5395e-05,  8.8947e-06,  1.2513e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1720e-04,  2.1019e-02,  6.3508e-05, -1.2063e-02,  9.1022e-06,
         2.2009e-04,  1.2712e-05,  1.2973e-01, -2.3864e-05, -3.8730e-04,
        -2.3256e-06,  1.7735e-05,  2.6236e-05, -3.2670e-05,  2.7206e-05,
         2.7776e-05, -2.9769e-05,  3.1411e-05, -2.6296e-05, -4.2645e-05,
        -3.8245e-05,  9.7783e-06,  2.5678e-05, -3.8501e-06,  6.4576e-05,
        -1.6271e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2236e-05, -1.6616e-02, -2.9299e-05,  5.4987e-02, -3.5800e-05,
        -1.0644e-05, -4.9895e-05, -2.0621e-02, -3.7664e-06,  3.8543e-03,
        -1.3504e-05, -1.4458e-05,  1.9079e-05,  2.3939e-06, -5.6603e-05,
         2.4099e-05, -1.6546e-05,  8.7423e-06, -8.7308e-06, -1.2200e-05,
         2.6307e-06, -6.3453e-05,  2.6617e-05,  5.6151e-06, -1.7955e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.3609e-05, -2.3421e-02, -3.4250e-05,  2.1767e-07, -1.2245e-05,
         3.3058e-04,  9.7786e-06,  2.2925e-05, -5.6676e-06,  2.0319e-05,
        -4.5618e-06,  2.9107e-05, -1.3706e-05, -2.0780e-03, -3.9061e-06,
         2.6563e-04,  1.1277e-05,  3.2904e-02, -1.5467e-05, -2.4276e-05,
        -2.2272e-05, -4.9026e-06,  1.6612e-05,  5.0570e-06,  4.2892e-06,
         8.4930e-06,  5.8724e-05,  1.4400e-05, -1.6470e-05, -1.6668e-06,
        -1.7028e-05, -1.5006e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7632e-04,  1.1057e-02,  4.1694e-05,  3.3154e-05,  7.3088e-05,
        -5.9949e-02, -7.4611e-05, -2.7825e-05, -7.4880e-06,  5.6669e-05,
        -3.2462e-06,  9.5151e-05, -4.0415e-05,  4.5313e-03,  1.2573e-04,
        -5.4509e-04, -1.1489e-05, -6.5574e-02, -4.7762e-05, -1.4882e-05,
         4.2980e-06,  3.6607e-05, -8.0625e-06, -4.0043e-05,  2.1731e-05,
         5.0655e-05,  8.3404e-06,  8.1100e-06, -2.8062e-05,  2.0665e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4065e-04,  3.0578e-02, -9.5877e-07, -8.2046e-06,  1.6965e-05,
         2.2175e-03, -9.8220e-06, -2.0178e-06, -2.1935e-05,  5.2456e-06,
         9.3027e-08,  1.6779e-06,  1.7511e-05,  9.4048e-03,  7.8066e-06,
        -1.9366e-04, -7.7297e-06, -1.6742e-02,  3.5559e-05,  1.2437e-05,
         1.5001e-05,  6.3440e-06, -6.6020e-07,  5.4654e-06, -3.3652e-05,
         1.3975e-06,  3.2769e-06,  7.0638e-06, -1.8178e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0377e-05,  1.5486e-02,  2.9591e-06,  1.6156e-03,  6.1196e-02,
        -2.3313e-05, -5.2974e-06,  3.9620e-06,  9.6513e-06, -2.8157e-06,
        -2.5652e-04,  1.7625e-02,  1.7039e-05, -2.6224e-04,  2.7837e-05,
         2.0534e-05,  2.5451e-05,  1.3707e-05,  2.2288e-05,  5.3116e-06,
        -2.8026e-05, -2.4980e-05, -7.3480e-06,  1.0725e-05,  1.5319e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5380e-05, -7.1596e-03,  1.2755e-05, -9.8534e-03, -6.9081e-02,
        -2.6400e-05,  1.3355e-05,  1.4905e-05,  8.0021e-05,  1.4740e-05,
         1.8272e-04, -2.8417e-02,  5.2720e-05,  4.3223e-05, -7.2788e-05,
        -2.9980e-05,  1.7877e-05,  8.3555e-05,  4.5018e-06, -1.0986e-05,
         2.1066e-06,  4.0989e-05, -2.7529e-05, -2.2141e-05, -2.5864e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0338e-05,  9.9496e-04, -2.3068e-05, -2.9524e-02,  6.9021e-02,
        -9.7610e-06, -8.8496e-06,  1.1965e-05,  3.6966e-05,  9.5443e-06,
        -1.9866e-04,  1.4866e-02, -1.8641e-06, -2.2188e-04,  2.1666e-05,
         2.6069e-05,  1.9772e-05,  4.3726e-05, -1.2174e-05, -6.4218e-06,
         1.2371e-05,  2.6600e-06, -1.4281e-05, -2.6334e-05, -1.2327e-05,
        -4.4750e-05,  6.8825e-06,  1.0150e-05,  5.2595e-05,  1.0088e-05,
         1.1611e-05,  1.5193e-07,  1.0292e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9722e-05,  2.1442e-04,  3.0536e-05, -7.7788e-02,  1.1137e-04,
        -1.7694e-05, -2.5908e-05,  2.7039e-05, -7.2766e-05, -1.2713e-01,
         7.8909e-05, -6.4089e-05, -9.7139e-05,  8.4600e-05, -2.3302e-05,
        -2.5068e-06, -4.5562e-05, -2.3576e-05, -4.5564e-05, -4.7836e-05,
         1.0744e-04,  7.6800e-05, -6.0897e-05, -1.5027e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7795e-04, -2.6317e-06,  2.1662e-05, -4.7654e-02, -6.4888e-05,
        -5.4328e-05, -1.9143e-05,  3.4981e-05, -3.4542e-05,  4.4821e-03,
        -5.0080e-05, -2.1758e-05,  5.1546e-05, -5.3407e-06,  7.5459e-05,
         8.1562e-05, -6.7719e-05,  5.5262e-05,  1.3446e-05,  2.4733e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2544e-04,  3.5364e-06, -7.2914e-06,  4.3032e-02,  1.5925e-05,
         4.9827e-05,  6.7561e-06, -1.9588e-05,  2.7423e-05,  1.5898e-02,
        -3.0361e-05,  4.4746e-05, -5.5419e-06, -1.2417e-05, -2.3538e-05,
         2.7949e-06, -2.6482e-05, -4.1913e-05, -2.4533e-05,  1.0873e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
