Iter #50: [tensor([-6.5741e-03, -3.8677e-03, -3.3990e-05,  6.7037e-04,  1.2281e-04,
        -6.1685e-04, -1.5485e-03,  1.2464e-04, -2.8200e-04, -3.0142e-05,
        -2.6588e-04, -2.1820e-04, -9.5094e-04, -9.2427e-05, -4.9949e-04,
        -8.1299e-04, -2.8142e-03, -5.9211e-05,  2.2353e-05, -1.4084e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3157e-03, -3.7010e-03, -2.1646e-04,  5.0149e-04,  1.1880e-04,
        -5.0379e-04, -1.4455e-03,  2.5369e-04, -3.3457e-04, -3.9008e-04,
        -1.2126e-04,  3.1466e-05, -7.5976e-04, -3.6727e-04, -6.6017e-04,
         3.8152e-05,  8.9777e-04, -3.3428e-04, -9.3078e-05,  1.9186e-05,
        -3.9052e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6938e-04,  2.4510e-03, -7.4585e-05, -3.3329e-04,  1.2943e-05,
         3.0403e-04,  5.8080e-04,  3.7607e-05,  8.3092e-05, -5.2304e-05,
         1.8626e-04,  1.6954e-06,  1.7745e-04, -9.0068e-05, -4.3739e-06,
        -3.8058e-05,  4.6214e-05, -9.2283e-05, -7.1028e-05,  7.7676e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0593e-03, -2.5334e-03, -1.7797e-04, -2.3331e-04,  2.7994e-04,
         2.0257e-04, -2.4838e-03, -4.3762e-05,  1.7699e-04, -5.1894e-05,
         2.1050e-05, -2.8973e-03, -3.5611e-04, -1.7166e-04, -1.6675e-04,
        -1.2535e-04,  2.0228e-04, -3.9092e-04, -4.2296e-04,  4.0233e-05,
         1.1192e-04,  3.0599e-04, -2.5059e-05, -3.4194e-04,  1.1271e-04,
        -9.4315e-04, -6.3870e-05, -3.7866e-04, -4.9377e-04, -3.4569e-04,
        -1.5876e-04,  9.5771e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3616e-03, -1.9939e-03, -2.0014e-04, -4.4218e-04,  1.9570e-04,
         2.3553e-04, -2.4179e-03, -1.2082e-04,  1.6327e-04,  1.2599e-04,
         2.4487e-05, -2.0498e-03, -1.3265e-04, -2.1533e-04, -2.3168e-04,
        -1.9768e-04,  1.4630e-04, -4.3428e-04, -2.7994e-04,  4.7241e-05,
         1.6570e-04,  3.8526e-04, -1.5854e-04, -1.4390e-05, -7.7889e-04,
         1.5529e-05, -4.7072e-04, -1.6252e-04,  1.3007e-04,  8.1662e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8245e-03,  1.6792e-03,  1.7233e-04,  3.9537e-05, -2.2821e-05,
         6.7285e-05,  1.3448e-03,  1.3460e-04, -9.2022e-05, -1.1326e-04,
         4.3382e-05,  1.3747e-03,  1.9719e-05,  1.6682e-04,  9.0913e-05,
         8.0970e-05, -4.8818e-05,  2.5537e-04,  2.2138e-04,  6.8117e-05,
         2.1414e-06, -1.2492e-04, -1.7347e-04, -1.2697e-04, -1.3209e-04,
         1.2797e-03, -7.8989e-05, -7.5717e-05,  1.0578e-04,  2.1919e-04,
         8.0871e-05,  9.8323e-05, -5.9609e-06, -6.3900e-05,  2.1636e-04,
         1.6458e-04,  1.4128e-04, -8.4321e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7955e-03,  2.3201e-04,  1.4261e-04,  2.7662e-04, -5.6166e-05,
         6.5916e-04,  3.6461e-04,  4.3763e-06, -1.1468e-04,  1.7848e-04,
         4.6535e-04, -5.5907e-05,  1.5699e-04,  2.6641e-04,  2.8384e-04,
        -2.3082e-04,  2.0821e-04,  2.6955e-05,  7.4666e-06, -2.0606e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0865e-03, -1.8068e-04, -8.4676e-04,  1.1550e-04, -8.1689e-04,
        -9.0942e-04, -4.7582e-04, -2.4085e-04,  2.7840e-05,  1.7797e-04,
        -1.1082e-03, -3.4798e-05, -7.6384e-04,  3.6046e-05,  4.8496e-04,
        -1.3641e-03, -2.9057e-04, -9.3339e-05,  1.0647e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.4501e-06, -1.9085e-04, -8.4913e-04,  1.0131e-04, -7.6297e-04,
        -6.8041e-04, -2.4748e-04, -2.6271e-04,  1.1876e-04,  1.8110e-05,
        -1.3018e-03, -4.1966e-05, -8.6873e-04, -5.9550e-04, -6.5053e-04,
        -9.2361e-04, -3.2038e-04, -4.5248e-04,  8.5259e-05,  5.9927e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7048e-03, -3.2821e-03, -2.2441e-04,  1.0622e-04, -3.7716e-04,
         1.9857e-04, -1.6759e-04, -2.5363e-04, -2.5545e-04, -1.2586e-03,
        -1.4127e-04, -6.2522e-04, -2.5962e-03, -5.3232e-05, -2.2906e-04,
         1.4778e-05,  1.0828e-04, -3.6793e-05, -3.8343e-04, -2.5584e-03,
         5.8590e-06, -1.7283e-04,  2.7648e-04, -3.7323e-04,  1.4027e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9051e-03, -3.2062e-03, -3.3413e-05,  5.1557e-05, -4.9274e-04,
         1.5004e-04, -1.3793e-04, -7.3441e-05, -1.8409e-04, -1.0689e-03,
        -3.8366e-04, -6.2925e-04, -2.3592e-03,  6.7972e-05, -4.9963e-05,
         1.7646e-04, -2.0837e-05,  3.8259e-04, -1.8170e-04, -2.8908e-03,
        -1.6658e-05, -2.0618e-04,  5.5982e-04, -3.0967e-03, -1.4319e-04,
         1.3099e-05,  4.2122e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.2094e-03, -2.7037e-03, -1.6659e-04,  1.9642e-04, -2.7216e-04,
         2.0131e-04, -1.1317e-04, -1.1038e-04, -2.5386e-04, -7.0212e-04,
        -2.0662e-04, -6.2858e-04, -2.6040e-03, -7.7245e-05, -1.0817e-04,
         9.5084e-06,  2.7832e-04, -1.1910e-05, -1.8579e-04, -2.1371e-04,
        -1.0772e-04,  4.4484e-04, -3.0223e-04, -4.8496e-04, -6.6131e-05,
         1.5463e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-2.2859e-03, -3.0842e-04, -1.4093e-04, -1.3248e-03,  3.6200e-04,
        -1.7478e-04, -9.1540e-05, -7.7243e-04,  1.3611e-04, -6.9536e-04,
         8.3677e-05,  4.5497e-05, -1.9331e-03,  2.1619e-04,  2.0180e-04,
         9.5103e-05, -2.1977e-03, -9.0228e-04, -1.0091e-04,  3.7460e-04,
        -4.8285e-04,  1.4975e-04, -2.3956e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9869e-03, -6.0602e-04, -1.2204e-04, -1.3484e-03,  1.0496e-04,
        -6.0668e-05,  7.4478e-05, -4.4317e-04,  2.6727e-04, -7.4565e-04,
        -2.9117e-05,  7.1680e-05, -2.1851e-03,  2.4784e-04, -5.9785e-05,
         1.2946e-04, -2.2358e-03, -7.2471e-04, -1.8158e-04,  2.1259e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8733e-03, -4.5469e-03, -1.2867e-04, -8.2175e-05,  4.5074e-04,
        -4.8730e-03, -2.1087e-04,  1.0083e-04, -5.9747e-04,  1.3889e-04,
        -1.7430e-04,  1.4121e-04, -2.7414e-04, -1.9518e-04, -1.4184e-04,
         2.1425e-04, -2.2658e-04,  2.8395e-05,  6.3672e-05, -6.3548e-05,
         2.2509e-04, -5.4724e-04, -3.7495e-03, -1.5801e-04,  3.1009e-04,
        -3.8044e-03,  1.6622e-04, -4.0700e-06, -6.4848e-05, -2.7308e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4454e-03, -5.0922e-03, -3.8638e-04, -5.8958e-06,  5.1073e-04,
        -4.8340e-03, -2.1776e-04, -8.0213e-05, -4.6231e-04,  1.6077e-04,
        -1.1304e-06,  7.6048e-05, -5.5523e-04, -2.0686e-04, -3.8140e-04,
         3.0545e-05, -4.8534e-04, -1.4965e-04, -5.9880e-05,  1.5900e-04,
         1.9366e-04, -2.1202e-04, -4.6915e-03, -2.1734e-04,  5.0541e-04,
         1.4102e-04, -3.8158e-04, -1.0544e-04, -2.1159e-04,  5.6411e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3690e-03, -5.2226e-03, -1.0292e-04,  1.0710e-06,  5.8935e-04,
        -4.3709e-03, -2.5322e-04,  3.4539e-05, -6.4159e-04,  1.7948e-04,
        -8.5116e-05, -4.2508e-05, -3.8352e-04, -1.3441e-04, -3.0972e-04,
         2.6222e-04, -4.7451e-04,  5.7217e-05, -1.2176e-04,  4.3329e-05,
        -4.3021e-03, -2.2117e-04, -3.9455e-04, -4.4541e-05, -1.4533e-03,
         3.2250e-05, -2.1844e-04, -3.7500e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0254e-03, -7.4684e-03,  3.1235e-04, -8.8318e-05, -9.0080e-05,
         3.4765e-04, -7.3027e-03, -1.4031e-04,  5.2149e-05,  1.4239e-04,
        -1.8522e-03,  2.0211e-04, -2.6473e-04,  8.4924e-05, -1.3671e-03,
         6.9236e-05,  2.4441e-04, -2.0026e-04, -3.7839e-05,  1.5637e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2779e-03, -7.9352e-03,  3.2078e-04, -4.5766e-05, -1.0280e-05,
         4.6437e-04, -7.3690e-03,  7.4156e-05,  1.2100e-04,  1.0299e-04,
        -6.8318e-03,  3.8111e-05, -6.5042e-04, -1.6049e-04, -6.4695e-03,
        -5.8316e-04,  6.1135e-05,  4.8088e-05,  8.2884e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9617e-03, -7.3581e-03,  2.6744e-04, -2.3995e-04, -1.1682e-04,
         3.4778e-04, -6.5010e-03, -1.5412e-04,  1.2156e-04,  5.6968e-06,
        -7.0067e-04, -8.8438e-05, -5.5146e-04, -6.0460e-05,  3.2224e-04,
        -1.2981e-04,  3.5739e-04, -7.5674e-04,  3.3883e-04,  8.4917e-05,
         1.2281e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9101e-03,  8.3333e-03, -1.7776e-05,  2.0049e-04,  1.1308e-04,
        -1.8437e-04,  8.4408e-03,  1.6623e-04, -4.1019e-04, -2.0167e-04,
         1.8853e-03,  1.4579e-05, -1.6327e-04,  2.7811e-04, -4.1967e-04,
        -9.8223e-05, -9.0853e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0199e-03, -8.1219e-03,  2.5216e-04, -1.3337e-04, -1.1719e-04,
         5.1743e-04, -5.3765e-03,  6.1323e-05, -4.1854e-05,  2.9407e-05,
        -6.2759e-03,  3.1341e-04, -4.0619e-05, -3.7430e-05,  1.9847e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6567e-03, -6.8468e-03,  2.6012e-04,  6.3402e-05,  1.3362e-04,
         5.7285e-04, -7.1790e-03,  9.6714e-05,  1.0487e-04, -1.2099e-04,
        -7.3575e-03,  1.1576e-04,  3.2110e-05,  9.2947e-05,  2.5251e-04,
         1.4714e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0707e-03, -4.6107e-03,  2.3016e-04, -5.8417e-05, -3.8273e-05,
         2.5912e-04, -4.5348e-03,  1.5130e-05, -9.9594e-06,  1.1738e-04,
        -4.0106e-03, -1.3703e-04, -3.5572e-04, -4.6310e-04, -6.3349e-04,
        -4.1289e-05,  1.6569e-05,  1.7293e-04, -1.3961e-03, -5.6337e-05,
         5.7659e-04, -2.3118e-04,  4.4001e-05,  2.9958e-04, -1.7173e-04,
         2.5127e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.3000e-03, -2.0369e-02,  3.0593e-04, -6.3576e-04,  8.3898e-05,
        -4.8936e-05, -1.3702e-03,  5.8961e-05,  5.4100e-04, -1.8268e-02,
        -1.4733e-04, -6.6305e-04, -1.5116e-04,  2.7454e-05,  2.0017e-04,
        -3.7109e-04, -3.0566e-05, -3.3906e-05, -3.9435e-04, -1.3275e-03,
         5.3825e-06,  1.0261e-03, -2.1350e-04,  1.1234e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7647e-04, -1.8910e-02,  2.6464e-04, -4.9853e-04,  1.4704e-04,
         1.2474e-04, -9.2902e-04, -2.1570e-05,  5.7153e-04, -1.4428e-02,
         9.3984e-05, -6.1266e-04, -1.6249e-04, -1.9876e-05, -1.2051e-04,
         3.4781e-04, -1.6840e-03,  1.0196e-04, -1.9011e-05, -2.5475e-04,
        -8.1286e-06, -1.8156e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4165e-04, -1.9575e-02,  2.6349e-04, -5.4935e-04,  2.2699e-05,
         9.8019e-05, -1.3751e-03,  2.2216e-05,  6.7128e-04, -1.9280e-02,
        -1.8181e-04, -6.7522e-04, -1.8647e-04, -9.6843e-05, -8.8150e-05,
        -1.8727e-04, -2.6690e-04, -2.2068e-05, -5.8753e-06, -2.8581e-04,
        -1.4208e-03,  1.9511e-04,  2.8114e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0394e-03, -1.6843e-04, -3.2830e-04, -8.0970e-05, -1.2927e-02,
        -2.1125e-05, -4.4722e-04, -7.8572e-04, -1.3833e-06, -1.2317e-03,
         3.6756e-04, -1.4319e-04, -3.7295e-03, -1.9630e-04,  5.8473e-05,
        -1.9646e-05, -4.2885e-03, -2.6230e-05,  2.4768e-05, -5.3173e-04,
        -4.6318e-04, -7.9923e-04,  2.2964e-04, -3.1485e-04, -1.8208e-04,
        -5.8164e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5023e-04, -7.2950e-05, -2.2997e-04, -1.3208e-04, -1.4833e-02,
        -4.2034e-05, -4.6159e-04, -5.7834e-04,  1.1405e-04, -9.3643e-04,
         2.3352e-04, -5.3746e-05, -3.2811e-03, -1.7877e-04,  4.8288e-05,
         4.0488e-05, -4.1300e-03, -1.2814e-05, -1.5474e-03,  4.1124e-04,
        -9.3204e-05, -3.7072e-03,  1.0068e-06, -4.3560e-07,  1.4449e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4038e-03, -6.5806e-05, -3.6562e-04, -1.0886e-04, -1.0943e-02,
        -1.0580e-05, -4.2439e-04, -6.5423e-04,  3.6432e-05, -1.2345e-03,
         2.9089e-04, -7.0947e-05, -2.3163e-03, -1.5060e-04, -1.9991e-05,
         1.1965e-04, -3.7138e-03,  3.2393e-05, -1.0898e-03, -2.3132e-04,
        -7.7848e-04,  3.3308e-04, -3.1622e-03, -4.8820e-05, -7.2056e-04,
        -7.1035e-05, -6.2511e-05, -1.3769e-04,  7.3192e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8156e-03,  2.2521e-04, -7.7997e-04, -5.4621e-04, -3.3816e-04,
        -1.1356e-02, -1.1922e-04, -2.3298e-04, -2.0312e-03, -3.0688e-04,
        -5.0627e-05, -3.9620e-05, -2.3903e-03,  1.2716e-04, -1.2184e-03,
         5.1846e-04, -1.0870e-02, -1.7408e-04, -2.1319e-05, -3.0112e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0281e-04,  4.9430e-05, -5.7375e-04, -3.9743e-04, -6.4091e-05,
        -7.1562e-03,  1.3301e-04, -1.6544e-04, -1.7472e-03, -1.6613e-04,
        -3.3783e-05,  1.2767e-05, -1.4478e-03,  5.0481e-05, -6.9227e-04,
        -3.8769e-04,  8.0757e-05, -6.5980e-03,  1.4837e-04, -4.4192e-04,
        -4.3392e-04, -6.8460e-03, -2.6503e-05, -4.5985e-05,  2.8551e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2026e-03,  7.9751e-06, -6.7199e-04, -7.2224e-04, -2.2899e-04,
        -8.1692e-03,  7.9930e-05, -1.2844e-04, -2.3376e-03, -4.1329e-05,
         1.5730e-05,  6.5152e-06,  4.0168e-04, -4.4339e-04, -1.0106e-02,
         5.4334e-05, -6.7397e-04, -8.2178e-06,  1.6964e-04, -7.4611e-03,
        -2.1157e-04,  1.3929e-04, -4.6416e-05,  5.5437e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3125e-04, -2.9082e-02,  3.6926e-04, -1.1852e-03,  2.2576e-04,
        -2.9176e-04, -4.3778e-05, -2.3466e-03, -4.8111e-03, -5.8079e-05,
        -2.1080e-04, -7.5699e-04, -7.2862e-05, -8.6300e-05,  7.4830e-05,
        -7.1901e-05, -9.3336e-05, -2.9099e-04, -5.3557e-04,  2.2494e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6336e-04, -2.4761e-02,  4.0771e-04, -7.4817e-04, -2.0941e-04,
        -1.0688e-04, -2.8258e-05, -2.2772e-03, -3.0312e-03, -5.3777e-04,
        -5.3035e-04, -9.1752e-04, -2.2732e-04,  1.0572e-04, -4.4946e-03,
        -1.2051e-04, -2.5493e-05,  2.5355e-06, -1.3055e-03, -1.5627e-04,
         3.3774e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4163e-03, -1.5491e-02,  6.6378e-05, -4.8561e-04,  1.9428e-05,
         1.5261e-04, -1.6553e-04, -1.5210e-03, -3.0095e-03, -9.7902e-05,
        -1.9297e-04, -4.0860e-04, -7.3190e-05,  1.6697e-04, -2.9620e-03,
        -1.5137e-04, -7.5375e-05, -1.4304e-04, -1.0399e-03, -1.3074e-03,
        -2.7501e-03,  3.3272e-05, -3.4452e-05, -3.1676e-04, -5.1880e-04,
        -9.9616e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 6.0099e-05,  6.0157e-05, -7.2815e-04, -9.2370e-05, -1.3700e-04,
        -4.5536e-04, -3.0477e-02, -1.4197e-04, -6.9986e-05, -3.4647e-06,
         5.0631e-05,  1.9532e-04, -5.2198e-03, -3.9689e-05, -2.7837e-04,
        -2.6920e-02,  1.7478e-04,  1.0302e-04,  5.6728e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7246e-03, -3.7075e-02, -7.4784e-05, -3.9020e-04,  1.1471e-04,
        -9.8266e-05, -4.3156e-04, -2.9311e-05,  3.8393e-04, -4.7474e-03,
         9.6389e-05, -3.1000e-04, -2.0052e-02,  2.3341e-05,  3.3826e-05,
        -8.4649e-06, -5.0686e-03,  3.9188e-06, -2.7284e-04, -1.7029e-05,
        -2.1677e-04, -2.4908e-04,  2.2704e-05, -1.7371e-04,  1.6020e-04,
        -5.7032e-03,  1.2476e-04, -2.0996e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5410e-03, -3.0606e-02,  4.4958e-05, -4.3125e-04, -5.5656e-06,
        -1.5260e-04, -4.2280e-04,  4.2380e-05,  2.6744e-04, -4.1675e-03,
         1.3601e-05, -2.5961e-04, -2.2513e-02,  9.9301e-05, -1.0755e-04,
         1.3802e-05, -7.2354e-03, -1.5926e-04, -1.3838e-05,  8.0819e-05,
         4.1463e-05, -5.1508e-03,  5.2842e-05, -3.4088e-04, -6.1914e-03,
         2.3602e-04, -1.5183e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3211e-03, -2.8456e-02,  1.5065e-04, -2.5386e-04, -3.4922e-05,
         1.4851e-05, -2.5521e-04,  7.3625e-05,  2.4445e-04, -2.2718e-03,
         1.0749e-04, -1.6096e-04, -1.4065e-02,  1.4196e-04, -7.2399e-06,
         1.0375e-05, -1.1087e-02,  5.6838e-05, -2.6259e-04,  8.4450e-05,
        -2.0134e-04,  6.0988e-05, -2.5166e-03,  1.0483e-04, -2.7701e-04,
        -3.1003e-03,  5.6580e-05,  5.1687e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8550e-04, -2.6635e-02, -9.2916e-05, -4.8375e-05, -4.3062e-04,
        -1.7905e-04, -2.0600e-04, -1.9690e-06,  3.9761e-05, -2.2103e-02,
        -3.5182e-04, -7.9568e-05, -1.6855e-04, -7.6275e-05, -4.7865e-05,
        -1.3399e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4326e-04, -1.7922e-02, -1.3207e-04,  3.4825e-05, -4.5242e-04,
        -6.6751e-05, -3.1133e-04, -1.3609e-05, -2.4738e-05, -1.3170e-02,
        -1.0783e-04, -6.8545e-06, -1.6800e-04, -2.8922e-04,  2.0577e-05,
        -1.4782e-02, -2.8039e-04, -1.6896e-04,  1.0020e-05,  4.0341e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7062e-04, -3.0475e-02, -1.7077e-04,  9.6549e-06, -4.2776e-04,
        -1.3317e-04, -5.4081e-04, -2.2843e-05, -2.3722e-05, -2.6453e-02,
        -2.0089e-04, -2.3705e-04, -4.4024e-04,  3.6216e-04,  1.0319e-04,
         8.6823e-05,  9.9400e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5616e-04, -3.6495e-02,  3.3069e-05,  8.6356e-05, -3.3909e-04,
        -1.1759e-04,  2.3496e-04,  9.8959e-04, -7.8986e-03, -2.2480e-04,
         9.9378e-05, -1.6942e-02,  8.2817e-05, -2.1065e-04, -1.6679e-04,
        -8.6482e-05,  8.8605e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1951e-04, -7.5239e-02, -1.3553e-04,  1.0710e-04, -7.9443e-05,
        -2.2325e-04,  2.8458e-04,  5.9970e-04, -7.5259e-03, -3.6623e-04,
        -6.6828e-05, -7.4763e-03, -1.3099e-05, -3.7490e-04, -7.8626e-05,
        -7.7779e-05, -1.7603e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7356e-04, -6.8131e-02,  5.2461e-06,  1.0686e-04, -4.3125e-04,
        -2.5932e-04,  5.9973e-05,  6.2716e-04, -5.8896e-03, -1.0195e-04,
         1.5552e-05, -6.8127e-03, -6.8808e-05, -7.4568e-04,  1.7706e-04,
        -7.2916e-05,  9.1311e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8415e-04, -1.2444e-02,  1.1708e-05,  1.6426e-04, -2.9355e-05,
         1.1405e-04, -4.3158e-05,  6.6280e-05, -5.8784e-05,  2.1251e-04,
        -8.1190e-03,  4.6200e-05,  2.2832e-04,  4.1531e-05, -2.4832e-04,
        -1.0296e-02,  3.8864e-05, -1.1777e-05, -5.6646e-05, -1.0791e-05,
        -1.0831e-02,  1.4378e-05,  1.0713e-04, -3.7507e-05,  1.7602e-04,
         7.6118e-05,  9.4826e-05, -1.4275e-04,  1.4484e-04, -1.4828e-02,
         6.1497e-05,  9.0723e-06, -2.2886e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1309e-03, -1.8940e-02, -2.9991e-05,  8.5388e-05, -5.7459e-05,
         1.1327e-04, -2.5161e-05,  6.8209e-05, -4.4990e-05,  1.3635e-04,
        -9.7845e-03,  3.5200e-05,  1.5289e-04,  1.0750e-04, -1.2438e-04,
        -1.2238e-02,  8.5250e-06,  4.9749e-05,  2.1777e-05, -5.0885e-06,
        -9.3175e-03, -4.5926e-05,  2.9653e-05, -5.4881e-05, -5.3371e-05,
         2.0103e-04,  6.9045e-05, -3.3324e-04, -1.3621e-02, -2.8274e-05,
        -2.1312e-05, -4.5894e-05,  8.4024e-05,  5.9238e-05,  7.3752e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.2730e-04, -3.0912e-02,  1.9277e-05, -3.2476e-04,  1.2865e-05,
        -1.0106e-04, -6.3883e-04, -8.6310e-05,  1.1788e-04, -3.6276e-03,
        -1.5915e-04,  2.0848e-04, -6.7983e-05, -5.9485e-04, -2.1332e-03,
         3.9434e-05, -8.4804e-06, -2.6424e-05, -1.6449e-05, -5.6685e-04,
         4.0194e-04, -4.4296e-02, -1.1181e-04,  3.0822e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6984e-04, -3.2305e-03, -6.3728e-05, -2.1374e-04, -7.9197e-05,
        -2.7129e-02,  1.6897e-06, -5.9070e-05, -1.1660e-04, -6.8445e-05,
        -2.2393e-02,  1.2356e-04, -9.8846e-05, -9.9223e-05, -2.2823e-04,
        -2.4640e-05, -9.0321e-04, -2.1944e-02,  5.0149e-06, -1.1610e-04,
        -2.9721e-04, -1.3392e-03,  3.2803e-05, -4.7014e-05, -2.8139e-05,
        -3.4355e-03,  4.4888e-05, -3.9585e-04,  2.1003e-04, -9.7073e-03,
         9.8770e-05, -1.5162e-05,  2.9557e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0288e-04, -3.5907e-03,  1.7361e-06, -1.3089e-04, -6.7700e-05,
        -2.5165e-02, -8.5983e-05, -7.5590e-05, -7.3147e-05, -6.3258e-06,
        -3.4922e-02,  1.8901e-04, -8.7811e-05, -3.1317e-05, -2.4210e-04,
         1.3037e-05, -6.8591e-04, -2.0104e-02, -3.2890e-05, -6.8314e-05,
        -2.1807e-04, -3.1986e-03, -1.8319e-05, -1.3710e-05, -2.5562e-05,
        -5.8169e-03,  1.0288e-04, -1.4044e-04,  5.6713e-05, -7.4699e-05,
         2.4717e-04,  3.8152e-05,  8.1977e-06,  1.3962e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2632e-03, -2.9819e-03, -6.6232e-05, -2.4990e-04, -1.4993e-04,
        -2.6273e-02,  1.0688e-05, -2.2048e-05, -2.7683e-05, -2.4023e-05,
        -2.3118e-02,  1.1175e-04, -1.0969e-04, -1.7130e-06, -2.6214e-04,
        -5.4735e-05, -7.6476e-04, -1.9206e-02, -7.9965e-05, -1.5496e-04,
        -2.6422e-04, -2.2401e-03,  3.0110e-05, -4.4488e-05, -4.6808e-05,
        -7.7717e-03,  1.2408e-05, -2.8495e-04,  3.4931e-05, -1.2329e-02,
        -2.4122e-05, -6.8671e-05,  8.8810e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7516e-04, -3.0041e-02, -3.7139e-05,  2.4542e-04, -1.4307e-02,
        -6.6299e-06, -6.0722e-05, -5.0721e-05, -2.5884e-04, -2.9214e-02,
        -8.4733e-06,  1.4341e-04, -1.2296e-02, -4.5011e-05,  7.6673e-05,
        -3.0984e-05, -1.6547e-04, -6.4466e-05, -6.6463e-05,  4.8353e-06,
         1.1433e-06,  1.4303e-04, -4.9341e-05, -1.1236e-05, -1.0204e-04,
         5.7623e-05, -2.3008e-04, -6.5600e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7358e-04, -2.6037e-02,  6.2202e-05,  1.5789e-04, -1.6339e-02,
         4.5958e-05, -3.4770e-05, -5.7220e-06, -1.7248e-04, -2.4288e-02,
        -6.9637e-06,  1.4454e-04, -1.5142e-02, -1.6951e-05,  1.2842e-04,
         9.4120e-06, -3.2736e-04, -3.9122e-05, -3.5949e-06,  1.4137e-05,
        -4.7617e-05,  1.0506e-04,  1.3795e-05,  3.5471e-05, -1.1889e-04,
        -1.6352e-05, -5.2102e-05, -4.9430e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6861e-05, -2.1498e-02,  2.4333e-05,  1.9076e-04, -1.5476e-02,
        -8.0107e-05, -3.0423e-06, -6.2831e-05, -2.3059e-04, -2.8948e-02,
        -2.7958e-05,  1.1669e-04, -1.5615e-02,  5.9252e-05,  7.2046e-05,
        -4.2803e-05, -1.3581e-04, -5.4908e-05, -1.9365e-05,  3.8388e-05,
        -5.3177e-06,  3.3468e-05, -2.5657e-05,  6.4119e-06,  6.1825e-05,
        -2.8877e-05,  1.0137e-05,  1.7162e-04, -2.5768e-04,  9.1363e-07,
         8.9237e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9253e-06, -1.8910e-02, -1.2380e-05, -1.9330e-04, -1.0872e-04,
        -6.2692e-05, -2.9322e-03, -1.9031e-05,  1.0317e-04, -2.4270e-02,
        -6.7547e-05, -1.3911e-04, -1.7561e-05, -4.5284e-05, -1.5698e-02,
         1.6807e-04, -3.1662e-05, -1.7354e-05,  4.9464e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8229e-03, -2.6245e-02,  5.7896e-05, -1.2961e-04, -8.3329e-05,
         9.7654e-06, -2.3829e-03, -2.8443e-05,  1.3682e-04, -1.8323e-02,
        -5.7076e-05, -8.0642e-05, -3.2597e-05, -2.3953e-05, -8.9603e-03,
         2.9030e-05, -1.5479e-05,  2.6029e-04, -1.5562e-02,  1.9930e-05,
        -3.8142e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1274e-04, -2.8323e-02,  8.9763e-06, -1.3804e-04, -1.2000e-04,
        -5.1500e-05, -3.9124e-03, -9.4665e-05,  1.1783e-04, -1.6560e-02,
        -4.4303e-05, -7.3222e-05,  5.1037e-06, -4.8887e-05, -2.1605e-02,
        -4.0505e-07, -3.6184e-05,  4.4545e-05,  1.0351e-04, -1.9666e-04,
        -2.5388e-05,  9.4722e-05, -1.9126e-03, -8.1495e-05,  6.5780e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0735e-03,  6.8600e-04,  4.3929e-04, -4.8804e-05, -1.9353e-04,
         8.7643e-02,  1.6044e-05,  6.6980e-05, -3.8624e-05,  1.8157e-04,
        -1.8668e-05,  2.3581e-02,  1.8277e-04,  5.8771e-04, -1.9457e-04,
        -1.0110e-04,  6.9594e-05,  2.0716e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5343e-04, -4.3770e-04, -1.9219e-04, -6.2941e-05,  2.3669e-04,
        -5.6193e-02,  7.7520e-05, -7.9594e-05, -2.0445e-04, -1.5059e-04,
        -4.5441e-06, -1.0389e-02, -4.1830e-05,  1.1073e-04, -3.7525e-02,
         1.9666e-04, -1.1268e-03, -1.2317e-04,  4.6234e-04,  2.2278e-04,
         1.8534e-04,  7.3352e-06,  2.6101e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-1.7998e-04, -8.7701e-05,  6.4526e-06, -1.1288e-03,  1.4610e-04,
         3.0141e-04, -8.6855e-02,  3.5434e-05, -2.9490e-05, -4.1471e-02,
        -4.0854e-04, -1.3664e-04,  2.2162e-05, -1.3820e-04,  9.2341e-05,
         2.2984e-05,  7.3082e-05,  2.5686e-05,  3.0966e-04, -5.3910e-03,
        -1.2893e-04, -6.0077e-05, -1.3625e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1568e-04,  1.4376e-05,  2.9734e-05, -5.6120e-04,  6.3650e-05,
         1.7983e-04, -2.6755e-02, -1.5814e-07, -8.0653e-05, -1.9832e-02,
        -1.1391e-04, -1.5604e-06, -3.6674e-05, -2.4840e-06,  8.2207e-05,
        -3.6685e-05, -1.8703e-02, -1.8572e-04,  7.5473e-05,  2.2431e-04,
        -2.0342e-02, -4.5790e-05,  3.1909e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2756e-05,  2.7544e-05,  1.3795e-04,  6.1064e-05, -9.1022e-05,
        -1.1501e-01, -5.9870e-05,  5.2616e-05, -2.8687e-04,  2.9163e-04,
        -1.8620e-04, -1.0727e-02,  9.7573e-05, -3.4495e-05, -2.8006e-04,
        -1.8754e-02, -2.8914e-05, -2.0122e-03, -3.7194e-04,  3.5250e-05,
        -1.0095e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4651e-03, -2.0005e-04,  1.2980e-04, -1.3890e-05,  6.1181e-05,
         7.2358e-02,  9.1568e-05, -1.5058e-04,  2.8441e-04, -1.6703e-04,
         1.2652e-04,  8.0531e-03, -8.1037e-06,  8.6891e-05, -3.2263e-05,
        -9.1983e-05,  3.9118e-03, -6.0916e-05, -3.5588e-05, -4.5114e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8923e-05,  5.4938e-05,  1.8089e-06,  6.1681e-05,  7.1724e-05,
        -9.0162e-02,  3.6933e-06,  2.1062e-04, -2.8582e-04,  1.1822e-04,
         9.4009e-06, -1.3688e-02, -2.4907e-05, -1.4461e-04, -7.3067e-05,
        -1.3748e-02, -8.1966e-05, -1.7524e-03,  3.2609e-04, -3.0080e-04,
         7.9651e-06, -8.2132e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1566e-04, -1.3446e-02, -6.6870e-06,  5.1203e-05,  1.1090e-04,
        -2.1401e-02, -1.9647e-05,  6.5573e-05,  4.6086e-05,  4.4799e-05,
        -6.8460e-05, -1.2217e-05,  6.7499e-05,  4.3720e-05,  1.3127e-05,
        -1.0622e-05,  8.6828e-05, -6.3615e-05, -2.7889e-05,  7.3253e-05,
         7.7827e-05, -1.4818e-02, -3.5793e-05,  1.4042e-05, -4.5384e-06,
        -1.7506e-03,  8.5644e-05, -9.6955e-05,  1.7134e-05, -1.9814e-02,
         2.0996e-05, -1.1134e-05,  5.6212e-05,  1.2443e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5942e-04, -3.7937e-02,  4.3885e-05,  2.0958e-06,  1.1643e-04,
        -4.0541e-02,  7.1342e-05,  1.1423e-04, -4.2662e-05,  2.0913e-05,
        -1.4931e-04, -2.2989e-05,  8.7048e-05, -2.2571e-04, -3.3462e-05,
        -1.0143e-04,  9.4344e-05, -6.6630e-05, -3.6260e-05,  3.7528e-05,
         1.6196e-04, -1.7219e-02, -1.1802e-04, -3.4841e-05,  5.1204e-05,
        -4.6937e-03,  7.6918e-05, -1.4177e-04,  1.3174e-04, -1.1815e-02,
        -1.8590e-05,  3.6778e-05, -2.0022e-04,  7.5417e-05,  2.6158e-05,
        -6.0459e-05, -3.3267e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4188e-04, -4.0981e-02,  1.1187e-05,  4.7276e-05,  1.0901e-04,
        -4.0572e-02,  2.5024e-05,  7.1449e-05,  5.4068e-05, -1.9971e-05,
        -2.5003e-04, -5.8194e-05,  5.5830e-05, -1.7482e-04, -9.9924e-05,
        -2.7652e-04,  1.2388e-04, -6.4442e-05, -1.0498e-04,  5.5022e-05,
         1.1248e-04, -4.2848e-02, -1.1380e-04,  2.6759e-05, -3.6709e-05,
        -3.9813e-03,  4.6777e-05, -2.6076e-04, -6.0596e-05, -1.9024e-05,
        -7.5534e-05, -6.3838e-03,  1.1484e-05, -3.7480e-05,  9.3917e-06,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1369e-03,  1.2150e-02,  8.2577e-06,  9.6011e-04,  2.5393e-04,
         1.5541e-04,  1.9674e-04, -1.5673e-04,  6.6465e-02, -1.2271e-04,
         4.9819e-04,  1.5188e-04, -7.8889e-05,  7.7129e-05, -3.7928e-05,
        -5.8535e-05,  5.8529e-04,  3.3249e-04,  2.7686e-04,  3.3250e-05,
        -7.5880e-05,  6.7799e-05,  2.0559e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0153e-04, -7.1908e-03, -1.7584e-04, -4.7780e-04, -3.1591e-04,
        -1.0461e-04, -1.0597e-04,  9.3152e-05, -8.5215e-02,  2.2925e-05,
        -3.1685e-04,  3.9616e-05,  1.0330e-04,  3.7059e-05, -1.5309e-06,
        -8.3164e-03, -1.7859e-04, -7.3300e-05, -3.0252e-04,  7.6349e-05,
        -3.0520e-05, -1.4024e-04, -1.8593e-04,  6.5164e-05, -5.1761e-03,
         3.3894e-05,  2.5234e-05, -3.5863e-04, -4.2680e-04, -2.4270e-02,
         7.5839e-05, -4.3515e-05,  5.9248e-05,  4.3428e-05,  3.5242e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5951e-03, -5.3538e-03, -8.2064e-05, -6.5160e-04, -2.9420e-04,
        -9.4805e-05, -1.9920e-04,  1.1175e-04, -9.3412e-02,  1.2342e-04,
        -3.5282e-04, -3.9005e-05,  6.1402e-05, -1.3717e-04, -2.2896e-05,
        -1.4613e-02, -1.1556e-04,  2.2171e-04, -2.2375e-04, -7.0626e-03,
        -1.2304e-04, -7.5720e-05, -2.2287e-04,  5.7861e-05, -1.0498e-02,
        -2.0092e-05,  6.6424e-05,  1.6588e-05,  1.1066e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1900e-04, -4.4504e-02, -1.0947e-04, -1.7580e-04, -2.6994e-02,
        -9.8771e-05, -3.2569e-05, -3.3344e-04, -3.3644e-03, -4.0626e-05,
        -5.7819e-06,  3.2704e-05, -1.4248e-02, -7.2411e-05, -2.7266e-04,
        -9.5998e-05, -1.8357e-02, -2.2031e-04,  4.2986e-05,  3.0868e-05,
         8.8520e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 6.6514e-04, -1.1811e-02, -6.6915e-05,  1.6989e-04, -1.4533e-02,
        -2.5868e-06,  2.2331e-05, -6.3759e-05,  4.5086e-05,  1.0403e-04,
         5.7987e-05,  7.2763e-05, -8.9714e-05, -2.7296e-04, -1.1912e-04,
        -4.3915e-05,  4.4768e-05, -2.2639e-02, -2.2675e-05, -8.3629e-05,
         6.1591e-06, -2.7324e-02,  5.2946e-05,  5.1964e-05, -5.3328e-05,
         3.5861e-05,  4.3128e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1884e-03, -3.5811e-02,  2.7269e-05,  1.7684e-04, -1.3402e-02,
        -4.0069e-05,  7.0310e-05, -2.8252e-05, -5.7540e-05,  8.8394e-05,
         1.5157e-06,  3.3056e-05, -2.7726e-05, -9.0768e-05, -2.1759e-04,
         1.1599e-05,  7.1072e-05, -2.8673e-02,  6.0781e-05, -1.3006e-04,
        -8.1760e-05, -1.5633e-02,  4.8656e-05,  9.2114e-05, -3.5336e-05,
         1.1593e-04, -2.0997e-02,  4.4997e-05,  6.0593e-05,  5.7967e-06,
         1.7175e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2751e-04, -5.1007e-06, -1.4668e-05, -4.2474e-05, -1.9679e-04,
        -1.7386e-04, -3.3847e-02, -3.8124e-07,  1.9449e-04, -4.3686e-02,
        -2.2199e-05,  2.5440e-05, -1.7987e-05, -4.9590e-05, -5.9281e-05,
         1.2275e-05, -2.1973e-04, -3.0807e-02,  2.4714e-05,  7.7445e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1766e-04, -2.5030e-05,  1.0829e-04, -3.5176e-05, -1.9714e-04,
        -8.4052e-05, -7.7822e-02, -1.3048e-04,  1.2950e-04, -4.1132e-02,
        -1.0291e-04,  9.5387e-05,  1.2767e-04, -1.5774e-02, -1.8906e-04,
        -5.9220e-04, -2.2542e-04, -1.7739e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8004e-04,  5.3329e-05,  1.0257e-04, -1.1158e-04, -1.2765e-04,
        -8.3282e-06, -7.3989e-02, -2.6640e-05,  3.1118e-06, -2.3333e-02,
        -5.4750e-05, -9.4176e-06,  1.0752e-05, -1.4968e-02, -1.5060e-04,
        -4.6609e-04, -1.5270e-04,  5.3685e-05, -8.3472e-05,  1.0406e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5979e-03, -1.1819e-04, -2.5444e-04, -1.2427e-04,  1.1866e-04,
        -5.7883e-02,  5.5510e-05,  2.7015e-05, -1.5095e-04, -5.3009e-02,
         1.6301e-04, -2.3385e-05,  3.7846e-05, -2.3088e-02,  1.5804e-05,
        -1.4964e-04, -1.1203e-03,  3.3809e-05,  7.2301e-05,  4.0349e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0822e-04,  3.1189e-05, -1.3436e-04, -9.1753e-05,  1.8878e-04,
        -2.6678e-02,  3.3208e-05,  1.4430e-04, -2.4736e-04, -7.4276e-02,
         6.6416e-05,  1.8651e-05,  7.2968e-06, -2.5709e-02, -6.4329e-05,
        -2.5699e-05, -1.5052e-03,  6.9739e-05,  5.1505e-05,  2.8590e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0162e-03,  3.6024e-05, -1.7835e-04, -1.9239e-04,  9.8932e-06,
        -3.6580e-02,  3.5614e-05, -6.7151e-05, -1.2182e-04, -8.2276e-02,
         1.2771e-04, -5.4346e-05,  1.9937e-05, -1.6302e-02,  6.6643e-05,
        -1.3906e-04, -1.5613e-03, -7.2567e-05,  2.2198e-05, -4.4321e-05,
        -2.6697e-07,  9.4869e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8809e-04, -3.1543e-02, -3.3059e-05, -6.8888e-06, -3.2641e-05,
        -1.1604e-04, -7.8670e-05, -1.3467e-02, -8.8484e-05,  3.9485e-05,
        -9.7348e-03, -2.9705e-05, -7.6471e-05, -1.1582e-02, -4.3318e-05,
        -1.5095e-04, -9.7652e-06, -7.4169e-05, -6.6033e-05,  2.1957e-06,
        -3.3907e-02, -4.3548e-05, -2.6416e-05,  4.4822e-05,  5.3282e-05,
         1.7081e-05, -3.4024e-05, -7.2215e-05, -7.1273e-03, -2.4671e-05,
         1.3975e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1452e-03, -3.6874e-02, -4.1456e-05, -4.7477e-05, -9.3858e-05,
        -7.1173e-05, -4.3426e-06, -4.9342e-03, -7.6601e-06,  6.2762e-05,
        -1.1152e-02,  2.0982e-05, -9.9103e-05, -4.2747e-03, -3.4973e-05,
        -1.4635e-04,  7.6313e-05, -1.0693e-04,  2.1615e-05,  1.4033e-05,
        -3.5405e-02, -1.1596e-05,  6.4634e-05, -1.3735e-05,  8.1760e-06,
        -3.6938e-06, -1.7120e-05, -7.5303e-03,  1.2940e-05,  1.0387e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4943e-04, -4.6796e-02, -5.8326e-05, -3.2923e-05, -1.8928e-05,
         7.3635e-06, -6.6061e-06, -8.3450e-03, -7.6431e-05,  1.4974e-05,
        -1.7641e-02, -5.8569e-05, -1.3935e-04, -2.8183e-03, -8.1857e-05,
        -1.5932e-04,  2.7150e-05, -1.1703e-05, -2.4744e-05, -5.7345e-06,
        -1.9393e-02, -2.8899e-06, -4.6747e-05, -4.2820e-05,  6.8126e-06,
        -2.7500e-05, -1.3843e-02, -2.8587e-05,  3.4093e-06,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4848e-03, -5.9638e-05, -1.5584e-04,  1.6127e-04, -1.1520e-05,
        -2.2863e-05, -9.3787e-03,  5.8146e-05,  2.3048e-04, -4.0456e-03,
         1.6308e-04,  6.2496e-06,  3.7116e-05, -1.0071e-02, -7.1955e-05,
        -6.7761e-06, -4.5861e-02,  8.9360e-05, -4.0468e-05,  4.1308e-05,
         1.1847e-05, -1.1225e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-4.9048e-04,  7.6954e-05, -2.0373e-05,  3.1807e-05,  5.4567e-06,
        -7.8042e-06, -2.7950e-05, -5.7779e-02, -7.9902e-06, -6.7855e-05,
         8.5039e-05, -4.5324e-02, -1.7132e-05,  2.1737e-05, -4.2964e-04,
        -3.6226e-06, -2.5585e-05, -3.9002e-05,  7.9663e-05,  5.9621e-05,
         1.0426e-05,  8.9758e-07,  1.7209e-05,  1.6486e-06, -3.2824e-02,
        -6.3734e-05, -8.6432e-05, -2.3228e-05, -2.6128e-02,  8.2989e-06,
         7.9828e-05, -4.2407e-05, -4.0911e-05, -7.7088e-05, -3.6342e-05,
        -1.1579e-04,  2.3941e-05,  4.4161e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5545e-04,  1.8695e-05,  5.4687e-05,  7.3314e-06,  2.1468e-05,
         6.2040e-05,  5.9337e-06, -4.8487e-02, -1.8244e-07, -5.2035e-05,
         8.1374e-05, -2.7602e-02,  1.4800e-05,  4.1421e-05, -1.3440e-04,
        -5.9544e-05,  2.6901e-05,  9.6199e-06,  2.7139e-05,  7.0793e-05,
         1.1281e-06,  7.2060e-05, -3.2732e-03,  1.9698e-05, -1.7922e-05,
         3.8833e-05,  3.0433e-05,  1.8478e-05, -2.0422e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7241e-05,  2.3513e-05, -1.6141e-05, -2.2180e-05,  2.8842e-05,
        -5.6500e-05,  9.4734e-05, -1.2075e-01, -4.2558e-05, -8.0105e-05,
         3.2969e-05, -1.9951e-02, -7.7424e-05, -8.8412e-05, -9.1361e-04,
        -1.6003e-04, -1.1140e-05, -1.3941e-05,  1.4548e-06,  2.9275e-05,
        -3.8670e-05, -1.0578e-03, -2.1532e-04,  1.4656e-05, -6.7437e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9781e-03, -1.9422e-07,  1.5811e-04,  4.0540e-04, -8.8928e-06,
         2.1056e-05, -8.3767e-05, -6.7919e-06, -7.2786e-05,  4.3785e-02,
         8.2305e-05,  3.6035e-05,  3.9046e-04, -5.3387e-06,  1.1346e-03,
         3.4017e-05,  9.8339e-05,  2.0471e-04, -3.2030e-05,  1.6195e-05,
        -3.8579e-05,  9.7294e-05,  1.4318e-03,  2.7446e-05, -2.2307e-05,
         1.0942e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7087e-04,  1.4360e-05,  1.1640e-04,  5.6832e-04,  1.2197e-05,
         1.0904e-05, -6.1539e-05, -6.4102e-06, -3.4038e-05,  1.8161e-02,
         1.7454e-04,  3.7149e-05,  2.3345e-04,  5.7998e-05,  7.4027e-04,
        -1.0985e-05,  1.1600e-04,  6.5249e-05,  9.2063e-06,  6.5941e-06,
         2.0075e-05,  7.2877e-05,  1.6369e-03, -2.3615e-05,  2.3321e-04,
         9.4731e-06,  9.8037e-04,  1.1292e-04,  6.7419e-05, -2.7871e-06,
        -5.3315e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1396e-03, -6.3554e-05,  1.1639e-04,  2.1775e-04, -1.7688e-05,
        -4.9853e-05, -7.6133e-05, -8.5414e-05, -7.5036e-05,  2.1531e-02,
         8.9208e-05,  8.9189e-06,  1.8281e-04,  6.3990e-05,  4.2690e-04,
         2.3805e-06,  2.7646e-04,  5.4847e-05, -4.8029e-06, -1.8070e-05,
         1.0725e-05,  9.9847e-05,  1.8612e-03,  1.1237e-05,  4.7451e-05,
         8.9322e-05,  3.2263e-05,  2.1425e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2918e-05,  9.9172e-05,  1.5941e-04, -4.6475e-05,  9.2257e-05,
         2.9791e-04, -2.2531e-05,  6.3376e-04,  9.2273e-05,  2.8071e-06,
         3.1476e-04,  1.4978e-04, -7.6615e-05,  4.7636e-05,  1.9566e-04,
         8.6300e-06,  1.4470e-03,  1.0848e-04,  9.9825e-04,  8.6932e-05,
        -1.3418e-05,  1.4505e-05,  6.1541e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2197e-04,  1.2836e-04,  1.1309e-04, -5.1550e-05, -4.9466e-05,
         2.9962e-04,  3.5207e-06,  7.1684e-04,  2.2018e-04,  1.0020e-04,
         3.0749e-04,  1.0534e-04,  2.4107e-05, -4.0663e-05,  5.3014e-05,
         4.5549e-04,  1.4971e-04,  4.3688e-06,  1.0210e-05,  2.8589e-04,
         3.4878e-04, -1.1331e-05,  1.1328e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5181e-04,  1.0163e-04,  1.0336e-04,  6.7883e-07,  1.4603e-04,
         3.1852e-04, -7.5242e-06,  2.5796e-04,  9.6716e-05,  1.4767e-04,
         2.4450e-04,  2.5841e-04, -5.4322e-05,  2.6928e-05,  2.7771e-04,
         1.7447e-04, -5.9071e-05,  3.7479e-05,  1.1855e-04,  2.0663e-04,
         5.1344e-06, -8.2603e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6423e-04, -5.4859e-03, -8.3823e-05, -1.1414e-04,  2.3721e-04,
        -5.9299e-03, -9.2005e-05,  8.2513e-06, -8.4297e-05,  7.8487e-05,
        -5.8335e-02, -2.9838e-05, -1.0232e-04,  2.6637e-04, -2.6979e-02,
        -4.0507e-05, -2.4790e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2822e-03, -2.0554e-02,  9.5290e-05,  1.7387e-04,  2.4183e-04,
        -7.0675e-03, -2.1256e-04, -9.6583e-05, -9.0455e-05,  1.4760e-05,
        -1.1941e-01, -6.3660e-05,  3.2798e-05,  3.3542e-04, -9.0074e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0096e-04, -2.7512e-02, -1.2873e-04, -8.7258e-05,  5.5483e-04,
        -1.9463e-02, -1.1010e-04,  3.8936e-05, -1.5687e-04, -2.2202e-06,
        -1.0325e-01,  2.4041e-04,  8.6269e-06,  1.1367e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 5.1364e-04, -9.5499e-05, -1.7202e-04, -2.0037e-06,  1.4513e-04,
        -1.6221e-04, -6.7229e-02,  1.0943e-04, -1.9368e-04,  2.4976e-05,
        -1.0070e-05, -7.6412e-06, -1.0516e-04,  1.0412e-04, -9.0645e-05,
        -2.4030e-04, -9.5437e-05,  2.2122e-04, -6.8424e-02,  2.6395e-05,
         4.1075e-05,  5.2976e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1623e-04, -3.1394e-04, -1.0212e-04, -1.1663e-04,  6.5749e-05,
        -7.2132e-04, -1.5553e-01, -1.1809e-04, -3.0390e-04, -6.8305e-05,
         3.1441e-05, -2.6168e-05, -1.2809e-04,  2.9418e-05, -3.7768e-05,
        -1.9167e-04,  1.2427e-04, -1.3355e-05, -2.7879e-05, -1.7162e-04,
         1.4758e-05, -9.0418e-05, -2.4985e-04,  8.1105e-05, -3.4882e-06,
         8.7052e-06,  8.2122e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6685e-04, -3.4351e-04, -1.6747e-04, -1.8974e-05,  1.1948e-04,
        -7.3160e-04, -1.9570e-01, -9.1958e-05, -2.3156e-04, -2.4796e-05,
         7.5222e-05,  5.4828e-05, -2.6991e-04,  1.3511e-05,  9.6104e-05,
        -2.7649e-04, -2.0184e-05, -4.2386e-04,  1.6637e-04,  1.2078e-04,
         1.0510e-04,  1.8712e-04, -2.4916e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7535e-04, -4.7840e-02,  2.0308e-05, -4.0189e-05, -2.2506e-02,
         9.2725e-06, -4.3304e-05, -1.4136e-04, -1.8064e-04, -1.9427e-02,
        -5.9444e-05, -4.0044e-05, -7.6761e-06, -9.3039e-03,  2.7766e-05,
        -1.3812e-04, -6.6216e-05, -4.5929e-06, -2.2209e-02,  2.5707e-05,
         3.8030e-06, -7.4483e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2225e-05, -4.9852e-02,  2.9228e-05,  6.9115e-06, -1.3184e-02,
         5.8308e-05,  2.0213e-05, -1.3265e-04, -7.3940e-05, -4.0000e-02,
         4.2962e-05,  7.2795e-06,  1.6483e-05, -2.8177e-03,  1.6797e-05,
        -1.3135e-04, -2.5853e-05, -1.1309e-02, -3.2631e-05, -6.3895e-05,
        -3.0074e-06, -7.4861e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5181e-04, -5.6610e-02,  1.5313e-05,  2.5955e-05, -1.1684e-02,
         1.7415e-04,  3.8750e-05, -2.3855e-04, -9.4474e-05, -1.7082e-02,
         4.9142e-05, -3.4057e-05,  3.9556e-05, -4.5392e-02,  6.9779e-05,
        -1.9833e-04, -1.5865e-04, -1.2253e-02,  7.7064e-05, -8.7186e-05,
         7.1258e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2418e-03, -1.0086e-01,  7.5477e-05, -1.1746e-04, -6.8352e-05,
        -1.4109e-03,  2.5730e-05,  1.0738e-04, -4.1177e-02, -1.2987e-04,
         1.1333e-04, -9.1357e-06, -4.2151e-02, -2.3709e-04, -3.6963e-04,
         9.0414e-05,  2.2987e-04, -1.6715e-02,  5.5552e-05, -4.7036e-05,
        -9.1257e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6513e-03, -9.5966e-02,  8.0718e-05, -6.8258e-04, -1.7613e-07,
        -4.7194e-03,  1.3124e-04,  2.6329e-04, -3.7038e-02, -7.6354e-05,
        -3.3949e-05,  1.2860e-04, -4.5784e-02, -5.1885e-05, -2.7008e-03,
         1.4557e-04,  1.7783e-05,  3.3084e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3278e-05, -8.9371e-02,  1.7279e-05, -4.9396e-04, -1.3955e-05,
        -6.7887e-04,  1.2527e-04,  3.1298e-04, -4.4213e-02, -7.0250e-05,
        -7.0247e-05,  8.4723e-05, -2.5232e-02, -2.1346e-05, -2.7845e-04,
        -2.4746e-05, -1.1519e-04, -1.3814e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9057e-04, -3.8447e-02, -1.4392e-05,  2.7665e-05, -1.0207e-04,
        -7.5007e-05, -5.1961e-05, -4.0960e-02,  5.9145e-05,  1.1505e-05,
         1.3898e-05, -4.7526e-03, -1.5462e-05, -3.1723e-05, -1.1074e-04,
        -7.3120e-05,  3.1602e-06, -1.0900e-02, -5.1377e-05, -4.8928e-05,
        -6.7141e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2191e-04, -5.7629e-02, -8.2714e-05, -1.1847e-04, -2.2931e-04,
        -1.5517e-04, -3.7323e-05, -1.1222e-01, -2.4689e-04,  2.5614e-05,
         2.5198e-05, -2.7570e-02, -7.8907e-05,  1.4177e-04, -2.0188e-04,
         2.9742e-05, -1.2002e-05,  1.0213e-06,  3.8591e-05, -4.3510e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4423e-04, -3.8345e-02,  4.2918e-05,  1.2731e-05, -1.2032e-04,
        -8.0046e-05,  1.3865e-04, -8.0623e-02, -6.0734e-05,  3.6071e-05,
         6.0629e-05, -1.7553e-02,  3.3840e-05,  5.0453e-05, -1.1638e-04,
         1.0257e-05,  1.0317e-05, -8.4394e-05, -1.2700e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-8.2616e-04, -8.4729e-02,  3.1709e-05,  2.5781e-05,  5.0710e-05,
        -4.5512e-02,  5.0783e-05, -1.9961e-05, -9.4531e-05,  2.1473e-05,
         1.0345e-04, -1.0435e-02,  6.2015e-05, -1.2915e-05,  3.4490e-06,
        -1.3516e-02, -6.5687e-06, -4.4889e-05, -6.5696e-06,  3.6219e-05,
        -4.1869e-05,  3.2432e-05, -1.1561e-04, -1.3643e-05, -1.0283e-02,
         4.2330e-05, -3.4945e-05, -8.2346e-05, -9.3151e-05, -2.1903e-05,
        -2.0853e-05,  2.1312e-05, -3.8979e-05,  1.5641e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9594e-04, -3.8237e-02, -3.5401e-05,  6.2336e-05,  1.2887e-04,
        -4.9115e-02, -4.9501e-05,  1.0198e-05, -1.3408e-04, -1.1903e-05,
         1.4141e-04, -1.4218e-02,  9.8197e-05,  1.7538e-05, -3.2486e-05,
        -4.4661e-02,  4.7781e-06, -1.4854e-06, -4.0029e-05,  8.6905e-05,
        -8.2514e-05,  1.2268e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8711e-04, -1.4938e-02,  4.0106e-05,  1.4339e-05,  3.0699e-04,
        -1.0094e-01,  9.1932e-05,  1.0088e-04, -8.8511e-05,  3.9110e-05,
         2.0923e-04, -1.6055e-02,  1.0240e-04,  7.4843e-05, -3.2975e-05,
        -1.6212e-02, -2.8331e-05, -2.4378e-05, -5.6035e-05, -7.6621e-05,
        -2.6112e-05,  5.3361e-05, -1.3213e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0698e-03, -4.9867e-02,  1.4030e-06,  4.4533e-05, -3.2856e-05,
         1.1928e-04,  2.2759e-05, -1.8204e-02, -9.2458e-05,  1.4910e-05,
         5.1808e-05, -5.3167e-02, -8.6764e-05,  9.4491e-05,  9.3907e-05,
        -2.5609e-03, -7.2406e-05, -8.4143e-05,  2.7940e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6089e-04, -1.6068e-01, -8.7321e-05, -3.5749e-05,  7.1020e-05,
         3.9113e-06, -1.7218e-04, -6.3944e-03,  3.4083e-05,  3.9543e-05,
         2.7124e-05, -3.9144e-02, -2.5704e-04, -4.5871e-06, -2.2527e-04,
         3.3393e-04, -3.0804e-02,  1.7091e-04, -3.4060e-05,  6.8769e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4370e-04, -9.9035e-02, -1.7643e-04, -1.0640e-04, -7.8880e-06,
         3.7135e-05, -2.0763e-04, -6.5706e-02, -1.5029e-04,  2.0941e-04,
         2.5829e-05, -4.0647e-02,  1.8497e-04,  1.3022e-04,  9.9026e-06,
         2.3993e-04,  1.9667e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4414e-05, -6.6104e-05, -6.1711e-05, -4.8206e-05,  5.9429e-05,
        -3.8959e-02, -9.5071e-05, -1.0601e-04, -2.4476e-05, -2.5938e-02,
         5.3490e-05,  2.0559e-05,  4.1873e-05, -4.7103e-02, -1.6233e-05,
        -1.7446e-04, -7.7498e-05, -2.7970e-02, -6.9314e-05, -7.0311e-05,
        -6.4791e-05,  2.8813e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0679e-04, -3.9481e-05, -4.9954e-05,  7.0703e-05,  1.4358e-04,
        -3.6678e-02, -1.2069e-04, -9.3776e-05,  3.4709e-07, -6.7464e-02,
         2.6353e-05,  5.7211e-05,  3.2966e-05, -4.2458e-02,  3.9183e-05,
        -1.7666e-04, -5.3968e-05, -1.1375e-02, -6.5915e-05, -1.2425e-04,
         6.1035e-05, -5.6103e-05, -4.3086e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3679e-04, -2.4603e-05, -7.8750e-05,  2.0621e-05,  1.0819e-04,
        -6.4179e-02, -9.2382e-06, -1.1251e-04,  1.4406e-05, -3.2387e-02,
         6.5680e-05,  2.7294e-05,  7.1452e-05, -3.8898e-02,  5.2140e-05,
        -1.6794e-04,  6.3858e-05, -3.3388e-02, -4.8522e-05, -1.2218e-04,
         5.1385e-05, -1.4496e-05, -1.1817e-04,  6.4671e-05,  4.8680e-05,
         3.6385e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6595e-03,  2.9820e-04,  1.4538e-04, -6.0431e-05, -6.3459e-05,
        -2.6731e-04, -5.0250e-05,  2.0057e-04, -6.3086e-05,  1.0584e-04,
        -2.5725e-05,  1.6810e-01,  5.4191e-05, -9.8366e-05,  7.4261e-05,
         3.5738e-04,  1.6027e-04,  1.3892e-04,  4.7487e-04,  1.5948e-04,
         1.0199e-05, -1.1010e-04,  6.5691e-05, -1.0771e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9802e-04, -2.7423e-04, -3.0024e-04, -7.3694e-05,  1.5513e-05,
         1.1456e-04, -1.6188e-04, -5.0026e-04, -1.1766e-04, -3.1041e-04,
        -1.4580e-04, -1.4932e-01, -1.0124e-04, -1.3882e-05,  1.5444e-05,
        -3.1031e-04, -1.0742e-04,  9.2744e-05, -4.1338e-04, -3.0117e-05,
        -8.7122e-05, -6.0129e-02,  4.2370e-06,  5.2937e-05, -4.7818e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8558e-04, -3.0780e-04,  5.7931e-05,  9.2653e-05,  6.8008e-06,
         1.4658e-04,  1.6952e-05, -2.3280e-04,  9.9331e-06, -3.2739e-04,
        -2.4680e-05, -1.0298e-01,  1.2329e-04,  1.2863e-05,  1.1308e-04,
        -5.7833e-02, -1.4033e-05, -4.0781e-04,  4.7368e-05, -6.9048e-05,
        -4.9264e-05, -2.4667e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.6669e-04, -3.7811e-02,  5.6104e-05,  9.9746e-05,  9.4049e-05,
         9.2338e-05, -4.2239e-05,  7.4033e-05,  1.1993e-05, -4.6455e-02,
         1.2169e-06,  2.6502e-05, -1.0127e-04,  1.3421e-04,  9.4159e-05,
        -4.9409e-05, -6.7595e-05,  3.5759e-05, -3.1938e-02,  1.1377e-04,
        -1.7119e-04, -6.6114e-02, -1.7413e-05, -6.4229e-05, -1.5895e-02,
         1.4698e-05,  1.6116e-07, -9.2823e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6438e-04, -9.9453e-02, -1.6823e-05,  1.2865e-04,  1.6947e-04,
        -9.1968e-06, -1.4592e-04, -7.2496e-05, -2.2772e-05, -7.4585e-02,
        -2.0919e-05, -1.7573e-05,  3.4152e-05,  7.7054e-05,  4.3007e-05,
        -2.0578e-05,  3.4789e-05,  8.7116e-05, -3.0854e-02,  4.5693e-05,
         9.5827e-07, -9.2750e-05, -1.5501e-05, -7.1533e-05,  4.3667e-05,
         8.7153e-06,  4.9657e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5720e-03, -5.2572e-05,  3.1849e-04, -5.1362e-04,  2.2245e-01,
        -1.9482e-04,  2.5160e-04,  1.5933e-04, -6.4588e-05,  2.7901e-04,
        -1.6530e-04, -3.0930e-04, -2.6747e-04,  1.2798e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5768e-04,  2.9228e-06, -3.4373e-05, -1.0182e-04,  1.0836e-01,
         8.3284e-05, -5.7550e-05, -6.4684e-05,  1.1603e-04,  9.2827e-05,
         1.3411e-04,  7.6929e-05,  1.0599e-04, -8.7600e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4511e-04, -1.7974e-05,  1.9843e-04, -3.5940e-04,  8.6887e-02,
         3.9519e-04,  1.4822e-04, -8.4599e-05, -1.1407e-05, -3.9793e-05,
        -2.1529e-04, -7.4875e-05, -7.0960e-05,  8.2618e-06, -1.4205e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6700e-04, -9.6588e-02, -3.8651e-05, -6.4330e-05, -2.3640e-02,
        -1.1047e-05, -1.4453e-04, -6.9529e-05,  1.4886e-05, -3.0989e-02,
        -2.2605e-05, -1.8548e-04,  3.0425e-06, -3.1216e-02, -9.1876e-05,
         2.8123e-05, -1.2060e-04, -1.2259e-05,  2.4446e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5056e-04, -1.3046e-01,  1.6290e-04,  2.1075e-04, -8.2676e-02,
         1.4167e-04, -2.8322e-04, -5.1674e-05, -8.2160e-05, -3.9279e-02,
        -2.4828e-04, -1.9942e-04, -1.4252e-04,  1.2430e-04,  5.5441e-05,
         7.0269e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1027e-04, -6.0411e-02, -6.7651e-05,  2.2876e-04, -5.7464e-02,
         5.5447e-05, -1.9698e-04, -7.9099e-05, -1.0764e-05, -6.1131e-02,
        -1.1049e-04, -3.6466e-04, -2.6593e-04, -4.5836e-05,  2.0583e-06,
        -1.0222e-04, -6.6182e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1771e-04,  1.5669e-02, -1.2788e-04, -6.3006e-05, -5.2380e-05,
        -1.8252e-04, -7.2684e-05, -1.8830e-04,  2.6215e-04,  6.3458e-05,
        -8.1819e-05, -5.8788e-06,  6.9557e-05,  1.3553e-01, -7.1198e-06,
        -7.0043e-05, -4.8356e-04,  3.1552e-02, -2.4372e-04, -2.0892e-05,
         2.7821e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1311e-03,  1.3667e-01, -1.1958e-04, -3.3073e-05,  6.3847e-05,
        -1.4410e-04, -1.7771e-05, -7.4515e-07,  2.1737e-04, -4.1727e-06,
         5.6969e-07, -3.0233e-05, -5.2675e-05,  1.2706e-04, -6.3756e-05,
         8.1504e-05, -1.3293e-04, -1.0069e-05,  6.0046e-02, -2.2145e-05,
         4.5372e-05,  1.4841e-05,  3.7763e-05,  2.2548e-04,  9.8797e-06,
         9.7170e-06, -5.0488e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3450e-04, -4.9314e-02,  1.8684e-04,  6.1501e-05, -1.0593e-04,
         1.0487e-04,  1.1831e-05,  2.4804e-05, -3.1622e-04, -5.7609e-05,
         1.6052e-06, -2.0300e-05,  6.1818e-05, -5.6162e-02,  1.1262e-04,
        -1.0880e-04, -7.4609e-02,  1.7968e-04, -1.9974e-04,  2.5712e-05,
        -1.1242e-04,  9.9921e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6574e-04, -1.9704e-02, -6.8979e-05, -1.9469e-04,  7.7091e-05,
        -1.7778e-02,  9.5244e-05,  1.3400e-04, -1.1109e-04,  4.3848e-05,
        -2.6642e-02,  6.3168e-05, -1.4006e-05, -6.8268e-02,  1.2757e-04,
        -6.8201e-05,  7.7709e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #600: [tensor([ 9.3033e-05,  3.6168e-05,  6.2301e-03,  3.2193e-05, -5.3265e-06,
         3.4557e-04, -3.4953e-05, -5.9038e-05,  1.9831e-05,  9.7511e-05,
         1.3317e-03, -2.8191e-05,  6.2434e-05, -3.4843e-05,  9.9479e-05,
        -2.8486e-05, -1.6537e-04,  2.4682e-05,  8.0332e-04,  7.5503e-02,
        -4.4440e-05, -4.6368e-07, -5.1404e-05,  1.7766e-06, -6.7931e-06,
         4.8001e-05,  2.2332e-05, -1.8699e-05, -2.7908e-05,  4.0621e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8499e-04, -9.0601e-06, -1.5394e-03, -1.6870e-04, -1.0883e-05,
        -5.5331e-05,  2.6980e-05,  8.8415e-05,  1.4053e-05, -4.5590e-05,
        -2.6584e-04, -5.9914e-05, -1.7755e-04, -8.0548e-05,  4.5122e-06,
        -4.6420e-06,  9.7949e-05, -4.1577e-05, -2.8376e-04, -1.1189e-01,
         5.3555e-05, -1.2912e-05,  3.0427e-05, -9.5421e-02, -9.3016e-05,
        -2.0424e-04, -1.0326e-05, -1.5521e-02,  7.0737e-05,  2.2023e-04,
        -1.8709e-02, -2.8288e-05,  1.2595e-04, -1.4050e-05,  1.6232e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6167e-04,  4.3617e-05, -6.5461e-04,  3.8386e-05,  3.3188e-05,
        -1.0595e-04, -4.6562e-05,  1.5951e-05, -1.1699e-05, -2.0766e-05,
        -9.9490e-05, -3.7895e-08,  3.2001e-06,  9.9234e-05,  7.3364e-05,
        -4.7244e-05,  1.2179e-04,  3.4987e-05, -1.3170e-04, -2.0593e-02,
         6.8724e-05, -1.3172e-06,  1.2746e-05, -7.4015e-05, -7.3833e-06,
         9.8777e-05, -5.4159e-02, -2.0272e-05,  2.5688e-05, -4.1984e-06,
        -3.6742e-02, -6.5247e-05,  4.2847e-05, -4.4331e-05,  6.2229e-05,
        -5.6648e-02,  5.8736e-05, -5.4380e-05, -3.8932e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8164e-04,  3.1580e-05,  2.6135e-03,  4.1628e-05,  1.0891e-04,
         1.2074e-04, -2.5998e-05, -1.1843e-04,  9.5771e-05,  5.1932e-05,
         7.2216e-04,  2.7900e-05,  9.8511e-05, -6.0253e-05, -3.3990e-06,
        -1.3081e-05, -7.6762e-05,  4.4081e-05,  6.3169e-04,  7.6142e-02,
        -2.9590e-06,  3.5544e-05,  2.4685e-05,  6.0576e-02,  2.3768e-05,
         5.0081e-04,  1.5836e-05,  1.2823e-02, -1.1403e-04, -5.6244e-06,
        -3.3076e-05,  2.7199e-05, -2.9500e-05,  1.1900e-05, -1.1345e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1040e-04, -5.3475e-05,  2.8371e-03,  7.5686e-05, -6.1691e-05,
         1.3645e-04, -4.9285e-06, -2.9255e-05, -1.4402e-05, -4.4773e-05,
         1.3562e-03, -5.1711e-05, -1.1458e-05, -1.0062e-04,  6.3246e-05,
         6.7983e-07, -9.2495e-05, -3.3039e-05,  4.5289e-04,  6.8425e-02,
        -3.3865e-05, -1.5449e-05, -3.7545e-05,  1.6894e-05,  6.9943e-05,
        -2.5718e-06,  8.1139e-05,  9.5360e-05,  1.6484e-05,  9.5397e-05,
        -4.1626e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6763e-04, -3.3981e-06,  5.8487e-03,  2.7695e-05,  2.5135e-05,
         1.4247e-04, -2.5030e-05, -1.1661e-04,  1.8991e-05,  2.2529e-05,
         4.3843e-04, -5.8085e-05,  7.9712e-05, -6.2373e-06,  7.0845e-05,
         5.8380e-05, -1.0551e-04,  7.7273e-05,  1.2034e-03,  1.0146e-01,
        -4.0364e-05,  3.0305e-05, -5.4702e-05,  6.7342e-05, -3.0206e-05,
        -1.7188e-05,  6.7569e-05, -3.0351e-05, -2.4783e-05,  3.1492e-05,
        -6.7626e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8317e-06, -9.2371e-06,  2.4827e-03, -1.0876e-05,  3.8710e-05,
         6.2198e-05, -2.6347e-05, -5.0524e-06, -2.9332e-05, -8.6387e-06,
         1.2428e-04, -6.5660e-05,  6.8125e-05, -7.3678e-05,  3.3902e-05,
        -2.5252e-05, -1.2166e-04,  1.4861e-05,  3.4097e-04,  4.8835e-02,
        -3.2253e-05, -1.3648e-05,  4.5471e-05,  6.0334e-06, -6.8810e-05,
         3.1788e-05, -9.5734e-05,  7.9359e-02, -5.4596e-05, -7.3069e-05,
        -2.3494e-05, -4.7051e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3094e-04,  3.0489e-05,  5.6949e-04,  2.9419e-05,  2.6441e-05,
         3.8533e-08,  1.2735e-05, -1.0845e-04,  5.7571e-05,  1.0725e-04,
         2.5940e-04, -1.6852e-05,  1.1632e-04, -3.0636e-05, -8.5577e-05,
         8.0509e-05, -1.0451e-04,  3.7483e-05,  3.1225e-04,  9.7049e-03,
        -2.7234e-05,  3.5462e-05,  1.0477e-06, -4.4001e-05,  8.0644e-05,
        -1.2046e-05,  9.5647e-02, -9.2082e-05, -7.5304e-05, -5.1772e-05,
        -6.4305e-05,  2.1273e-05,  5.1370e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8074e-04, -2.2195e-05,  7.7882e-03,  5.9220e-05, -2.7768e-05,
         5.5249e-04, -1.2330e-05, -4.1921e-05, -1.8466e-05,  3.3664e-05,
         3.1367e-04, -9.6589e-06,  6.2225e-05, -6.7834e-05,  7.9654e-05,
         8.3282e-06, -1.4486e-04,  3.8914e-05,  5.2418e-04,  7.2498e-02,
        -4.3852e-05,  4.8550e-05, -2.8716e-05, -5.6484e-05, -6.7651e-05,
         1.1280e-02, -3.5984e-05,  8.8181e-05, -8.4175e-05,  1.7457e-05,
         1.3343e-05, -8.1650e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5301e-04, -9.9290e-05, -3.3412e-03, -3.1830e-05, -4.3102e-05,
        -3.4472e-04, -5.2453e-05,  6.2081e-05, -2.7128e-05, -4.8816e-05,
        -7.0321e-04, -1.0151e-04, -8.3351e-05,  6.1944e-05,  3.5058e-07,
        -1.0901e-04,  1.2299e-04,  6.3982e-05, -4.5904e-04, -2.2807e-02,
         1.6151e-04,  7.3634e-05,  5.4373e-05, -4.3174e-02, -3.0148e-05,
        -1.1226e-04, -2.7098e-05, -5.3111e-05, -1.7717e-05,  1.1324e-04,
        -7.5035e-05, -8.4641e-05, -1.4773e-01,  1.1852e-04,  1.2148e-04,
        -5.2496e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7048e-04,  1.3931e-05,  2.5141e-03, -2.1434e-05,  2.7245e-05,
         1.2890e-04,  3.5700e-05, -3.0589e-05,  9.1535e-05,  3.7701e-05,
         6.1170e-04,  1.5432e-05,  9.3413e-06, -3.3313e-05, -3.1288e-05,
         3.7686e-05, -6.6453e-05, -6.1081e-06,  5.3051e-04,  9.1385e-02,
        -1.0361e-05,  2.4992e-05, -3.6049e-05,  1.9416e-05,  1.8181e-03,
         1.7443e-05, -7.5177e-05, -3.3396e-05,  1.3404e-05,  2.7100e-05,
        -2.3372e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1033e-04,  4.6226e-06,  2.8172e-03, -2.2020e-05, -1.1771e-05,
         2.3714e-04, -9.4323e-05,  2.8504e-05,  5.2929e-05,  7.2490e-05,
         2.7109e-04, -6.2547e-06,  1.0982e-05, -1.2801e-04, -9.7423e-05,
        -5.7229e-05, -1.3714e-04,  5.0292e-05,  2.1305e-04,  6.8468e-02,
        -3.7711e-05,  3.5245e-05, -5.9768e-05,  4.4884e-02, -3.4426e-06,
         4.7010e-04, -3.8042e-05,  5.5184e-03, -2.8487e-05,  3.7221e-03,
         1.2709e-02,  6.7631e-05,  2.9183e-05, -1.2462e-04,  6.8326e-03,
         2.9142e-05,  1.5552e-05, -1.0388e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #650: [tensor([ 4.0155e-04, -5.5079e-02,  2.5129e-05,  4.8023e-05, -2.3374e-04,
         9.6999e-05,  9.4493e-05, -6.6004e-05, -1.1135e-02, -8.3723e-05,
         5.0627e-05, -4.5047e-02, -2.3502e-05, -4.1853e-05,  5.5048e-05,
        -9.9164e-02, -2.5856e-05,  2.1067e-05, -8.2090e-05,  5.6839e-05,
        -6.0320e-05,  1.0117e-04,  1.6789e-05, -1.0035e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7019e-04, -1.0312e-01, -2.7196e-05,  2.5588e-05, -2.4424e-04,
        -7.6022e-05,  6.7565e-05, -1.7130e-04, -1.3904e-02, -2.2123e-04,
        -1.2157e-04, -8.3077e-02, -1.0913e-04, -7.1218e-05, -7.1351e-05,
        -6.5437e-02, -1.8568e-04, -1.3073e-06, -2.1248e-04, -1.4725e-05,
        -1.4747e-04, -1.0729e-04,  2.3895e-05, -4.5556e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7427e-04, -7.1535e-02, -1.5826e-06,  7.4155e-05, -2.2608e-04,
        -4.0170e-05,  9.3933e-05, -6.6272e-06, -1.5853e-02, -1.5273e-04,
        -8.4729e-05, -7.4586e-02, -5.7150e-05, -1.2853e-05,  1.4842e-04,
        -6.9387e-02,  1.3957e-04, -5.9199e-06,  1.0864e-04, -1.5909e-05,
        -1.3325e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7201e-04, -8.9054e-06,  2.8816e-05, -1.9362e-05,  7.0892e-05,
         9.7769e-05,  1.0760e-04,  2.3848e-05, -3.6248e-05, -1.2273e-05,
        -5.6090e-04,  1.3271e-04,  8.9074e-05,  5.0591e-05, -2.3348e-05,
        -3.5916e-05, -5.8399e-05, -3.1728e-02,  2.7751e-05, -2.0488e-04,
        -1.4790e-05,  1.8322e-04,  1.0099e-04,  1.0575e-05,  2.8482e-04,
         1.7384e-04, -1.2475e-01,  8.8835e-05, -3.9668e-05, -6.9303e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0743e-04, -4.1692e-05,  3.7274e-05, -4.6889e-05,  1.6590e-05,
        -3.6129e-05,  1.9682e-03, -6.0751e-05, -4.1523e-05, -3.4966e-05,
         7.4309e-03,  5.3047e-05,  3.1603e-06, -7.2312e-05, -8.4066e-05,
         2.3997e-05,  6.9338e-06,  4.9469e-02,  1.7513e-05,  1.1828e-02,
        -2.8544e-05, -5.5989e-05, -9.8269e-05, -1.6431e-05, -5.8920e-05,
         2.9006e-05, -1.6620e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3860e-04,  3.8060e-06, -5.1316e-06, -1.9997e-07, -6.2906e-06,
        -7.8279e-05,  2.0421e-03,  5.0843e-05, -2.3888e-05, -1.3673e-05,
         7.2818e-03,  1.5564e-04,  2.4336e-06, -1.4271e-04,  4.5220e-05,
        -1.4226e-05,  1.8639e-05, -6.2435e-05,  1.1605e-02,  3.6619e-06,
         2.0590e-04, -4.8212e-06,  1.4187e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2477e-04, -2.7856e-05,  8.6664e-07, -9.9714e-05, -2.4781e-05,
        -1.7693e-05,  2.7895e-05,  3.2985e-05,  2.3132e-05,  3.7746e-02,
         4.1628e-06,  4.5112e-05, -1.6968e-05,  1.6298e-05,  1.8592e-05,
        -1.5115e-05,  1.0592e-05, -1.3669e-04, -2.7571e-06,  1.2262e-05,
         1.9393e-05, -1.5454e-05,  1.0073e-05,  5.7757e-02, -1.1565e-04,
         6.2199e-05, -7.4158e-05, -2.8842e-05,  1.7788e-05,  1.3247e-02,
        -7.6115e-05, -1.4620e-05, -2.2104e-05, -7.5886e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6767e-04, -2.1338e-05,  5.4048e-05, -2.5329e-04, -2.7637e-05,
         1.0079e-04,  4.7918e-05,  6.8187e-05,  3.8660e-05,  6.8697e-02,
         5.7790e-05,  4.3278e-06, -6.5988e-05, -2.7735e-05,  2.2376e-05,
         1.7936e-05,  2.1304e-05, -1.3333e-04, -5.9148e-05,  4.3628e-05,
         4.1440e-05, -6.9140e-06,  5.5954e-05,  9.5393e-02,  1.1113e-04,
        -1.2406e-05, -1.0227e-04,  6.6953e-05,  3.2199e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0089e-04,  4.0550e-05, -7.0212e-05, -4.1548e-05, -6.8793e-05,
        -3.3910e-05,  2.8756e-06, -4.0563e-05,  5.4960e-05,  8.6669e-02,
        -1.3847e-05,  1.3675e-06, -8.1788e-05,  3.1360e-06,  2.9970e-05,
        -8.2600e-05, -2.6972e-05, -1.1485e-04,  1.3085e-05, -8.2518e-05,
        -1.7217e-05, -2.9823e-05,  1.3310e-02,  2.4078e-05,  8.0185e-05,
        -3.7502e-05,  1.6444e-04, -1.6385e-05, -2.8748e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0675e-04,  5.6853e-02, -6.3991e-05, -6.3373e-05,  7.8566e-05,
        -2.6324e-04, -2.0955e-04,  1.8588e-01, -5.3406e-05, -8.8915e-05,
         9.2218e-05,  9.8079e-05,  8.5079e-05,  1.5627e-04, -6.1156e-05,
         1.4830e-02,  9.6422e-05, -4.1495e-05,  8.1895e-05,  1.4360e-02,
         2.6466e-05, -1.6069e-04,  1.8179e-04, -2.1270e-04,  1.7337e-04,
        -2.9179e-04, -3.8816e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3854e-03, -1.5395e-01,  1.9187e-04, -3.7039e-07, -1.2899e-05,
        -1.8466e-05,  1.2183e-04, -5.8152e-02, -1.0558e-04,  1.4541e-05,
        -6.9444e-04,  6.5210e-05,  5.1897e-06, -3.3690e-05,  1.1745e-04,
        -5.3705e-02, -5.0224e-05,  1.7265e-05, -7.6983e-06, -9.8227e-03,
         6.7811e-05,  9.3845e-06,  5.7152e-05, -3.0037e-05, -1.5807e-04,
         3.2142e-05, -2.4594e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7539e-04, -6.0221e-02,  4.4467e-05,  4.8531e-05, -1.7179e-04,
         3.4341e-05,  9.0942e-05, -8.2043e-02, -2.8785e-06,  3.7981e-05,
        -2.2866e-04, -2.8926e-05, -1.3481e-04, -9.5664e-05,  1.0509e-04,
        -2.1304e-02, -1.9005e-04,  9.8174e-05,  1.8252e-05, -3.7302e-02,
         1.3531e-04,  5.4313e-05, -3.2062e-05,  3.2631e-05, -1.3437e-04,
        -4.0622e-02,  3.0303e-05, -2.6564e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #700: [tensor([ 1.2847e-03, -1.3416e-04, -9.5853e-05,  1.1190e-06, -4.5564e-05,
         1.1623e-04, -3.1921e-05, -1.9902e-05,  6.0644e-06,  5.5964e-02,
        -1.8904e-05, -1.7649e-05, -1.7423e-04,  5.0863e-02, -6.2311e-05,
         1.3137e-02, -1.4207e-04,  2.4100e-05, -1.4189e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6782e-04, -4.0746e-05, -7.0443e-05, -2.4927e-04, -3.0986e-04,
        -3.2648e-04, -3.7334e-05, -1.5981e-04, -1.1965e-04, -5.4625e-05,
        -5.8255e-02, -5.2929e-05,  5.8516e-05,  2.3146e-04, -1.6060e-01,
         1.0160e-04,  1.8653e-04,  1.0631e-05, -6.2445e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8937e-04, -3.7198e-05, -4.4430e-07, -3.5135e-05, -8.0256e-05,
         8.4844e-05,  1.7676e-05,  1.2490e-04,  3.1137e-05,  3.3181e-02,
         3.4071e-05,  7.2543e-04, -5.9747e-05,  1.8977e-02, -1.0112e-04,
         2.4065e-05,  4.4720e-02, -5.6020e-05,  8.0146e-05,  3.0724e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8001e-04, -9.0090e-02, -1.4949e-05,  6.5435e-06,  4.1188e-05,
        -1.0366e-02,  3.5033e-05, -2.7983e-05, -1.4317e-04, -2.1132e-05,
         1.3117e-04, -5.4618e-02,  2.8598e-05, -7.3649e-04,  2.9748e-05,
         3.2520e-05,  9.6751e-05,  1.2776e-06,  2.6034e-05, -2.4516e-05,
        -1.0739e-05, -1.9557e-05, -9.2397e-03, -1.8234e-05, -2.1929e-04,
         3.0457e-05, -4.9481e-05,  5.7712e-05, -1.2513e-02,  1.8698e-05,
        -3.9678e-05, -2.5052e-02,  2.5218e-05,  1.9581e-05,  1.0994e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4543e-03, -4.8853e-02, -1.1713e-06,  3.1565e-05,  2.0199e-05,
        -4.0565e-02,  5.4057e-05,  5.4313e-06, -1.8220e-04, -5.8831e-05,
         1.5961e-04, -1.6556e-01, -9.2580e-05, -2.3936e-04,  1.4451e-04,
         6.0028e-05,  1.3763e-04,  7.6885e-05, -9.9367e-05, -2.0399e-05,
         4.6963e-05, -8.0174e-05, -2.9355e-02, -3.2366e-05,  2.3214e-05,
         3.6945e-05, -3.1475e-05, -1.1174e-04, -5.5862e-05, -5.9793e-04,
        -9.7700e-05, -3.3014e-05, -6.8461e-07, -7.4421e-05,  4.6226e-05,
        -7.3019e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5670e-04, -1.3567e-02, -4.1420e-06,  6.8753e-06,  1.3884e-05,
        -1.7946e-02,  2.4178e-05,  9.1024e-06, -1.3278e-04,  3.1985e-05,
         7.5778e-05, -1.0462e-01, -2.3175e-05, -4.1524e-04,  6.7462e-05,
         6.4125e-05,  4.5646e-05,  3.7611e-05, -1.8751e-05,  7.4154e-05,
         1.2914e-05, -2.8326e-05, -7.5524e-04, -3.0969e-05,  3.0458e-05,
        -3.9647e-05, -1.1293e-02, -4.0878e-05,  1.5897e-04, -2.7794e-02,
         1.6698e-08, -1.1570e-05,  3.3744e-05, -3.1787e-05, -1.0380e-05,
        -1.5670e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3633e-03, -8.6864e-05,  2.9447e-04,  1.4057e-04,  1.4089e-05,
        -6.5249e-05, -1.0627e-04, -1.8842e-04,  2.9566e-05,  1.9520e-02,
        -1.1550e-04,  5.0869e-05,  8.5230e-05,  1.6216e-01, -5.8910e-05,
         2.4406e-03, -1.3779e-04, -2.0059e-04, -9.0134e-05,  1.2151e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3212e-04, -5.5527e-05,  1.9112e-04, -5.7504e-06, -1.5758e-05,
         1.7567e-06,  2.4292e-05, -5.9223e-05, -8.5200e-05,  4.3044e-02,
         4.3161e-05,  1.0061e-05, -4.3449e-05, -7.8903e-05,  3.2337e-04,
         6.5153e-05,  3.5096e-05, -1.0624e-04, -3.1513e-05, -4.6834e-05,
        -1.4749e-04,  7.2355e-02, -6.4702e-06, -2.7243e-07, -2.2354e-05,
         7.9985e-05, -3.9277e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5143e-04, -3.4623e-05,  3.2654e-04,  1.7407e-05,  1.1560e-05,
        -2.9491e-05, -1.7619e-05, -1.9022e-05,  5.3139e-05,  7.2532e-02,
         9.6273e-05,  9.8443e-06,  1.9593e-05, -1.0756e-04,  2.7197e-04,
        -1.0953e-05, -8.8725e-05, -5.9875e-05, -4.1944e-05, -2.8138e-05,
         3.5615e-05,  4.2746e-02, -8.1677e-07, -7.0172e-05,  1.8972e-05,
         2.3260e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7506e-04, -1.4251e-04,  5.8018e-06, -5.3980e-05,  3.0757e-04,
        -1.1578e-01,  1.9761e-05,  1.2647e-05, -9.0903e-05,  1.2044e-04,
        -7.4031e-02, -1.9500e-04, -3.3793e-02,  2.2859e-04, -5.0797e-05,
        -3.4034e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1763e-06, -2.6317e-04, -1.6633e-04, -8.6034e-05, -1.1204e-04,
         1.1771e-01, -1.6984e-04, -5.6136e-05,  4.3336e-05, -5.5151e-05,
         7.8406e-02, -2.3931e-06,  1.6959e-02, -8.2558e-05,  3.8297e-02,
         4.7209e-05, -1.6120e-05, -8.3696e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3753e-05,  3.4018e-05,  9.6422e-05, -4.1330e-05, -8.6089e-06,
         1.1504e-01,  1.6900e-04,  5.6049e-05,  6.5043e-05, -8.4988e-05,
        -7.7362e-05,  3.5745e-05, -4.7348e-06, -3.7991e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #750: [tensor([-5.3558e-04,  6.5462e-05,  1.5749e-04, -1.7803e-05, -3.6517e-05,
         7.9325e-05, -4.0204e-05,  1.2926e-04,  4.0610e-05,  3.0035e-05,
         1.8227e-04,  2.2558e-05, -6.5959e-06,  2.2053e-04,  9.8698e-05,
         1.0839e-05,  2.0770e-04, -1.7287e-05,  4.4552e-05, -2.3153e-06,
        -7.9680e-05,  2.3281e-05,  3.5161e-04,  2.5469e-05, -1.1763e-05,
         2.0144e-04, -1.4312e-05,  3.8215e-05,  3.1665e-05,  2.0604e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7804e-04,  5.1843e-05,  1.5911e-04,  5.7008e-05,  3.2803e-05,
         4.6988e-06, -1.8039e-04,  3.4825e-05,  7.6770e-06, -6.1082e-05,
         3.2170e-05,  9.8342e-06, -1.5702e-05,  2.3627e-04,  7.0035e-05,
         1.1813e-05,  1.2618e-04,  5.5989e-05, -1.0891e-05,  9.9915e-02,
         5.8150e-05,  6.2575e-05,  1.0951e-04,  1.2369e-05,  4.6482e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7995e-04,  8.1764e-04,  2.0262e-04, -3.4965e-05, -1.9067e-05,
         1.9042e-04, -7.6958e-05,  6.3634e-04,  4.3207e-05,  3.9612e-05,
         1.9697e-04, -7.2944e-05, -3.0080e-05,  3.4554e-04,  2.3486e-04,
        -3.8509e-05,  6.5161e-04, -4.0997e-05, -4.4151e-05,  6.4337e-06,
         5.8202e-06,  1.8609e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5886e-04,  1.6224e-05,  4.2255e-06, -1.7469e-05,  3.3939e-05,
         5.5864e-04,  3.3089e-05, -1.9626e-05,  7.4967e-03, -1.7224e-05,
         1.9014e-05,  2.0117e-03,  6.8597e-06,  6.0472e-06, -4.0213e-05,
         1.5676e-04, -2.0125e-05,  3.0902e-04,  1.6370e-05,  1.2888e-06,
        -3.3637e-05,  1.4887e-02,  4.5486e-06,  3.3029e-02, -3.5935e-05,
         3.7204e-05, -9.3531e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8207e-04,  4.6938e-05,  2.6682e-05, -8.9229e-06,  2.6878e-05,
         2.5048e-04,  3.4187e-05,  3.7010e-05,  6.6642e-03, -5.7349e-05,
        -4.4313e-05,  2.1700e-03, -3.5541e-06,  1.9432e-05,  2.4184e-05,
         9.2903e-05,  6.5660e-05,  7.6385e-05, -1.2937e-05,  5.4293e-05,
         7.8224e-02, -4.7819e-05,  1.3636e-02,  3.5401e-05, -4.7495e-06,
         3.0349e-05, -7.8565e-06, -2.4691e-05, -2.1879e-05,  3.3947e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7478e-04,  1.7908e-07,  5.0684e-05,  2.1786e-05,  7.3907e-05,
        -2.9027e-05,  8.8517e-05,  3.4905e-06,  6.9371e-03, -5.5213e-05,
         2.8290e-05,  5.7289e-03,  6.3774e-07,  8.0088e-06, -2.1091e-05,
         1.5934e-04,  8.2607e-07,  1.2343e-04,  8.0367e-05,  7.2671e-05,
         2.0272e-02, -1.0715e-04,  1.9271e-03, -4.6002e-05, -2.6633e-05,
         8.9650e-02,  1.3711e-04,  3.8807e-05, -4.3399e-05,  6.1810e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1583e-03, -2.5702e-04, -1.6340e-04,  1.2390e-04,  1.0688e-05,
        -1.2193e-04, -7.5362e-04, -7.1257e-05, -5.5363e-05,  1.3541e-04,
         5.3112e-05, -2.3551e-01, -1.4221e-04,  1.8110e-04,  1.0797e-04,
        -5.1244e-05, -2.3416e-04, -7.4312e-03,  1.2391e-04, -2.7475e-05,
        -4.5607e-02, -1.3329e-04,  1.3029e-04,  3.8864e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1541e-04, -1.0796e-04, -1.2257e-04,  3.8780e-05, -5.6761e-05,
        -1.1499e-04, -5.6835e-05, -5.1735e-05, -3.8005e-05, -6.1715e-07,
         1.2939e-04, -1.9022e-01,  4.9768e-06, -5.1453e-05, -2.8514e-05,
        -4.8148e-02, -6.9693e-05,  1.1241e-04, -4.1487e-04, -1.3529e-06,
        -1.3143e-04, -1.5107e-02, -2.8381e-05,  6.8906e-05, -2.8318e-02,
         5.4345e-05,  1.5140e-04, -3.7462e-06, -7.5104e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4126e-04, -2.0906e-05,  6.4378e-05, -1.5241e-04, -8.6688e-05,
        -9.0034e-05, -1.7883e-04, -6.4235e-05, -4.1454e-06,  3.7200e-06,
        -1.4874e-04,  1.3810e-01, -1.4179e-04, -9.4068e-05, -4.3224e-05,
         8.3744e-02, -1.1529e-04, -1.0819e-04,  9.4950e-05,  5.9381e-05,
         1.6639e-04,  6.9068e-02, -2.1765e-04, -6.5649e-05,  1.6321e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7647e-04, -2.5764e-05,  1.2194e-04, -1.3878e-04, -2.0513e-06,
         1.9724e-04,  1.9096e-04, -5.0434e-05,  1.9565e-05,  2.0264e-04,
        -1.4655e-04,  2.5876e-01, -9.9279e-06, -4.1775e-05, -1.2166e-04,
         2.1204e-02,  1.5357e-05,  7.7314e-03,  1.0853e-04, -1.2096e-05,
         1.3513e-04,  1.3688e-07,  3.4763e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4338e-04, -1.0580e-04,  1.3554e-04, -7.9320e-05, -8.5895e-05,
         1.3115e-04, -7.4816e-05,  5.6613e-05,  2.1569e-04, -2.9785e-05,
        -5.9594e-05,  2.5563e-01, -1.2452e-04,  1.0633e-04, -6.1634e-05,
         2.1570e-04,  1.4090e-04, -8.2718e-06,  6.7841e-05,  2.8170e-05,
         1.2943e-04, -3.1724e-05, -4.8533e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2267e-03,  7.9181e-06, -2.9642e-05,  2.5459e-04, -1.7951e-06,
        -1.6552e-04, -1.1790e-03,  3.5466e-05, -1.5640e-04,  1.3200e-06,
         2.3585e-04, -1.2869e-01, -1.1755e-04, -3.8580e-05, -4.7085e-05,
        -3.0576e-04,  5.3493e-05, -4.7384e-02, -7.3642e-05, -5.2335e-02,
        -1.7064e-04,  3.4365e-05, -3.2744e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #800: [tensor([-5.0244e-04, -6.3750e-02, -1.7675e-04,  3.9578e-05, -1.1276e-04,
        -7.6964e-05, -7.2222e-06,  3.6128e-05, -3.1963e-02,  1.2075e-04,
         7.5316e-05, -2.2916e-02,  2.8808e-05,  4.6867e-05, -1.1859e-05,
        -1.2063e-01, -7.3409e-05, -9.3786e-05,  8.2519e-05,  1.0861e-04,
        -4.0106e-03, -4.1925e-05, -8.8056e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4091e-04, -8.6360e-02, -1.0695e-05,  5.8010e-05, -4.4417e-05,
        -1.1753e-04,  5.1826e-05,  5.0528e-05, -6.5926e-02, -4.5851e-05,
        -2.0025e-05, -1.4631e-02, -7.6922e-05,  2.3670e-05, -3.8130e-05,
        -1.7315e-02, -7.9881e-05,  5.9266e-05, -1.9510e-05, -1.3888e-05,
        -3.4774e-05, -8.8994e-03,  1.7697e-05, -4.8766e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3975e-04, -5.8644e-02,  4.2254e-05, -6.3473e-07,  2.7281e-05,
        -2.2371e-02,  1.0020e-05, -1.9497e-03, -2.3983e-02, -4.5677e-05,
        -4.1053e-05,  1.2515e-04, -1.0611e-02,  4.5141e-05,  1.1804e-05,
        -5.3415e-06,  1.9122e-05, -3.4403e-06, -5.2765e-02,  1.3944e-05,
        -2.9946e-02, -2.7862e-05,  5.8095e-05,  3.7989e-05, -8.5136e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1525e-03, -6.9791e-02, -2.4633e-05,  7.2648e-05,  1.4998e-04,
        -5.5282e-02, -4.8158e-05,  2.7770e-04, -1.0959e-01, -3.3089e-07,
        -4.8696e-05,  2.7952e-05, -1.7579e-03,  3.2682e-05, -4.7335e-05,
         2.0125e-05,  7.0724e-05,  2.3955e-05, -1.2120e-02,  5.3022e-05,
        -3.8814e-02,  3.9420e-05,  1.5060e-05,  3.6673e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1723e-04, -1.2571e-01,  4.0335e-05, -1.7751e-05,  8.0582e-05,
        -5.1961e-02, -6.3073e-05, -7.0869e-04, -3.3880e-02,  2.4034e-05,
        -3.5900e-05, -7.6263e-06, -1.0745e-02,  7.1678e-05,  7.2027e-06,
         1.4337e-04, -3.2801e-05, -3.5555e-05, -2.0358e-02,  3.8125e-05,
        -4.9860e-02,  2.0603e-05, -6.4598e-05,  3.9812e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0296e-03, -7.7151e-03, -1.8138e-05,  2.9027e-05, -3.0070e-05,
         1.0652e-04, -1.0529e-01, -1.6331e-05, -4.2023e-05, -7.9796e-05,
        -5.6022e-03, -5.9269e-05, -5.4160e-06,  4.0562e-05, -7.6769e-06,
         3.2483e-05, -3.5920e-03, -5.0213e-05, -5.9560e-05,  1.6441e-04,
        -6.0208e-03, -2.5935e-05, -1.3413e-05, -4.2374e-03, -4.6145e-05,
        -1.4624e-04, -4.7680e-02, -6.6417e-05, -6.7925e-05, -3.4753e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6552e-05, -7.5504e-02,  5.9082e-05,  6.9815e-06, -6.7772e-06,
         1.4156e-04, -7.6533e-02,  6.6456e-05, -4.6027e-05, -9.6365e-06,
        -1.0253e-02, -1.1861e-05, -7.1776e-06, -5.9620e-05, -2.2095e-05,
         4.7538e-05, -2.4740e-02,  8.2162e-06, -5.6594e-05, -1.7664e-05,
        -6.2009e-03, -8.7377e-05, -6.0231e-06, -2.9289e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7813e-05, -9.7818e-02,  3.9485e-05, -4.8012e-05,  1.8834e-05,
         1.7002e-04, -1.6932e-02,  2.3557e-05,  1.7874e-05, -3.3761e-06,
        -4.4324e-03,  6.5646e-05,  1.8520e-05, -6.5469e-05,  1.9409e-06,
        -7.3544e-06, -1.6705e-02,  2.0637e-05,  1.3957e-04, -5.9519e-04,
        -2.1049e-05, -1.2255e-05, -5.7729e-03,  3.4094e-05, -2.0481e-05,
        -6.2564e-05,  4.1582e-05, -1.0843e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4411e-05,  1.5956e-04, -1.1434e-04, -1.1444e-04, -3.6248e-05,
         9.4896e-05, -1.9015e-04, -7.3106e-05, -1.5437e-05,  7.4567e-05,
        -2.1575e-04,  5.8115e-02, -4.2533e-06, -1.4887e-04, -7.4394e-05,
         1.6866e-01,  2.8238e-04,  4.7645e-04,  4.2983e-06,  2.9311e-02,
        -6.3263e-05, -2.3521e-05, -3.2622e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9660e-04, -1.4225e-04,  2.3142e-04,  1.0763e-04,  8.3542e-05,
        -7.6902e-06, -7.9794e-04, -2.0046e-05, -7.8970e-05,  1.6527e-05,
        -1.2714e-05, -5.0883e-02, -4.6088e-05, -2.0316e-05,  7.1533e-05,
        -1.3421e-01,  6.9983e-05, -5.0563e-04,  7.9316e-05, -8.0138e-02,
        -6.9938e-06, -1.1240e-04, -2.8938e-05, -8.1867e-05, -6.5159e-05,
        -1.0093e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9879e-04,  2.9918e-06,  3.3756e-05,  4.8023e-05,  1.1108e-04,
         1.4140e-05,  5.4196e-04, -4.5048e-05,  1.5202e-04,  4.4244e-05,
        -5.5742e-05,  1.3071e-01, -1.4304e-05, -1.4813e-05, -4.9275e-06,
         9.2606e-02,  8.3001e-05,  2.9991e-04, -1.4819e-05, -9.9146e-06,
         2.5939e-05, -1.7460e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4346e-04,  2.1684e-01, -1.3738e-04,  2.6736e-05,  1.6636e-04,
        -2.0901e-05, -2.1323e-04,  3.2439e-04,  6.0594e-05, -4.8979e-05,
        -1.1943e-04, -2.9162e-05, -4.6098e-05, -4.3113e-05, -3.8676e-05,
         1.1194e-04, -2.8943e-05, -1.1083e-04,  2.8599e-02, -1.0717e-04,
         1.5388e-05, -1.1190e-04, -7.9600e-05,  4.1317e-05, -3.4887e-05,
        -1.5147e-04, -3.3361e-05,  3.4132e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #850: [tensor([ 6.0701e-06,  1.9321e-02, -3.9112e-05, -5.8729e-05,  1.4062e-02,
        -3.0098e-05, -2.0394e-05,  4.8609e-05,  5.8955e-05, -3.7978e-06,
         5.1202e-02, -1.2915e-05, -2.7667e-06, -3.2398e-05,  7.4832e-03,
        -9.2014e-05,  5.4598e-05,  3.0444e-05,  2.5080e-05, -2.3594e-05,
         8.5585e-02, -2.6682e-05,  5.0982e-05, -3.1064e-05, -1.1009e-04,
         3.8264e-05, -2.8498e-05,  1.4277e-04,  6.9241e-05, -5.7870e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3521e-04, -5.1985e-02, -8.0673e-05,  3.0882e-05,  1.7934e-05,
         5.5393e-05, -6.3769e-02, -7.7396e-05, -2.3662e-05, -8.1446e-03,
         6.6889e-05, -3.5978e-05, -7.4231e-05,  9.6236e-05, -3.4074e-03,
         5.0600e-05,  4.2745e-06, -3.0485e-05, -8.3348e-02,  4.5384e-05,
        -4.6553e-05, -1.1530e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9023e-04, -9.9898e-02, -2.7198e-05,  6.4424e-05,  8.0451e-05,
         6.6757e-05, -3.6359e-02, -2.3435e-05,  3.3320e-06, -7.9798e-02,
        -8.2113e-05, -1.1581e-04,  2.6135e-05,  4.6535e-05, -2.1144e-03,
        -4.2752e-05, -2.4270e-05,  7.5026e-05, -1.3656e-02,  1.7189e-06,
         1.3150e-04,  1.6195e-05,  5.2095e-05,  3.5135e-05,  5.3245e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6836e-06, -9.6813e-02,  4.2794e-05,  9.4040e-05, -7.2708e-06,
         1.3574e-04, -5.7661e-02, -5.5685e-05,  7.2130e-06, -5.4312e-02,
         6.2048e-06, -7.3475e-05,  8.3485e-05, -3.6920e-05, -8.7194e-03,
        -8.0444e-05,  6.5108e-05,  6.2248e-05, -4.7686e-05,  1.0970e-04,
        -3.2925e-02,  2.5247e-05, -4.6524e-06, -1.3758e-02, -8.2259e-05,
         8.2800e-05, -3.1479e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1114e-04, -1.3878e-04,  1.6694e-04,  1.1344e-04,  8.8710e-05,
         1.3032e-02,  5.2548e-05,  1.5198e-04,  1.6465e-01,  2.7011e-05,
        -6.6252e-05,  1.8012e-05, -2.3880e-05, -9.7323e-05, -2.3128e-06,
         7.0021e-05,  7.9872e-02, -1.8486e-04, -9.0885e-05, -1.5312e-04,
         2.7127e-05, -1.2031e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8184e-04, -7.2217e-06,  2.8031e-05,  8.7370e-05, -3.7605e-05,
        -3.4336e-02,  4.5893e-05, -3.7130e-04, -1.4219e-01,  2.4061e-05,
         3.6720e-05, -2.5007e-05,  4.0017e-05, -8.2533e-02, -8.0569e-05,
         4.9180e-05, -1.6063e-05, -1.6295e-02, -5.4879e-05, -4.5993e-05,
        -9.9198e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7199e-04,  1.4748e-05,  1.1331e-04,  1.3457e-05, -2.1747e-05,
        -5.3973e-02,  1.1758e-05,  9.3557e-05, -4.8627e-02, -6.3853e-06,
         1.0040e-04,  4.5202e-05,  2.7486e-05, -9.3231e-02,  6.8379e-05,
         3.6004e-05, -8.7743e-05, -9.7328e-03,  1.1425e-04,  9.1681e-05,
         3.5360e-05, -1.4555e-04,  8.0312e-05,  6.4668e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1684e-04, -2.3906e-04, -3.6034e-05,  6.0042e-05,  5.3279e-05,
        -1.4553e-05, -4.1514e-05, -3.0481e-05,  1.0253e-04,  2.5363e-01,
         3.2656e-04,  1.1557e-04,  1.2485e-04,  2.1717e-05,  6.7089e-05,
        -1.1869e-04,  6.2777e-05,  2.0108e-05, -1.5714e-04, -3.2328e-05,
         6.5263e-06,  1.2277e-02,  1.1760e-04,  1.8784e-05, -9.8849e-05,
         5.1763e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1086e-04, -9.3578e-05,  7.9475e-05, -4.1271e-06, -4.6354e-05,
         5.1369e-05, -8.1608e-06,  3.1563e-05,  3.5364e-05,  1.6208e-01,
         8.7854e-05, -4.4690e-05,  1.2533e-04,  1.4199e-05, -9.1483e-06,
         4.2716e-05,  3.8855e-06,  3.2885e-05, -5.2196e-05,  3.0062e-05,
        -1.6886e-05,  5.7980e-03, -6.6822e-05, -2.8281e-05,  1.3929e-04,
        -6.8215e-05, -1.9174e-05, -3.1577e-05, -3.1606e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2812e-04,  5.6680e-05, -5.7723e-06,  1.2914e-05,  8.3996e-05,
        -1.9837e-05,  6.6989e-05,  2.2767e-05,  9.6070e-05,  2.3216e-01,
        -9.9338e-05,  5.5252e-05, -1.2172e-04, -1.3560e-05, -1.8511e-04,
         1.7067e-04, -2.2982e-05,  1.0027e-04, -6.0138e-06, -1.0989e-04,
         2.6253e-03, -6.9329e-05, -5.0126e-05,  9.8305e-05,  3.9144e-05,
         7.9586e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9383e-04, -5.0563e-05,  1.8929e-05,  1.8288e-02, -1.3698e-05,
        -5.9956e-05, -1.5872e-07,  2.1082e-06, -8.1439e-05,  1.2668e-02,
         2.6625e-05,  8.7793e-04,  6.9370e-02, -5.3832e-05, -2.1042e-05,
        -1.6327e-05,  1.7402e-05,  2.2787e-02, -4.2369e-05,  2.5199e-06,
        -1.2893e-04,  1.9948e-02, -2.5810e-05, -3.6042e-05,  2.8408e-05,
        -1.0064e-04, -4.2837e-05,  1.1118e-05, -6.4234e-05,  1.6882e-05,
        -3.0869e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9578e-04, -1.4993e-04, -7.8000e-05,  9.5662e-03, -5.6721e-06,
        -1.1767e-04, -7.3202e-05, -4.5661e-05, -1.0383e-04,  4.9325e-02,
         2.3265e-05,  1.4047e-03,  2.5433e-02,  9.4717e-06, -2.9058e-05,
         3.7119e-05, -1.3034e-05,  7.0788e-02, -9.4712e-05,  3.0128e-03,
         1.6877e-07, -7.5495e-05, -3.1129e-05,  6.1785e-05, -1.0175e-04,
        -4.3352e-06, -2.8746e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #900: [tensor([-9.5270e-04, -1.0387e-01, -6.5100e-05, -9.3581e-05, -4.8585e-05,
        -1.1216e-01, -2.2201e-05, -1.0332e-04,  5.4296e-05, -9.6781e-05,
        -8.9822e-05,  2.1738e-04, -1.7659e-04,  7.2157e-05, -5.9043e-05,
        -4.4155e-02, -7.8778e-05, -7.8539e-05, -3.5126e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4627e-04, -1.0283e-01, -4.0694e-05, -2.4876e-04, -6.5281e-05,
        -1.0811e-02, -2.5365e-05,  1.1481e-04, -1.2811e-01, -2.0634e-05,
        -6.8435e-05, -6.8617e-05, -8.0140e-03, -6.2864e-05, -3.0947e-04,
        -7.5068e-05, -1.9272e-02, -1.2062e-04, -1.8332e-04, -6.0421e-02,
        -1.9751e-05,  6.8901e-06,  6.8315e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7573e-04, -6.6305e-02, -1.9603e-04, -1.2066e-03, -2.3035e-05,
        -1.5063e-02, -6.8325e-05,  5.6742e-05, -5.3375e-02,  2.0988e-07,
        -2.3168e-05,  9.5958e-06, -8.1092e-02, -1.5331e-04, -5.3877e-04,
         2.7494e-05, -3.6709e-02, -2.6282e-05, -2.9570e-05, -4.8257e-05,
         8.1565e-05, -2.8027e-02, -1.7774e-05, -7.9924e-05,  1.3165e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9211e-04,  1.8711e-01, -2.0958e-05,  1.2835e-03, -7.2171e-05,
         5.0181e-03, -6.0222e-05, -6.7182e-05,  6.9109e-02, -1.5820e-05,
         2.8000e-05, -1.1330e-05,  2.9583e-02, -3.5992e-05, -1.2225e-04,
        -3.8423e-05, -3.5243e-05,  8.1611e-05, -9.4810e-05,  4.8885e-02,
        -2.1817e-06,  1.1610e-04,  9.9990e-06,  7.1107e-05, -9.5320e-05,
         5.5163e-05, -3.0382e-04, -3.5453e-05, -2.2071e-05, -2.7131e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0444e-05, -1.0163e-01,  9.5921e-06,  2.3893e-05, -1.4456e-05,
         2.6461e-05, -6.4411e-03,  8.4385e-06,  8.0816e-06, -3.3840e-05,
        -3.4212e-02, -1.7142e-05, -8.8716e-05, -1.1897e-05,  2.1693e-05,
        -1.7797e-02,  1.4791e-04, -1.0935e-05,  3.5193e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9407e-04, -6.7045e-02, -1.1117e-05,  4.3872e-05,  1.0434e-05,
         1.2398e-04, -8.1280e-02,  4.4649e-05,  1.1309e-05,  1.1922e-05,
        -3.3810e-03,  4.3302e-05, -2.2748e-05, -4.0981e-05,  1.2133e-04,
         4.7559e-05, -7.3032e-03, -3.4995e-05, -7.7914e-05, -4.8300e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1898e-04, -1.8860e-01,  5.3968e-05, -4.4456e-05,  2.6245e-05,
         2.6776e-04, -1.5211e-02, -1.9974e-04,  3.7307e-05,  4.4073e-05,
        -8.2715e-02, -4.5400e-05, -2.5592e-04, -1.2185e-05,  2.0978e-05,
        -8.7123e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2119e-03, -6.7870e-02, -1.6113e-05,  1.6175e-04, -1.5542e-01,
        -9.0936e-05, -6.1371e-06,  1.2524e-04, -9.5754e-02, -6.1280e-05,
         1.0493e-04, -6.4097e-05, -1.0959e-04,  1.8102e-05,  6.1167e-05,
         8.8878e-05, -3.2099e-05,  4.2266e-05, -7.6229e-05,  1.5137e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4564e-04, -3.5024e-01,  5.6681e-05,  8.9343e-06, -4.8132e-02,
         2.0326e-04, -3.7738e-05, -8.0689e-05, -6.0649e-02, -8.1901e-05,
         5.0826e-05,  1.0431e-04, -1.1587e-04,  6.2127e-05,  1.5423e-04,
         6.4571e-05, -1.1978e-05,  6.4702e-05,  5.1556e-05,  1.9130e-04,
         1.6855e-04,  2.0315e-05,  7.3114e-05,  7.5802e-05, -3.7338e-04,
         5.3272e-06,  1.6972e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3000e-05, -2.6474e-01,  1.2942e-04, -2.2558e-04, -1.6469e-02,
        -2.5603e-04, -8.8025e-05, -1.4654e-04, -1.2965e-01,  1.6261e-05,
         1.5338e-04, -4.7164e-05,  1.0336e-04, -1.1061e-04,  9.9813e-05,
        -8.0274e-05, -1.0656e-05, -2.0053e-04,  1.1617e-06, -1.6674e-04,
         4.0022e-05,  6.9895e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7024e-04, -4.1250e-05, -5.4173e-05, -1.0049e-05, -5.8109e-05,
         1.0181e-04,  7.5246e-05, -9.3598e-05, -1.0974e-04, -1.5600e-04,
        -3.3602e-05,  8.9434e-05, -3.9210e-02,  6.8700e-05, -2.3406e-04,
         4.6847e-05,  1.9821e-04,  8.9581e-05, -1.4270e-01, -7.1995e-05,
         4.1896e-05,  1.3767e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4144e-03,  1.1918e-04,  1.0861e-05,  5.7543e-06,  1.0217e-04,
        -2.5240e-05, -5.7957e-05,  1.0661e-04, -9.2694e-05, -8.5345e-05,
         6.3428e-05, -8.3446e-06,  1.4784e-01, -4.3926e-05,  1.1372e-04,
         1.1764e-05, -1.3312e-04, -4.3083e-05, -6.7121e-05,  2.9703e-02,
        -1.7319e-04, -1.2282e-05, -1.1704e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #950: [tensor([-3.2410e-04, -1.1985e-01, -4.0499e-05, -9.6997e-06, -3.4777e-03,
         1.8779e-05, -3.2021e-04, -2.3450e-02, -9.1574e-05, -4.7994e-06,
        -1.4376e-05, -7.8303e-06,  1.1862e-05, -8.1763e-05, -9.4283e-05,
        -2.8022e-02, -9.0272e-05, -1.7912e-03, -4.0040e-07, -7.5667e-05,
        -2.2294e-02, -1.1688e-04, -2.7005e-05, -4.2773e-05, -5.8024e-06,
        -9.1080e-02, -6.6292e-05,  6.3602e-06, -7.3708e-05, -2.3427e-05,
        -1.0818e-05,  6.7042e-05, -8.7757e-05,  6.2260e-05, -1.4364e-02,
        -3.3436e-05,  1.0169e-04, -4.1731e-05,  2.4344e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6777e-04,  4.0713e-02,  4.9431e-05,  1.7183e-04, -4.1073e-05,
         2.4991e-05,  1.3905e-01,  7.8037e-05, -9.3008e-06, -2.6559e-05,
         4.1358e-02,  2.6336e-05, -1.6125e-05,  1.5695e-05, -7.8851e-06,
         8.7763e-05,  4.0218e-05, -4.9741e-05,  4.0498e-05,  1.9025e-05,
         6.6547e-02, -4.0738e-06,  5.1656e-05,  6.8644e-03,  4.6231e-06,
        -2.7814e-05, -4.2318e-05, -4.9128e-05,  1.4266e-05, -2.1631e-05,
        -3.3639e-05,  8.1943e-06, -6.0437e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5785e-05, -1.7858e-01, -1.3719e-05, -2.8349e-04,  5.6046e-05,
         8.6245e-05, -4.9575e-02, -1.1288e-04, -3.3852e-05, -3.5843e-05,
        -3.0454e-02,  1.2138e-05, -2.6282e-05,  4.4796e-05, -2.0615e-05,
        -7.1881e-05,  2.1138e-05,  4.3425e-05,  7.4974e-06,  5.4496e-05,
        -3.1019e-02, -2.0842e-04,  1.3715e-05, -8.7982e-03,  9.5621e-05,
        -6.5857e-05, -2.4353e-05, -6.1286e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1126e-04, -2.8251e-02, -3.6237e-05,  6.0404e-05,  1.0164e-05,
         5.3132e-06, -7.1479e-02,  1.0559e-05,  4.5165e-05, -5.5158e-05,
        -2.0975e-02, -4.4362e-06,  1.0961e-05, -2.4100e-05,  3.1486e-05,
         3.0942e-05,  1.4880e-05,  6.0565e-05, -3.1782e-06,  8.2046e-06,
        -5.1394e-02, -7.5384e-06, -7.7051e-05,  3.3370e-05,  5.1316e-05,
        -3.3601e-02,  4.0155e-05,  2.1868e-05, -1.5325e-05, -2.5945e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4246e-04,  1.2485e-01,  9.2271e-05, -4.7468e-06,  3.4719e-05,
        -2.3211e-05,  1.9230e-04,  2.2478e-06,  1.1221e-01,  1.2753e-04,
         1.3383e-05,  1.3561e-04, -4.3000e-05,  5.4766e-05,  6.0323e-03,
         1.6179e-04,  6.3090e-05,  5.6822e-05, -4.7195e-06, -4.1828e-05,
        -2.4739e-05,  1.0656e-05,  4.2865e-05, -5.2977e-06,  1.5075e-05,
         2.0054e-04,  1.9780e-05,  5.5847e-02,  3.8719e-05,  2.8673e-05,
        -3.8550e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5513e-05,  2.9173e-02, -2.7645e-05, -7.8886e-05, -1.0442e-04,
         7.1114e-05,  2.1669e-04, -2.9709e-05,  4.4076e-03, -1.0589e-04,
        -5.7166e-05, -2.5469e-05, -1.1311e-04,  2.6410e-06,  2.0474e-02,
        -5.6429e-05,  2.0413e-06,  4.3515e-05, -2.1035e-06, -3.0081e-05,
         4.7114e-05,  8.8390e-06,  9.1884e-06, -4.0807e-06,  2.9092e-02,
        -8.0648e-06,  2.2803e-05,  1.5879e-01, -1.1449e-04, -4.9028e-05,
        -4.3892e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2958e-03, -5.0930e-02,  8.6397e-05,  1.0626e-04,  1.0370e-05,
         1.3038e-04, -2.9687e-05, -1.5232e-06, -4.8303e-02, -4.9026e-05,
         1.0595e-04,  9.2364e-05,  3.1932e-05, -1.0145e-04, -9.9665e-02,
         2.7738e-05,  6.2346e-05,  3.0408e-05,  1.8617e-04, -5.7234e-05,
        -3.5820e-08,  1.6607e-04, -8.6813e-06,  1.2114e-04, -1.2438e-01,
        -5.3729e-05, -1.2329e-05,  9.1770e-05, -1.7548e-02,  5.3103e-05,
        -1.0571e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5780e-04, -1.0328e-01, -7.4813e-05,  1.5316e-04, -5.1673e-02,
         7.6302e-06,  1.8616e-05,  6.3154e-05,  1.2999e-05, -7.3889e-05,
         4.0418e-06, -6.6400e-02, -1.1949e-05, -1.1050e-04, -1.2733e-05,
        -6.8508e-02, -3.1237e-05, -3.1764e-05,  1.3064e-04, -7.8543e-03,
         7.1349e-05, -7.1025e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8462e-04, -9.1818e-02, -1.2221e-04,  3.1804e-05, -1.2458e-02,
         7.8859e-05,  1.8892e-04, -4.8786e-05,  4.8528e-05,  1.0005e-04,
         1.4111e-04, -1.0165e-01, -7.6419e-05, -9.6990e-05,  1.3250e-05,
         4.9954e-05,  2.4953e-05, -1.6627e-04, -1.2187e-05, -7.9123e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3678e-04,  6.8549e-02, -4.9222e-05, -1.8156e-04,  1.8084e-01,
        -5.9606e-05, -8.9532e-05,  8.2809e-05, -4.3992e-05, -8.0343e-05,
         3.3216e-05,  5.1549e-02, -2.3355e-05,  1.5339e-04,  6.6848e-05,
         8.2262e-05,  1.7257e-06,  4.0038e-05, -3.2740e-05, -6.9540e-05,
        -1.5576e-04,  1.5528e-04,  7.1156e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5621e-04, -4.8164e-02, -2.7618e-05,  9.5272e-05, -4.3615e-02,
        -4.3926e-05, -3.8246e-05,  1.4655e-05, -7.5335e-05, -1.4403e-02,
         9.7558e-06,  2.8877e-05,  8.8949e-06,  6.1558e-05, -1.9696e-05,
         5.4117e-05,  9.7769e-05, -5.9448e-05, -4.0736e-05, -3.0159e-02,
         5.7332e-05, -6.0962e-05, -2.9469e-03, -5.0558e-05,  3.9411e-05,
         1.0259e-04, -8.8374e-02, -5.5614e-05,  8.3035e-05,  2.3916e-05,
        -1.1057e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7041e-04, -6.0249e-02,  8.2539e-05,  1.6715e-04, -6.0964e-02,
         5.2611e-05, -3.2565e-04,  2.5184e-05,  5.4770e-06, -1.4423e-02,
         3.1259e-05,  9.7096e-06, -6.5474e-06, -5.4599e-05, -1.6582e-05,
        -7.0348e-06,  1.9622e-05,  3.5057e-06, -2.1856e-05, -8.8544e-02,
        -7.1387e-05, -3.9358e-04, -1.4282e-04, -8.1196e-02,  1.7523e-05,
         2.6020e-06, -8.2279e-06,  5.7402e-05, -8.2922e-05,  3.8616e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1000: [tensor([-2.2018e-04, -1.2629e-01,  2.9327e-05,  5.9216e-05,  5.4227e-05,
        -6.5918e-02, -3.2038e-05,  5.1363e-05,  6.4459e-05,  4.4470e-05,
        -1.8827e-05,  5.9645e-05,  9.2121e-05, -3.2319e-02, -2.8384e-05,
        -7.6147e-06,  3.8637e-05,  1.4812e-05, -2.7115e-03, -1.9776e-06,
        -6.7544e-05, -8.7959e-05, -7.7670e-05, -6.8988e-05,  2.1443e-05,
        -9.8981e-03,  8.7165e-06, -1.4398e-06, -4.2045e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2592e-04,  1.3626e-01, -8.7205e-05, -9.2945e-05, -7.8945e-05,
        -2.3443e-05,  1.6817e-01,  9.5463e-06,  1.0795e-06,  8.4348e-05,
         5.3058e-02,  3.4137e-05, -3.4638e-05, -1.5935e-04,  1.7621e-05,
         8.9208e-06, -1.1163e-04, -7.5631e-05, -7.6191e-05,  4.0745e-02,
        -9.3334e-05,  6.2834e-05,  1.0408e-04, -2.3319e-04,  2.9937e-05,
         1.6481e-04, -1.8497e-04, -5.0207e-05,  5.2880e-05, -5.6709e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0700e-04, -3.3050e-02,  1.1553e-04,  7.4762e-05, -1.2442e-04,
         3.1305e-05, -1.4689e-01,  2.4681e-04,  5.7355e-05, -3.5929e-05,
        -1.9063e-01,  2.1853e-05,  1.1118e-04, -1.2635e-04, -9.5906e-05,
        -2.1550e-04,  1.8820e-05, -1.3581e-05,  1.4887e-04, -3.3117e-03,
         1.9353e-04, -6.5638e-04,  7.3719e-05,  8.8698e-05,  1.2385e-05,
        -5.5154e-05,  1.0487e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4632e-04, -1.7337e-02,  1.2061e-05,  5.5176e-05, -2.5414e-06,
         6.8923e-05, -3.6014e-02,  7.8071e-05,  1.8036e-05, -2.3173e-05,
        -1.1358e-01, -2.7214e-05,  1.2823e-05, -5.7885e-05,  3.0194e-05,
        -8.6990e-05,  2.0979e-05,  1.6453e-05,  7.9028e-05, -9.8989e-04,
        -3.8949e-05,  5.7227e-05, -7.6307e-05, -8.7682e-06,  2.2166e-04,
         5.4254e-05,  1.3504e-05, -1.7923e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1814e-04, -1.6421e-01,  2.7412e-06, -2.2438e-04, -9.1685e-05,
         6.7974e-06, -1.7662e-04, -4.9131e-02,  4.5871e-05, -3.5972e-04,
         3.4192e-05, -5.3270e-05, -2.2429e-05, -3.2081e-05, -7.9826e-05,
         1.0387e-04,  9.7429e-05, -5.7046e-05,  1.4861e-05, -4.6538e-02,
        -2.6818e-05, -3.4359e-04, -7.0381e-05,  4.5641e-05, -6.7814e-02,
         2.2128e-06, -1.2966e-05,  2.6195e-05,  3.5174e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6774e-04, -1.6706e-01,  4.1276e-05, -5.0595e-04,  4.6045e-05,
        -5.9867e-05, -4.5227e-06, -5.5821e-02, -5.9330e-06, -3.8654e-05,
         1.1391e-04, -4.3963e-05, -5.4629e-05, -3.4947e-05,  1.4684e-05,
         4.0712e-05,  6.6104e-05,  2.4346e-05, -2.9285e-05, -4.4753e-02,
         3.7422e-05, -1.5261e-04, -7.1303e-05, -7.6985e-05, -1.3661e-02,
        -6.9880e-05, -2.3737e-06, -6.9666e-05, -4.6848e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8020e-04, -1.0964e-01, -2.4199e-05, -2.1592e-04,  7.0596e-05,
         1.7680e-05, -4.9171e-04, -6.6075e-02,  7.2930e-05, -4.8105e-04,
         6.6114e-05,  7.4223e-05,  1.2063e-04, -1.5597e-04, -7.1996e-05,
        -1.0239e-04, -1.6696e-07,  2.0770e-06, -1.3926e-05, -8.2633e-02,
         2.3460e-05, -3.7733e-04,  5.4611e-06, -1.7754e-05,  3.0856e-06,
        -2.5901e-05,  9.9407e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5574e-04,  1.0649e-01, -4.6776e-05,  1.5490e-04, -4.2240e-05,
        -2.9501e-06,  8.0346e-03, -1.3277e-04, -2.5399e-05, -1.0259e-04,
        -6.7634e-05, -8.6642e-06, -2.1152e-04, -6.3302e-05, -1.3380e-04,
        -4.2947e-05, -2.2846e-04, -5.7630e-05,  2.4116e-02, -1.6098e-05,
        -1.5614e-05, -3.5709e-05, -9.7389e-05,  7.3643e-05, -2.6258e-05,
         9.2201e-02, -5.3268e-05, -5.4015e-05, -6.2895e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4189e-04, -3.3643e-02,  9.9821e-06, -3.0340e-03,  1.2498e-05,
        -1.2083e-04, -2.9479e-02, -1.7763e-04, -1.0985e-04,  3.9880e-05,
        -1.2036e-03, -2.7018e-05, -1.5949e-04,  5.2504e-05,  1.0361e-04,
         2.9367e-05, -5.1673e-04,  4.2664e-05, -9.4144e-02, -4.7181e-05,
         6.5743e-05, -1.3568e-05,  4.3755e-05, -2.9499e-04, -1.1274e-05,
         4.7637e-05, -4.5043e-06, -1.1642e-01,  3.2549e-05,  3.5317e-05,
         4.9220e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1985e-04, -2.4090e-02, -6.5149e-05, -3.2202e-04, -4.6835e-06,
        -7.8232e-05, -1.0650e-02, -1.0543e-04, -1.2311e-04,  4.1526e-05,
        -5.3291e-04, -8.6634e-05,  7.4131e-06, -2.7411e-05,  3.4300e-06,
         3.6752e-05,  2.6193e-04,  7.4134e-05, -1.3230e-01, -1.0892e-04,
         5.5865e-05,  4.1700e-05, -9.1003e-05, -2.5853e-04,  3.6815e-05,
        -1.0177e-01,  5.1757e-05,  6.4943e-06,  3.7040e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9064e-04,  6.2236e-06,  1.1351e-04,  2.5967e-05, -8.0034e-05,
        -5.3130e-02,  1.3601e-04, -4.7809e-05, -8.4712e-05, -9.2806e-06,
         2.5321e-05,  5.9818e-05, -5.5883e-02, -1.8069e-05, -1.1595e-04,
        -3.6566e-02, -3.7684e-06, -1.8906e-05, -6.4824e-02, -1.0036e-05,
         4.5648e-05, -5.1966e-05, -5.8384e-02, -9.6614e-05, -4.5823e-04,
        -2.7116e-05, -6.9879e-05,  1.1081e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3699e-04, -4.6615e-05,  9.6409e-05, -7.0242e-05,  3.2819e-04,
         9.2393e-02, -7.4419e-05, -2.2801e-05,  2.9700e-05, -9.8381e-06,
        -2.2771e-05, -1.2185e-04, -2.3752e-05,  6.1901e-04, -3.6344e-05,
         1.9777e-04,  4.9469e-05, -5.7348e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1050: [tensor([-3.7783e-04, -2.7321e-01, -8.1023e-05, -1.2223e-04, -4.7860e-05,
        -3.3598e-02, -1.7388e-04,  2.2733e-05, -1.2901e-04, -1.2818e-04,
        -3.5344e-05, -8.9941e-05, -1.3579e-02,  1.0941e-04, -2.9554e-05,
         2.0445e-04, -4.4511e-04,  1.0731e-04,  2.5080e-05,  2.1956e-05,
        -7.1694e-04, -1.0145e-02, -1.1790e-04, -1.8874e-04,  6.2231e-05,
        -1.1602e-02, -5.1623e-05, -2.0093e-06,  6.1190e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9209e-04, -1.0316e-04,  9.2155e-05,  7.4893e-05, -2.5316e-05,
         3.2971e-03, -4.7891e-05, -5.4069e-05,  2.2946e-05,  8.1268e-05,
        -4.1467e-05,  6.0506e-02, -1.3849e-04, -1.1297e-04, -2.0900e-04,
         1.2790e-01,  2.0805e-05, -3.1743e-05,  6.3033e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1036e-03,  6.0140e-05, -5.8493e-05, -2.5252e-04,  4.3887e-05,
        -2.1216e-04,  1.9829e-04,  2.9996e-06,  4.7763e-05,  1.1888e-04,
        -1.5447e-01, -7.4910e-05, -1.0316e-04, -1.2400e-04,  7.3832e-05,
         3.0265e-04, -5.5427e-02,  4.4968e-06, -4.3738e-05,  1.5590e-05,
         1.1602e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0851e-04, -1.6937e-04,  9.5331e-04,  7.6643e-04, -3.7716e-05,
         1.5635e-03,  6.5608e-05, -5.3256e-05, -4.8619e-05, -6.3214e-05,
         9.8264e-02, -2.7566e-05, -1.0669e-04,  6.2564e-02,  1.2983e-04,
        -6.8078e-05,  2.3975e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3703e-04, -1.0355e-04, -1.9827e-04, -2.3755e-04, -3.5486e-04,
        -1.2523e-04, -5.4556e-05, -3.8400e-05, -2.7864e-04,  1.5887e-04,
         1.2460e-01,  1.9772e-05, -6.3148e-05, -3.3854e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0675e-04,  5.8957e-04, -3.5599e-05,  3.9082e-05, -2.3465e-04,
         7.4466e-04, -5.9196e-05, -5.0430e-05, -5.5430e-06,  1.5795e-04,
         6.7819e-04,  3.0432e-05, -1.8063e-04,  7.7062e-04, -4.4136e-05,
         2.0031e-05, -1.5859e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4326e-04,  5.2597e-04, -8.4807e-05, -1.2105e-05, -1.5081e-04,
         3.1800e-04, -9.3409e-06,  3.6104e-05,  1.3924e-06,  1.1812e-03,
        -5.7696e-06,  3.1829e-02,  1.0342e-04, -9.3017e-07,  1.3525e-03,
        -1.6866e-05,  5.5854e-05,  1.0526e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0995e-04,  7.8246e-08,  8.9814e-02,  1.4573e-04, -1.0930e-04,
        -3.8220e-05,  1.2984e-04,  1.6583e-04, -5.0704e-05,  2.2956e-03,
        -1.7011e-04, -8.5603e-05,  3.6306e-04,  8.9735e-05,  6.1980e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6779e-04, -9.8660e-05,  1.4990e-03, -3.3208e-04, -2.0433e-04,
         1.0742e-04,  1.2579e-05, -3.2632e-05,  1.7529e-01,  1.0362e-04,
         1.1737e-02,  3.5447e-05, -8.9937e-05, -1.6267e-04,  1.2171e-04,
         6.1732e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3145e-04,  3.9625e-05,  9.8470e-03, -2.8443e-05, -1.0687e-05,
         8.4741e-05,  6.5639e-05,  1.6865e-04,  2.0871e-01,  1.6540e-04,
         1.5947e-02,  1.4497e-04, -9.7568e-05, -2.0646e-04,  5.3689e-05,
         1.1286e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0682e-04,  2.7493e-01,  6.4077e-05, -7.7562e-05,  1.7047e-04,
        -8.6191e-05, -3.0282e-05, -1.7163e-04, -2.2992e-04,  4.3990e-02,
         2.5269e-04, -8.2476e-05,  3.6530e-05,  5.0721e-02, -1.2283e-04,
         2.7348e-04,  7.5025e-06, -1.2186e-04, -4.8355e-05, -2.2804e-04,
        -5.8592e-05, -3.9879e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2767e-04, -3.6932e-02, -5.9753e-06,  1.1659e-04, -2.4075e-05,
         7.0580e-05, -1.8842e-04,  3.1463e-05, -5.3967e-05, -9.6677e-02,
         1.4989e-06,  8.1987e-05, -6.7142e-05, -8.4186e-02, -1.6654e-04,
         8.8017e-05, -1.0095e-05, -7.6368e-05, -5.0266e-02, -1.2452e-05,
        -6.4605e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1100: [tensor([-4.1513e-04,  1.1423e-02,  4.4443e-05,  1.0978e-03, -1.0757e-04,
        -1.0802e-04,  2.3686e-06,  1.8524e-01, -2.0574e-04,  1.3672e-03,
         4.6549e-05,  1.0441e-06, -1.2066e-05, -6.6018e-04, -6.2137e-05,
        -3.5017e-05, -5.2488e-04, -8.2236e-05, -1.5571e-04,  9.9661e-06,
         9.7717e-05,  5.8642e-03,  1.5853e-05, -2.4516e-06,  1.0468e-05,
         1.8055e-05, -3.7312e-05, -4.9868e-05,  2.4806e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6662e-04,  9.3110e-02,  3.4011e-05,  5.7620e-03, -4.6968e-05,
        -8.8435e-05,  2.2114e-05,  6.4259e-02, -5.7557e-05,  1.2642e-02,
         3.9769e-05,  4.0987e-05, -2.8561e-05,  8.6812e-04, -5.0506e-05,
        -3.0884e-05,  6.5153e-03, -2.6559e-05, -4.2525e-05,  2.2422e-05,
         5.8094e-05,  3.8042e-02,  8.8962e-07,  3.8300e-03, -2.1312e-05,
         1.8969e-05, -4.4568e-05,  6.3628e-05,  2.8230e-05,  4.4344e-07,
         5.6267e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9137e-04, -1.2931e-05, -2.3371e-05, -1.8826e-05,  1.6994e-05,
         9.9898e-05,  4.3744e-05,  1.6089e-03, -3.5493e-05, -8.1588e-06,
        -3.6502e-06, -2.4019e-05, -1.2260e-05, -1.5476e-05,  7.4223e-05,
         8.9558e-02,  3.2153e-05,  8.1153e-06, -2.6437e-06,  3.4139e-05,
        -2.7324e-05, -2.4503e-05, -1.7985e-05, -4.1029e-06,  1.0554e-04,
         4.8467e-06, -4.4180e-05,  2.6668e-05,  4.8749e-02,  2.8171e-05,
        -2.5584e-05, -5.6847e-06, -3.1029e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8915e-04, -1.3174e-04, -3.9538e-05,  7.8778e-05, -4.7761e-05,
         1.7468e-04,  1.7209e-04,  9.8474e-03, -4.1409e-06,  8.7553e-05,
         7.3164e-05, -1.3841e-05, -4.1429e-05,  7.0306e-05,  8.0649e-05,
         1.7664e-01,  9.9230e-05,  6.5778e-05,  2.1061e-06,  1.2887e-04,
         1.4863e-05, -5.1050e-05, -2.8182e-05,  2.0804e-05,  2.2448e-05,
         9.2691e-05, -1.4771e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2618e-04, -6.0779e-05, -3.4012e-05,  7.1485e-05,  7.0689e-05,
         1.8019e-05,  7.0056e-05,  8.6771e-03, -2.7250e-06, -9.8687e-07,
        -2.2102e-05,  7.0978e-06,  1.6424e-05,  1.7322e-05,  6.7946e-05,
         1.1029e-01, -1.0530e-05,  4.6261e-05,  4.1760e-05,  3.1584e-05,
         5.0648e-06, -6.7559e-05, -3.7320e-05, -3.5966e-06,  7.7645e-06,
        -2.8696e-05, -2.8386e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0861e-03, -1.9010e-05, -1.1863e-01,  2.7837e-05,  2.9566e-05,
         7.4034e-05, -9.4004e-05,  2.9064e-05, -1.1915e-04, -1.6968e-04,
        -1.1558e-04, -9.8119e-03,  4.1775e-05,  1.2040e-04, -1.0736e-01,
        -1.5742e-04, -7.8631e-05,  5.5237e-05,  1.2793e-04, -1.0518e-02,
        -4.2235e-06,  3.5470e-04, -3.0942e-03, -1.6239e-05,  8.5301e-05,
         1.0057e-05,  5.1898e-05,  4.2578e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6883e-05, -2.0161e-07, -9.3969e-02,  6.4921e-05, -1.9744e-05,
        -1.1289e-05, -2.5271e-04,  3.5449e-05, -5.4354e-05, -1.8404e-03,
         2.0293e-05, -3.7967e-03, -4.5401e-07, -4.9447e-04, -2.2659e-02,
        -9.0607e-05,  7.2747e-05, -2.5310e-05, -2.1481e-05, -9.1360e-03,
        -1.6545e-05,  3.8338e-05, -6.9591e-06, -3.5433e-04, -1.1273e-05,
        -5.5862e-05, -1.5649e-06, -2.1971e-04,  2.2968e-05, -1.1321e-02,
        -8.7248e-05,  3.3207e-04, -1.0402e-01, -5.2348e-05, -3.9435e-06,
        -1.6538e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4865e-04, -9.1097e-06, -2.6988e-02, -5.9700e-05, -2.3979e-06,
        -2.6334e-06, -7.4108e-04, -6.2146e-05, -5.8787e-05, -1.3038e-04,
        -2.8143e-05, -8.5583e-03,  4.9821e-05, -5.7575e-04, -8.5913e-02,
        -2.8773e-05, -6.9112e-06,  3.2818e-05, -1.0060e-04, -2.2234e-05,
        -3.2972e-05, -8.8886e-04, -3.2225e-05, -9.7463e-05, -2.2434e-05,
        -2.8313e-05, -5.1404e-05, -7.0294e-02,  2.0549e-05,  3.9579e-05,
        -2.9206e-02,  1.8813e-05,  4.6407e-05,  3.2122e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6469e-04,  1.1534e-04, -1.3085e-04, -8.2182e-05, -8.4602e-05,
        -5.4379e-04, -4.7851e-05,  1.4231e-05, -7.1704e-02, -5.6305e-05,
         2.2718e-04,  3.9998e-05, -1.3429e-04, -1.2721e-01, -8.6108e-05,
        -3.9555e-02,  5.9871e-06, -4.4679e-02, -1.4876e-04,  5.0635e-05,
         4.7272e-05, -3.3585e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1947e-03, -8.4354e-05,  1.2967e-04, -6.6739e-05, -5.3030e-05,
        -1.1754e-04,  3.4121e-05,  4.6535e-05,  8.5944e-02, -1.2568e-05,
        -6.3631e-05,  1.3950e-06, -5.9035e-05,  7.4081e-02, -1.3101e-04,
         1.1141e-01, -8.2982e-05, -1.0109e-04,  5.2930e-05, -1.4893e-04,
         2.7595e-05, -4.6480e-05,  3.5634e-05, -5.7398e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2336e-05, -2.3576e-04,  2.8169e-04, -3.1683e-04,  2.1449e-04,
        -3.4494e-05,  1.1026e-05, -7.2183e-05,  2.9315e-01,  1.4955e-04,
        -7.8880e-05,  2.8375e-04,  3.8669e-05, -2.5256e-04, -4.1221e-04,
         3.2681e-02,  5.5028e-04,  6.7167e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5029e-04,  6.3371e-05, -1.2647e-04,  1.6708e-05, -6.0218e-05,
         4.0402e-05,  4.8835e-05,  6.3869e-05, -1.9647e-05, -3.4686e-06,
         3.2882e-05,  1.2548e-01,  7.3836e-05,  1.4985e-06,  1.1303e-04,
         2.3383e-05,  2.5159e-05, -1.6981e-04,  5.9528e-02, -1.4954e-04,
        -1.9033e-05,  1.5014e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1150: [tensor([ 2.1246e-04,  8.4852e-05, -4.0231e-07,  8.7756e-06,  4.5985e-05,
        -8.3128e-02, -1.5744e-04, -7.1717e-05, -1.9582e-02, -8.8897e-08,
         1.9695e-05,  1.6548e-05, -2.6491e-05,  5.9925e-05,  2.8067e-07,
        -3.1223e-03, -2.4886e-05, -3.7973e-02,  5.0388e-05, -7.8198e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4391e-04,  1.3124e-05, -5.5047e-05, -2.0115e-05, -9.6950e-05,
        -1.5937e-01, -1.3330e-04, -7.1928e-05, -1.0128e-01, -1.5007e-04,
         8.9363e-05, -9.5492e-05,  2.1296e-04,  1.4612e-04, -2.0156e-04,
        -1.3183e-02,  1.1406e-04,  9.9701e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9684e-05,  3.0158e-06, -8.5566e-05, -1.0338e-05,  1.7876e-01,
        -3.6936e-05,  1.6831e-04, -6.5650e-05,  1.0403e-01, -1.4594e-04,
        -4.8821e-05,  5.6838e-05,  7.5565e-05,  1.9447e-02, -9.9634e-05,
         1.4168e-04, -3.2081e-05,  1.6222e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7845e-04, -4.9436e-05,  9.7546e-05, -1.4485e-04,  2.2171e-01,
         9.6862e-05,  1.7764e-05, -1.2408e-04, -3.4607e-04, -3.5492e-05,
         6.5850e-05, -1.3599e-04,  2.2964e-04,  1.4836e-02,  1.5193e-04,
        -5.8343e-05,  1.8706e-02,  8.4365e-05, -2.2385e-04,  8.7789e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1024e-04,  1.0646e-04, -2.0929e-04, -2.1144e-05, -1.2974e-02,
        -2.2336e-04, -1.3373e-04, -1.3430e-04, -1.3813e-01, -6.8637e-05,
         3.1086e-05, -6.0554e-05,  2.3028e-05, -1.6783e-01,  1.7686e-04,
        -2.2166e-04,  1.4452e-04,  1.6426e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0706e-04, -1.5820e-04, -2.2617e-04, -3.3510e-06, -1.3267e-04,
        -1.9883e-04, -1.5003e-04, -2.1652e-01, -4.0463e-05, -4.4319e-05,
         3.0688e-04, -2.0650e-04,  1.2077e-04, -1.3815e-05, -7.7702e-05,
        -1.2678e-02, -1.7761e-06, -2.0267e-05,  8.7902e-05, -6.7012e-03,
        -2.9931e-04, -8.1891e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6292e-05, -8.1064e-05,  2.5202e-04, -2.8154e-04,  6.8145e-05,
        -5.5055e-05,  1.6747e-04, -2.2186e-01,  1.3468e-04,  8.8637e-05,
         2.0301e-04, -1.1086e-05,  1.2586e-04,  4.0349e-05,  2.9779e-05,
        -1.2742e-02,  3.5193e-05, -3.5148e-04,  1.6634e-04, -9.5253e-05,
        -1.3660e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4554e-04, -9.7997e-05,  1.3016e-04, -1.5933e-04, -2.8528e-05,
        -1.1393e-04, -2.1236e-04,  2.1744e-01, -1.5134e-04, -3.9595e-05,
        -9.7741e-04, -1.6804e-04, -1.8317e-04,  9.5241e-05,  1.4006e-04,
         3.1509e-02,  1.7545e-05,  5.7138e-05,  1.0889e-04,  1.3144e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5652e-04, -3.4977e-02,  5.2950e-05,  1.1710e-04, -1.7575e-01,
        -5.1688e-05,  9.5713e-06,  2.6550e-05, -6.0571e-06,  8.5175e-05,
        -8.5256e-05, -7.9173e-05, -5.2143e-03, -1.5953e-04,  4.9257e-05,
        -5.6370e-05, -3.8246e-05,  2.6721e-05, -3.4276e-03,  8.2206e-06,
         1.2670e-04, -2.0689e-02, -1.0305e-04,  7.4735e-05, -1.7614e-04,
        -8.9955e-05, -2.6186e-05, -8.9878e-05, -1.0281e-05, -5.8806e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7253e-04,  1.0412e-01,  8.6258e-06, -2.5309e-05,  1.0770e-02,
         3.6856e-05, -8.4157e-06, -1.5218e-06, -2.8464e-08,  3.7115e-05,
         4.2009e-06, -8.6180e-05,  1.0374e-01, -1.4479e-05, -1.4349e-05,
         1.0823e-05, -3.5642e-06, -2.6072e-06,  6.8731e-02, -5.2102e-05,
        -8.2966e-05,  4.7634e-04, -1.8661e-05,  1.0260e-05, -2.8721e-05,
         5.5588e-04,  1.6309e-02,  3.4575e-05, -5.0884e-05,  1.7423e-04,
         6.2244e-04,  2.7774e-05, -9.9735e-05, -1.0776e-04,  5.8012e-06,
        -1.3987e-05, -1.3181e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2704e-03, -4.7699e-02,  7.3328e-05,  1.9229e-06, -1.3584e-01,
        -2.4627e-05, -7.0317e-05,  3.1357e-05,  6.7412e-06, -2.3339e-04,
         9.6634e-07,  5.1682e-05, -1.0026e-01, -2.4074e-05,  8.1822e-05,
        -1.0547e-04, -4.0265e-05, -3.9148e-05, -3.9873e-02,  2.5341e-04,
        -6.0944e-06,  1.1562e-04,  2.7585e-06,  6.4287e-05,  1.3059e-06,
         9.1205e-05, -7.9426e-05, -2.0683e-05, -5.3328e-06, -1.5089e-05,
         6.4327e-06,  2.5170e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5282e-04, -1.0014e-01,  4.3796e-05,  4.6516e-05, -1.0836e-01,
        -1.6000e-05, -1.1937e-04, -5.0979e-04, -6.1686e-05,  2.9226e-05,
        -2.7278e-04,  1.5547e-07, -3.1804e-02, -4.9913e-05, -1.3317e-05,
         6.3897e-05, -1.6481e-05,  4.6791e-05, -5.4123e-05,  9.6009e-05,
        -2.7937e-02,  8.5459e-05,  1.7411e-05, -1.0928e-05, -5.5765e-02,
        -1.7705e-05, -3.7806e-04, -4.8533e-05, -2.9977e-02, -5.0817e-05,
         1.1442e-05, -5.3923e-07, -3.8273e-05, -2.7099e-05, -2.0654e-05,
         6.0260e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1200: [tensor([ 6.8242e-04,  1.2198e-02, -1.0073e-04,  3.0840e-04,  3.2889e-05,
         2.0723e-01,  2.0551e-05, -1.2337e-04, -1.9166e-04, -3.1933e-05,
        -7.3883e-05,  6.5213e-05, -1.4383e-05,  8.5506e-05, -4.7601e-05,
        -6.0546e-05,  1.2362e-04,  8.4418e-02, -1.0710e-04,  7.9737e-07,
         6.0485e-03, -3.1309e-05,  1.0030e-04, -8.7284e-05,  9.2549e-05,
         5.2573e-05,  1.4175e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7961e-04, -3.5602e-03, -5.7684e-05, -1.6675e-04, -1.2783e-04,
        -6.5488e-02, -1.3588e-05, -9.0239e-05, -4.8691e-05,  8.3453e-05,
         9.6412e-05, -1.6015e-05, -1.1351e-04, -2.6018e-05, -3.6470e-05,
        -1.4653e-01, -4.7328e-05, -2.2417e-04, -1.0620e-04,  2.4880e-05,
        -1.0650e-05, -8.8741e-03,  6.8303e-05, -3.8079e-05,  1.6605e-05,
         6.7216e-05, -3.8935e-03, -3.2662e-05,  4.0369e-05, -2.7474e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5819e-04,  2.2390e-02, -1.7821e-05,  7.6693e-05, -3.6056e-05,
         2.0170e-01, -8.5466e-05, -1.8869e-04, -1.1573e-04,  2.5190e-05,
        -6.4388e-05, -2.2385e-05, -8.7294e-05,  7.1566e-05, -8.7619e-05,
        -7.0486e-05,  3.0029e-05, -7.3730e-05,  2.9595e-05, -1.8085e-05,
        -1.1748e-05, -6.6947e-05, -3.4809e-05, -4.3445e-05, -9.3606e-05,
         2.1707e-05, -5.9048e-05,  4.1672e-02, -8.9868e-05,  5.0957e-05,
        -8.9963e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1044e-04, -7.4065e-02, -4.0065e-05,  2.5376e-05,  7.0078e-06,
        -5.7378e-02, -4.8563e-05, -2.4948e-05, -1.3449e-03,  4.5952e-05,
        -9.5638e-05, -5.9622e-02, -1.4052e-05, -8.3252e-05, -2.9112e-05,
        -7.8654e-03, -1.3717e-05, -5.1923e-05,  3.0888e-06, -1.7479e-03,
        -3.6019e-05, -1.0165e-04,  2.6676e-05,  3.2152e-05, -2.1799e-02,
        -5.6468e-05, -8.2482e-05,  2.4799e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5920e-04, -3.6294e-02, -5.0952e-05, -9.5345e-06,  3.5086e-05,
        -2.3611e-02, -1.7692e-04,  5.4921e-05, -7.8250e-02, -4.8854e-05,
        -1.1211e-03, -1.6432e-01,  6.7521e-05,  6.6596e-05, -2.8844e-05,
        -3.9823e-03, -7.6351e-05, -2.9740e-05, -4.3689e-05, -4.6314e-03,
         3.5397e-05, -3.0213e-04, -3.6669e-06,  7.7737e-05,  1.3034e-04,
         9.2375e-05, -3.4498e-05, -4.4523e-05,  2.3863e-05, -2.1863e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6194e-05, -2.6663e-02, -1.4957e-04,  1.7304e-05,  4.6830e-05,
        -1.4174e-01, -4.1446e-05,  4.2200e-05, -1.9339e-02, -2.4203e-05,
        -2.5655e-05, -1.6291e-02, -1.2109e-05, -1.3138e-05,  1.0249e-04,
        -2.0979e-02, -1.0593e-04, -1.8303e-05,  5.9518e-05, -1.4027e-02,
        -5.0702e-06, -1.2899e-04,  1.3355e-05,  8.1285e-06,  1.4579e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.2885e-04,  7.0064e-02,  3.8511e-05,  1.5923e-05,  1.4534e-01,
         7.3285e-05,  2.4084e-05, -1.7207e-04, -5.4752e-05, -3.9591e-07,
        -9.3105e-06, -2.4287e-06, -5.2658e-05,  1.0536e-01, -2.6881e-05,
         1.2235e-05, -5.2333e-05, -4.2779e-05, -1.3122e-05,  3.4369e-05,
         7.9595e-05,  8.9633e-06, -3.7732e-05, -1.2064e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7838e-03,  2.3298e-01, -1.4504e-04, -6.8430e-05,  2.4726e-02,
        -5.1629e-05,  2.5631e-06, -3.1608e-04,  1.4492e-04,  2.0926e-04,
         5.9479e-06, -1.7798e-04, -1.7522e-04,  1.2254e-02, -1.1918e-04,
        -1.5200e-05,  6.5407e-05, -1.0758e-05, -4.6441e-05,  6.7070e-05,
         1.5241e-05, -5.2699e-05,  1.7246e-05, -7.2354e-05, -3.1827e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6460e-04, -1.1332e-02, -9.3157e-05, -5.1107e-05, -2.2770e-01,
        -2.4019e-04,  7.4443e-06, -5.1254e-04, -7.4517e-05, -2.8243e-04,
        -6.5875e-05, -1.2203e-04,  3.0086e-05, -3.9670e-02, -1.0484e-05,
         1.6064e-04, -8.1588e-05,  1.1275e-04,  2.7425e-05,  1.4529e-04,
        -1.2818e-04, -1.2529e-04,  7.5005e-05,  8.9006e-06,  1.9622e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4966e-04, -1.9487e-01,  2.0720e-05, -5.8850e-05,  1.0358e-04,
        -7.8762e-02, -6.1168e-05,  2.8496e-05, -4.3361e-05, -6.1445e-04,
        -5.6818e-05, -3.5591e-05,  1.0139e-04, -4.0254e-04, -2.0234e-05,
        -3.9584e-05,  4.7169e-05,  1.7428e-04, -4.9523e-02,  9.5357e-05,
        -5.8908e-06,  4.9857e-05, -3.2447e-02, -1.8586e-04, -1.6935e-04,
        -2.1986e-05, -5.2846e-05,  1.8752e-05,  4.6263e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5132e-05,  1.5605e-02, -5.6499e-05, -1.6525e-05, -1.4706e-05,
         1.2225e-01, -4.0685e-05,  9.4012e-05,  2.1531e-05,  2.0029e-03,
         6.4514e-05,  1.0097e-04,  9.7589e-06,  2.6250e-04,  2.6165e-05,
         2.9943e-05, -1.3775e-05, -5.9494e-05,  3.8870e-02,  4.6372e-06,
        -8.3484e-05,  4.3434e-05,  6.9286e-03, -4.5998e-05,  5.3641e-06,
         5.2634e-05,  5.9525e-06,  2.5030e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1196e-04, -8.8336e-02, -9.2185e-05,  2.0829e-05, -3.6183e-05,
        -1.5182e-01, -2.2203e-06,  5.5277e-06,  2.6940e-05, -7.2171e-06,
         7.5480e-05, -6.1404e-05,  9.9641e-05, -5.8705e-05, -2.7921e-05,
         4.0814e-06, -1.2147e-04,  1.2433e-04, -3.6305e-03,  4.8803e-05,
         4.9703e-07,  2.9232e-05, -1.8672e-02,  1.9365e-05, -8.9034e-05,
        -7.7145e-05, -3.3448e-05, -1.4724e-05, -5.1250e-05,  1.8248e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1250: [tensor([ 2.7284e-04, -9.5131e-05, -4.3714e-06, -1.3424e-05,  5.1927e-06,
        -4.5268e-06, -2.1594e-05, -3.5180e-06, -1.1893e-01, -3.8010e-05,
        -1.1735e-05, -2.1836e-05,  2.9866e-05,  5.6828e-07, -2.0429e-03,
        -3.1753e-05,  4.1281e-06, -1.3665e-05, -1.1180e-04, -6.8641e-02,
        -3.4878e-05, -4.0090e-05,  7.0611e-05, -9.2968e-05, -3.1184e-02,
        -9.7576e-05, -1.5292e-05, -6.4185e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0178e-05, -5.1409e-02, -5.9808e-06, -1.1765e-04, -7.2301e-07,
        -1.4316e-05, -4.2588e-02, -1.4625e-05, -2.2728e-04, -9.4114e-05,
        -1.1511e-04, -2.4119e-05, -1.8410e-04, -4.8399e-05, -1.3340e-04,
        -6.4874e-05, -1.1159e-04, -8.3885e-06, -3.9889e-05, -9.6702e-05,
        -6.7378e-05, -1.0947e-01, -2.4557e-05, -4.4963e-04,  2.2336e-05,
        -8.4192e-05, -8.2060e-02, -1.3977e-04, -8.7305e-05, -4.5382e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3788e-04, -9.0748e-02, -8.5736e-05,  1.0045e-05,  3.7335e-05,
         1.8018e-05, -1.1701e-01, -8.4113e-05, -3.8434e-04, -1.0492e-04,
        -7.5072e-05, -5.8052e-05, -2.2714e-04, -2.2478e-05, -5.4064e-05,
        -6.8682e-06, -4.7007e-05,  5.6275e-05,  1.0303e-04, -6.9940e-02,
        -4.9505e-05, -1.7854e-04, -2.8285e-02,  1.1264e-04, -6.4699e-06,
        -5.5037e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2695e-04,  4.4362e-02,  4.5020e-05, -2.4451e-04, -1.1847e-04,
         4.9224e-05,  2.1688e-01, -4.7936e-05,  9.6135e-04, -6.8922e-05,
        -4.6202e-05, -9.2252e-05,  3.1680e-04, -5.1457e-05, -9.9282e-05,
        -3.2120e-06, -4.8044e-05,  1.0180e-04, -9.3720e-05, -1.9789e-05,
        -7.5349e-05,  2.1575e-02, -1.0254e-04,  1.4305e-03, -1.7158e-05,
         2.7194e-05, -5.4654e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1623e-04, -6.7644e-05, -1.4225e-05,  3.8263e-05, -1.1982e-02,
        -2.3134e-05, -2.1225e-04, -2.5579e-05, -8.5581e-04, -6.6128e-05,
         4.2098e-07, -1.0339e-04, -8.9749e-05, -1.7107e-05, -8.6735e-05,
        -1.5071e-01, -1.3964e-04, -2.6429e-04, -1.9667e-04, -1.3511e-05,
        -7.3298e-02, -4.9155e-05, -1.5653e-04, -6.9806e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7673e-04,  1.2082e-04,  6.0954e-05,  5.9813e-05,  1.6062e-02,
         5.2739e-05,  1.6656e-04,  1.0992e-04,  5.5777e-04,  2.1124e-04,
        -8.0403e-05,  1.4896e-04, -4.5617e-05,  6.7835e-05,  5.3734e-06,
         1.5554e-01,  5.8657e-05,  3.6895e-05,  1.3518e-04, -4.9095e-05,
         7.6666e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7033e-04, -7.2121e-05,  6.9329e-05, -4.0829e-05,  3.9459e-02,
         1.0988e-04,  2.1159e-05,  1.0326e-04,  7.9459e-04,  1.5463e-04,
        -6.4621e-05, -2.3394e-06, -7.3886e-05,  1.1576e-04, -3.2879e-05,
         1.1187e-01,  7.6465e-05,  3.8999e-05, -1.3037e-04, -9.6392e-05,
        -1.6520e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6087e-04, -3.1981e-01, -2.3893e-04, -5.3104e-04, -1.0755e-04,
        -3.1107e-04,  3.8227e-05,  2.6167e-05, -7.2852e-05, -1.0318e-04,
        -1.3560e-04,  2.1026e-04,  1.9777e-05, -5.6815e-02,  1.0027e-04,
        -4.0130e-04, -2.0793e-04,  1.1719e-05, -7.3404e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1477e-05,  2.1119e-01, -9.5726e-05,  7.2556e-03,  1.7817e-04,
        -3.8290e-05, -7.0861e-05,  1.2617e-04, -1.2351e-04, -6.6662e-05,
         2.2356e-04,  9.1138e-05, -3.1144e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2692e-04, -3.1864e-01, -4.3575e-05,  6.1269e-04, -1.3977e-04,
        -5.3764e-05, -1.5355e-05, -9.4540e-05, -1.7609e-05,  5.8594e-05,
        -5.2514e-05, -2.6456e-04, -5.5886e-02,  2.8128e-06, -1.3837e-04,
        -2.4749e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1813e-04,  4.3973e-02,  9.6988e-05, -3.8323e-05, -4.7310e-05,
        -8.6622e-05, -9.3538e-05,  4.6174e-05, -8.7462e-06, -2.1885e-04,
        -9.7606e-05, -1.2400e-04,  1.5908e-04,  5.2923e-05,  2.2907e-01,
         5.7059e-05, -7.3655e-05,  3.2706e-04,  1.3205e-04,  1.4403e-05,
         1.4466e-04, -1.9522e-04,  1.0517e-04, -4.0370e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7194e-04, -1.1137e-01,  4.6515e-06, -7.7755e-05, -2.3342e-05,
        -5.7690e-05,  9.6787e-05,  6.1764e-05,  1.7694e-05,  2.6443e-06,
        -1.3289e-04, -2.4425e-05, -3.6640e-05,  3.1463e-05, -1.0915e-01,
         1.3105e-04, -1.2395e-04,  3.0048e-05,  7.3723e-05,  9.7189e-05,
        -3.7237e-02, -1.9897e-05, -8.6157e-05,  2.8628e-05,  2.0784e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1300: [tensor([-5.8904e-04,  2.5489e-01,  1.0521e-04, -2.4783e-05,  1.6022e-01,
        -2.1238e-05, -7.2856e-05, -1.1912e-04,  1.5977e-04,  1.3269e-02,
         3.5656e-05, -3.3384e-05, -4.2852e-05, -5.8535e-05, -5.2919e-05,
         1.4015e-04,  3.4306e-05,  2.3105e-05, -7.7547e-05, -9.5537e-05,
        -8.0229e-05,  7.7873e-06,  2.0583e-03,  3.1020e-05, -2.9966e-05,
         1.5069e-04,  1.6725e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4258e-04,  9.4119e-02,  3.0947e-06,  7.4560e-05,  5.5601e-02,
        -1.5631e-05,  3.4189e-07, -4.4105e-05,  3.5442e-05,  2.9033e-02,
        -1.3932e-05,  2.6193e-06,  3.4743e-05,  5.5202e-05, -2.4891e-05,
         1.6525e-05, -2.9965e-06, -4.2865e-05,  5.1042e-05,  7.7313e-06,
         9.6453e-02,  6.8228e-05,  7.2954e-05,  2.8566e-05,  1.4971e-05,
         7.0539e-05, -3.8257e-05,  6.2600e-06, -1.8183e-05,  5.2882e-04,
         5.7402e-05,  5.0120e-05,  1.0583e-04, -1.0002e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.5293e-06, -5.4663e-02, -3.7477e-05, -8.5056e-05, -8.7619e-03,
         3.9405e-05, -8.6325e-06, -9.6286e-05, -7.0657e-05, -6.3267e-02,
         5.4213e-05,  2.2463e-05, -9.2467e-05,  1.7184e-05, -2.2968e-05,
        -4.0758e-06, -2.1040e-05,  4.9166e-05, -1.4676e-05, -4.8113e-05,
        -7.4120e-03, -3.7217e-05, -4.9972e-05, -6.6709e-05, -9.0025e-02,
         1.3855e-05, -4.3303e-05, -3.2139e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3921e-04,  5.4041e-02,  3.0645e-05,  2.0705e-05,  1.5441e-02,
         1.7965e-05, -3.4153e-05,  1.1044e-04,  6.7378e-05,  1.2318e-02,
         1.1871e-05,  6.4369e-05, -1.5064e-04,  7.6855e-05, -6.4908e-05,
         8.4691e-05, -1.5037e-05, -8.1995e-05, -4.5086e-05, -2.2054e-05,
         1.6469e-01,  1.4548e-05,  3.0984e-04,  5.2025e-05,  1.1986e-01,
        -7.4079e-06,  1.8318e-04,  3.4068e-05, -2.8435e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8772e-04,  7.2012e-02,  3.6792e-05, -3.2920e-05,  5.2348e-02,
         5.1434e-05, -1.2191e-04, -4.3377e-06,  5.9991e-05,  2.2379e-02,
         2.2312e-05,  9.8454e-06, -5.6314e-05, -1.2532e-05,  1.5067e-05,
        -9.8627e-06,  7.1016e-06, -6.3215e-05,  6.1975e-05,  4.9347e-06,
         6.3277e-02,  3.9059e-05,  5.0303e-05,  4.4231e-02, -1.1543e-05,
         3.8048e-05, -1.5911e-05,  5.0306e-05, -1.3706e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9239e-04,  5.2682e-03, -6.7154e-05, -7.0126e-07,  2.9885e-02,
        -3.9800e-06, -6.0989e-05, -6.9884e-05, -3.1438e-05,  2.8890e-02,
        -2.4150e-06,  4.2271e-05,  9.8127e-05, -2.7610e-05, -1.7068e-04,
        -9.9501e-06, -1.7056e-05, -9.4918e-06, -2.4331e-05, -9.9194e-06,
         1.6815e-01, -6.2717e-05, -5.3227e-05,  2.8521e-05, -1.0866e-05,
         3.1065e-05,  1.1123e-04,  1.7492e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.6060e-04, -8.5115e-02,  5.2079e-05, -5.4907e-05, -2.1146e-01,
         3.9946e-05, -5.2718e-04, -5.9819e-05, -1.2301e-04, -2.3845e-02,
        -1.2980e-04, -3.3040e-05, -9.9192e-05, -8.2986e-06, -3.7430e-05,
        -5.4886e-05,  1.6746e-06, -1.1022e-04, -7.4683e-06,  8.9708e-05,
         1.7808e-05, -5.4999e-05, -8.7852e-02,  1.5256e-05, -1.7796e-04,
         9.4550e-05, -1.4601e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2679e-04,  8.4826e-03, -2.7107e-05,  2.8527e-05,  9.8205e-02,
        -1.8310e-05, -1.8712e-04, -1.6863e-05,  4.6571e-05,  1.4989e-03,
        -5.8128e-05,  3.9610e-05, -5.3107e-05, -2.3733e-05, -8.0261e-05,
         9.3112e-05, -4.0484e-06, -5.6781e-05,  5.2162e-05, -5.9353e-05,
         3.5579e-04,  6.9160e-05,  6.0148e-05,  6.7757e-05,  4.1387e-05,
         1.7031e-01, -9.1605e-05, -2.1500e-04, -8.9537e-05,  3.9310e-05,
        -2.3620e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9774e-04, -2.9121e-02,  1.4289e-04,  1.2683e-05, -1.8959e-01,
        -1.3717e-05, -4.2476e-05, -3.0286e-04, -1.2103e-04, -9.7391e-05,
         9.0739e-05,  5.4456e-05,  9.4672e-06,  6.4360e-05,  3.9398e-05,
        -1.5436e-01, -9.7618e-05, -8.1117e-04, -2.0980e-02, -5.9826e-05,
        -2.4559e-04,  9.5941e-05, -1.7606e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7049e-04,  1.8118e-02, -1.2830e-04,  1.4046e-04,  8.8203e-02,
         2.2657e-05, -4.6283e-05, -1.8420e-04,  4.8213e-05, -2.9801e-05,
        -6.8105e-05,  5.5523e-05,  1.4872e-04,  3.2532e-05,  3.9478e-05,
         1.9436e-01, -1.6370e-04, -1.1174e-04, -1.1585e-04,  1.0468e-04,
         7.9444e-05, -2.5810e-05,  1.6091e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6877e-04, -4.9251e-02, -7.4551e-05, -5.3042e-05, -5.1823e-03,
         2.9451e-05, -3.6270e-05, -1.3769e-04, -4.2049e-06, -3.0982e-05,
        -2.1737e-04,  7.7046e-05,  1.0668e-05,  1.3613e-05,  6.9251e-05,
        -1.2992e-01,  7.0738e-05,  4.9870e-05, -1.3846e-03, -9.4094e-06,
        -1.0891e-04,  4.3444e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8380e-04,  2.5420e-05,  3.3654e-03,  1.3542e-05,  4.2725e-06,
         1.1712e-02,  2.3164e-05, -4.9997e-05,  6.4797e-05, -1.5540e-05,
         1.4683e-02,  6.7298e-05,  2.0579e-05,  2.4751e-05, -2.9212e-05,
        -4.3484e-05, -4.5403e-05, -1.1147e-05, -4.5650e-05, -8.2136e-05,
         1.5954e-01,  4.6052e-05,  8.8687e-04,  5.2836e-03,  3.1626e-06,
         8.1621e-05,  2.2834e-05,  1.0446e-05,  1.8693e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1350: [tensor([-9.1076e-04, -9.5789e-02,  1.6082e-06,  4.5056e-06, -8.7002e-02,
        -3.7789e-05,  1.0061e-04, -7.1539e-02,  2.5412e-05,  1.9982e-05,
         1.7110e-05,  9.5952e-05, -5.9255e-02,  1.0307e-05,  1.4134e-05,
        -8.2723e-06, -5.0258e-04, -3.1678e-02, -1.4750e-05, -1.0200e-05,
         2.3429e-05,  1.0785e-04,  6.0845e-05, -2.8174e-04, -8.8767e-07,
        -3.9168e-05, -3.4643e-05, -6.5705e-05, -1.2915e-04, -4.5808e-03,
        -3.3971e-05, -8.4671e-05, -8.7443e-05, -6.7028e-05,  5.6430e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6795e-04, -1.2621e-01, -8.8136e-05,  2.2541e-05, -5.9532e-03,
        -9.0872e-06,  1.2855e-05, -2.1522e-02, -1.5064e-05, -3.9745e-05,
        -5.1221e-05, -3.4942e-05, -1.0477e-02, -5.5260e-06,  2.8289e-05,
        -9.6647e-06,  1.5218e-04, -5.5782e-03, -3.8661e-05, -3.7194e-05,
         6.6551e-06, -7.0043e-05, -2.2958e-02,  1.3724e-05,  3.0230e-04,
        -6.5611e-06, -2.6113e-02, -1.3850e-06,  2.5220e-05,  1.5198e-05,
         6.8957e-06,  1.1900e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3072e-04, -1.1600e-02, -9.9559e-06, -3.7217e-05, -5.6893e-05,
        -3.5023e-05, -5.7997e-05, -4.7352e-02, -5.2589e-06, -1.0384e-05,
        -3.3370e-06,  1.0966e-04,  1.3567e-05, -5.7654e-05,  2.2671e-05,
         1.6361e-05, -6.6148e-03,  2.0684e-05,  1.6793e-05, -6.8357e-02,
        -1.3574e-05, -1.0653e-05,  6.5365e-05, -2.2946e-05, -1.1821e-05,
        -5.4018e-05,  2.8665e-05,  6.7222e-06,  1.0432e-06,  1.2402e-05,
        -5.9266e-02, -7.5153e-05, -7.6124e-06, -7.2864e-06, -1.6925e-02,
         1.3074e-05,  3.6624e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6788e-04, -1.2515e-01, -8.6179e-05, -5.9670e-05, -1.2163e-04,
        -3.3040e-05, -7.8279e-05, -3.7283e-02, -6.8563e-05, -1.8306e-05,
        -4.4377e-05,  3.2011e-04, -5.0127e-05, -6.9983e-05, -1.6384e-05,
         3.3840e-05, -2.5528e-02,  2.0808e-05, -4.2582e-05, -1.1121e-01,
         5.4192e-06, -4.2206e-05, -1.5431e-05,  2.0737e-06, -4.3624e-05,
        -4.6480e-05,  3.0832e-05, -3.5954e-06, -3.5737e-05,  2.7754e-05,
        -1.9172e-02, -4.3773e-05, -3.3141e-05,  4.4312e-04, -1.6204e-05,
        -2.4183e-05,  4.0708e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4911e-05, -8.8644e-02, -1.2190e-06,  6.9705e-05,  2.0732e-06,
         7.3311e-06, -4.9421e-05, -1.1806e-01, -4.8621e-06, -3.6384e-05,
         3.5761e-06,  1.3939e-04,  4.4941e-05, -1.8174e-05,  1.1328e-04,
        -4.6324e-05, -4.4460e-02, -5.6025e-06, -2.5271e-06, -3.9356e-02,
        -4.6853e-05,  7.8597e-06,  1.2801e-05,  5.1499e-05,  4.5388e-05,
        -4.1994e-05,  3.8160e-05,  2.9260e-05, -3.6083e-05, -8.6600e-06,
        -3.7667e-03, -8.9673e-05,  4.3110e-05,  7.7581e-05, -5.3576e-06,
        -2.8914e-05,  1.9004e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3586e-04, -1.8134e-02, -5.6358e-05, -2.4836e-05,  2.5175e-04,
        -3.0422e-05, -1.9459e-05, -3.1254e-05, -4.2242e-04, -5.2724e-05,
         6.9970e-05, -4.4264e-05,  2.5880e-05,  4.9756e-07, -4.0404e-02,
        -1.7198e-05, -2.7161e-05,  2.9376e-05, -6.4144e-02,  4.2074e-05,
        -2.8421e-05,  4.7586e-05,  4.2176e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5596e-04, -2.2426e-01,  1.2937e-05,  6.7539e-05,  1.3191e-04,
        -1.6603e-04, -2.7984e-05,  7.3010e-05, -7.1940e-02,  3.2328e-04,
         8.1796e-05,  9.7696e-05,  2.9698e-05, -4.8663e-05, -6.9212e-02,
        -4.5338e-05, -1.0442e-04,  1.0568e-05,  8.4333e-05,  1.2074e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3691e-04, -3.6987e-02, -7.1869e-07,  5.5846e-06,  3.6160e-04,
         8.3534e-05,  1.2751e-05,  2.7550e-05, -4.3453e-02,  3.3221e-05,
        -1.1148e-06, -4.5932e-05,  1.1995e-05,  8.5615e-06, -6.5370e-02,
         2.2169e-05, -4.1275e-06, -4.0582e-05,  5.2676e-06, -2.9530e-02,
         2.7955e-05, -2.0079e-05, -1.8426e-05, -4.2958e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6346e-04, -2.3681e-03,  7.1546e-05,  5.9831e-05, -3.7940e-02,
         1.3500e-04, -5.3297e-06, -7.5258e-03, -7.8468e-06,  6.9882e-05,
         3.0071e-06, -7.6812e-02,  5.7193e-05, -8.5230e-06, -6.5458e-05,
         1.7172e-05,  1.0753e-05,  3.0311e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7584e-05, -4.4290e-02,  8.8290e-05,  1.0397e-05, -5.9389e-03,
        -2.8956e-06,  4.5313e-05, -2.4547e-02,  1.1461e-04,  4.6578e-06,
         6.1632e-06, -8.9879e-02,  2.7438e-05,  2.0963e-05,  4.5429e-05,
         1.9538e-04,  1.4836e-04,  3.4483e-06,  4.0961e-05, -6.6624e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4669e-04, -7.9564e-02, -5.0833e-05,  1.8877e-05, -1.3900e-02,
        -3.1665e-05,  2.4282e-05, -1.6708e-02,  2.1821e-05, -2.5772e-05,
         9.3833e-06, -7.2174e-02, -2.9821e-05, -3.0670e-05, -4.2357e-02,
        -3.0249e-05, -3.3308e-05, -1.7859e-04, -2.9104e-02, -6.3986e-05,
        -1.0363e-04, -3.5721e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5526e-04, -8.1735e-02,  1.0291e-04, -8.0409e-05,  6.3466e-05,
         2.1147e-04,  2.8469e-05, -1.4492e-01, -3.9531e-05, -9.7943e-05,
        -2.9137e-05, -3.0463e-02,  9.6240e-05,  5.1950e-05,  8.2135e-05,
         9.2601e-06, -6.3914e-05, -1.2944e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1400: [tensor([ 3.0004e-04, -5.4341e-02, -1.5152e-04, -1.1860e-04, -1.1326e-01,
        -7.5605e-05, -9.5579e-05, -3.4211e-05, -1.7214e-04,  4.8302e-05,
        -1.5202e-04, -4.1255e-05, -1.8590e-01, -1.4090e-04, -8.3442e-05,
        -2.5319e-04, -4.5480e-05, -9.7107e-06,  7.8289e-05,  4.8268e-05,
        -1.4101e-04, -1.2621e-04,  6.4458e-05, -2.3945e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1972e-05, -1.5016e-01, -1.4310e-05, -1.4482e-05, -1.3112e-01,
        -9.5794e-06,  2.9103e-05, -2.0848e-04, -1.2659e-04,  1.2883e-04,
        -3.8147e-05,  2.8440e-05, -2.7305e-02,  4.9029e-05,  1.1049e-04,
         2.6761e-05,  1.0864e-04,  9.0593e-05, -2.5357e-05, -5.2132e-03,
        -5.3851e-05, -8.6901e-04, -7.9248e-02,  8.1819e-05, -2.3351e-05,
        -1.4288e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3394e-04, -3.0405e-02,  8.9377e-05, -2.0472e-05, -1.5855e-01,
        -2.5830e-05, -1.0594e-04,  3.4630e-05, -2.0459e-05,  1.7267e-04,
        -2.2372e-05,  1.7688e-05, -1.7956e-02, -6.9935e-05,  4.2910e-07,
        -1.4569e-04,  4.7566e-05, -1.4338e-05, -8.0350e-05, -8.7656e-02,
         1.2592e-04, -4.3667e-04, -7.8594e-06,  5.7993e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8305e-04, -1.3575e-01,  4.4306e-05, -1.6915e-04, -5.2652e-02,
        -2.4601e-05, -1.5456e-05,  3.6512e-05, -3.0324e-05,  1.7810e-05,
         1.1331e-05,  6.3087e-05, -6.6338e-05,  3.8586e-05, -6.6748e-02,
         1.2438e-05, -3.4972e-05, -1.1137e-04, -1.8561e-04,  2.9100e-05,
         3.0152e-05,  3.3593e-05, -4.1044e-02, -2.7424e-05, -7.2991e-05,
         1.4963e-05, -1.0373e-05, -3.6262e-05, -6.2562e-06,  3.8739e-05,
         4.9821e-05, -4.3770e-06, -1.6577e-05,  3.4982e-05, -4.3800e-05,
         7.7460e-06, -1.2951e-05, -6.9916e-05, -4.7454e-06, -5.0660e-06,
        -2.0477e-06, -3.9014e-02,  1.0643e-05, -2.0437e-05, -2.0219e-05,
        -2.8040e-05, -1.0495e-04,  1.7631e-05, -1.3480e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6345e-04, -2.3480e-02, -4.0028e-05, -1.2575e-04, -1.0268e-01,
        -1.4270e-06,  1.3458e-05, -6.2716e-05,  3.6545e-05, -1.2408e-05,
         9.7704e-06,  9.8722e-05,  3.2825e-06, -1.8037e-05, -3.2684e-02,
        -1.1107e-05, -7.5400e-05,  9.2475e-05, -3.3824e-05,  3.4737e-05,
        -4.4066e-05, -1.3442e-05, -2.1406e-02, -1.5203e-05, -2.1066e-05,
        -3.5810e-05,  6.8427e-05,  4.2527e-05,  6.7348e-06,  2.2245e-05,
        -8.9899e-06, -5.8247e-02,  2.0424e-05, -3.8412e-05,  3.6074e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5256e-04, -3.1668e-02,  3.4488e-05, -1.1768e-04, -5.1692e-02,
        -4.6494e-05,  5.4544e-05, -1.8845e-06, -4.6997e-05,  3.8494e-05,
         1.4817e-05,  1.4353e-04,  1.1579e-04, -1.2149e-05, -1.0823e-01,
        -3.0540e-05,  3.7587e-05,  2.4963e-05,  1.0423e-04, -4.7921e-06,
        -5.2191e-05,  1.1322e-05, -1.3810e-02,  1.0641e-04,  1.4081e-04,
         1.7293e-06,  3.0765e-05, -8.5037e-02, -6.5703e-05, -2.6163e-05,
        -6.6092e-06, -2.6793e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4357e-04,  4.9493e-02,  1.1223e-05, -2.1632e-05,  2.0858e-05,
         1.3104e-05,  3.3409e-05,  2.6292e-06, -2.0288e-06,  7.9443e-06,
         5.7315e-04,  1.0761e-05,  2.2289e-04,  2.7305e-05,  1.3878e-03,
         2.9733e-05,  2.6502e-05, -6.7093e-07,  5.7491e-04, -1.5406e-06,
        -4.7961e-06, -2.6606e-05,  1.9273e-02,  2.0984e-05,  6.1448e-07,
         4.0466e-05, -2.4652e-05, -8.4149e-05,  3.0699e-05, -4.0026e-05,
         2.6851e-05, -7.7378e-06,  2.1067e-04, -3.9038e-05,  2.1333e-05,
         4.8856e-05,  2.4466e-05,  2.3305e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.9723e-06, -2.2461e-02,  8.6115e-05,  4.1465e-05,  1.4348e-04,
        -1.3426e-04, -7.8263e-05, -7.3665e-05,  6.7878e-06,  1.5052e-04,
        -1.4435e-04, -1.4473e-04, -3.1045e-03, -3.0096e-05, -4.1413e-04,
        -1.3643e-04, -4.8252e-05,  1.0187e-04, -1.1810e-03, -5.7898e-05,
         1.2484e-04, -1.0992e-07, -3.3448e-01,  8.7884e-05,  3.7507e-05,
        -4.3747e-04, -5.5647e-04, -1.7816e-05,  8.3240e-05, -1.4975e-05,
        -5.3854e-04, -5.4733e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7642e-04,  1.8514e-01, -4.4386e-05, -4.4264e-05, -1.1847e-04,
        -4.6515e-06,  8.1006e-05, -8.6013e-05, -1.7193e-05, -6.2397e-06,
         6.6145e-04, -8.1334e-05,  1.9235e-04, -6.2047e-05,  2.4236e-03,
         5.4313e-05, -5.2760e-05, -1.0383e-04,  2.0523e-03,  7.7287e-05,
        -2.6276e-06, -2.2731e-06,  1.2726e-02, -3.1043e-05,  2.5263e-04,
        -4.3843e-05,  2.1883e-05, -4.8648e-05,  2.2732e-05, -1.0311e-04,
         5.6434e-02, -3.1280e-05, -1.4766e-04, -4.4266e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2139e-04, -4.0843e-05,  5.2852e-06,  7.8035e-07,  5.1469e-05,
         3.9174e-04,  3.5519e-02, -8.9286e-06,  3.4345e-04, -1.9493e-05,
         5.7115e-03, -5.4923e-05, -1.4619e-04,  1.6210e-02,  4.5521e-05,
        -1.3183e-05, -5.2593e-05,  1.8907e-03, -9.2291e-06,  6.1842e-06,
        -2.0273e-05,  1.9283e-03, -1.4310e-06,  1.8129e-07, -1.9062e-05,
         4.6278e-02, -2.3595e-05, -1.7080e-05, -1.1916e-05,  1.4078e-05,
         2.4616e-05, -4.5047e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9994e-04, -4.7085e-05, -6.6275e-05, -2.5367e-05,  1.6066e-05,
        -1.5080e-03, -9.2047e-02,  1.9744e-05, -1.9804e-04, -6.6347e-05,
        -1.0263e-01, -5.6825e-05, -1.5857e-04, -8.4321e-02,  2.8992e-05,
        -1.4034e-05,  2.6788e-05, -1.0275e-02,  3.8087e-05,  9.3382e-05,
        -1.0778e-05, -1.9614e-02, -1.0298e-05,  2.2465e-04,  7.1547e-05,
        -5.3149e-05, -2.4513e-05, -7.8950e-03, -3.8787e-05,  2.6670e-05,
        -1.2846e-02,  1.0354e-04,  4.7027e-05, -1.6385e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9147e-05, -1.1041e-05,  2.4148e-05, -2.1422e-05,  1.0465e-05,
         1.5312e-05, -1.6820e-01, -4.6066e-05, -1.2131e-03, -6.1848e-05,
        -7.7476e-03, -2.1027e-05,  3.5916e-04, -6.8445e-03,  5.5368e-05,
        -3.0405e-05,  2.6480e-05, -2.3928e-03, -4.3881e-05, -1.2357e-05,
         1.1768e-05, -5.2738e-02, -4.7591e-05, -3.7583e-04, -4.6962e-05,
         5.6396e-06, -6.9534e-05,  4.2776e-04, -8.4941e-06, -2.6980e-06,
        -3.8777e-05,  1.1339e-05,  7.9250e-05, -3.6732e-02,  3.6244e-05,
         5.9734e-06,  1.4107e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1450: [tensor([-7.9684e-04, -8.4225e-02,  1.6890e-04, -1.5368e-04,  1.6860e-04,
        -1.1940e-04, -2.0932e-03, -1.0786e-01, -5.9672e-05,  1.4957e-04,
         1.8406e-04, -4.1900e-05, -1.0198e-01,  1.4273e-04, -7.0302e-04,
        -5.5365e-05, -8.6076e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7584e-04, -1.0218e-02, -7.4466e-05, -1.2545e-04,  8.2720e-05,
         1.2662e-04,  9.0338e-04, -1.8286e-01, -1.7474e-05,  2.3261e-04,
        -1.2836e-04,  5.5539e-05, -6.3288e-05, -1.5901e-04, -9.2443e-05,
        -2.6911e-05,  8.3871e-05,  5.1519e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2856e-03, -1.4547e-05,  5.4617e-05,  4.2188e-05, -6.2807e-02,
        -1.3406e-05,  5.5355e-05,  5.8543e-05,  1.8109e-05,  2.6753e-05,
         4.2490e-05, -1.5911e-01, -6.2085e-05, -7.3818e-06, -9.8915e-04,
        -1.0019e-05, -5.5718e-05,  3.7234e-05, -8.8912e-03, -7.9918e-06,
        -5.6697e-05, -1.3528e-04, -4.9301e-06, -8.8368e-05,  5.8818e-06,
        -6.3668e-05, -1.4907e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3526e-05, -2.0490e-05,  3.3269e-05, -5.2423e-06, -1.2625e-03,
         2.8336e-05, -2.9385e-07,  9.5665e-05, -2.4524e-05, -1.0609e-06,
         1.3143e-06, -1.3902e-02,  1.0245e-05,  7.5456e-06,  5.9369e-04,
         1.9896e-06,  8.0090e-06, -1.8890e-05, -2.4459e-02,  4.1173e-05,
         6.4459e-05,  4.4549e-05, -2.0998e-05,  7.7124e-06, -4.4507e-02,
         7.2528e-06, -1.8700e-05,  3.1686e-05,  2.1394e-05,  1.3723e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7550e-05, -1.2443e-05, -2.5146e-05, -4.4895e-05, -2.3422e-02,
        -7.3267e-06,  1.6984e-06,  2.1624e-05, -5.8490e-05,  2.1046e-06,
        -4.8599e-05, -2.8358e-02, -2.0383e-05, -2.0896e-05,  1.6354e-04,
        -1.3795e-05,  8.3018e-07, -2.5227e-05, -2.5949e-02,  5.3302e-06,
         7.4522e-06,  8.5554e-06, -4.6399e-05, -2.1688e-05, -1.8803e-05,
        -4.1398e-05, -1.8283e-02, -3.3594e-05, -2.3539e-05,  6.0601e-06,
        -1.1634e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8612e-04, -5.2859e-02, -8.8539e-06, -6.4250e-05, -7.4576e-05,
        -3.6411e-05, -7.4709e-06, -2.2077e-02, -4.3330e-05, -7.7281e-06,
         9.1278e-06, -3.6388e-05, -7.4044e-03, -9.8855e-06,  2.4862e-05,
        -8.8772e-02, -8.2002e-05, -1.0616e-04, -7.2743e-06, -4.5589e-02,
        -2.0263e-05,  6.4142e-05,  2.8902e-05, -9.7049e-03, -1.3477e-05,
        -5.3611e-05,  4.7499e-05, -9.9373e-05,  1.7482e-04, -4.1526e-02,
        -6.1941e-05, -1.4242e-05, -2.6031e-07,  2.3406e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6783e-04, -1.1441e-01, -4.8075e-06, -1.0513e-04, -7.4327e-05,
        -5.1766e-05,  4.1130e-05, -3.3176e-02,  9.3102e-06,  1.7232e-05,
         4.1196e-05,  2.3365e-06, -3.2255e-02, -1.0366e-05,  2.3481e-04,
        -1.0570e-02, -1.4258e-04, -1.6442e-05,  3.6294e-05, -2.1310e-02,
        -2.9682e-05, -9.4366e-06,  2.7725e-05, -4.5674e-02,  2.1453e-07,
         5.5922e-04, -1.7302e-05, -5.2549e-05, -8.2692e-06, -1.2145e-04,
         1.8657e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4665e-06, -5.2001e-02, -1.0112e-05, -5.5071e-05, -6.2020e-05,
        -1.6075e-05, -3.4919e-05, -3.3849e-02,  2.2250e-06, -7.9615e-06,
         8.7841e-05, -1.3580e-05, -3.5601e-02, -7.0074e-05, -1.0847e-02,
        -1.2792e-02, -7.2449e-05, -9.0171e-06, -4.7380e-05, -1.9745e-02,
        -4.9365e-05,  3.4044e-05, -1.2338e-05, -5.2870e-02, -4.4273e-05,
         3.7990e-04,  2.6911e-05,  3.1489e-05, -6.7940e-05, -7.8073e-06,
         1.4226e-05, -4.7990e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2943e-04, -2.9779e-02,  6.4151e-06,  8.5289e-06, -2.4211e-05,
        -5.2743e-05, -2.6030e-06, -1.4036e-02,  6.8133e-05,  1.4539e-05,
         8.0568e-06, -2.9861e-05, -1.9422e-02, -1.2403e-05,  4.0727e-04,
        -1.4266e-02, -7.9437e-05,  3.0155e-05,  2.2492e-05, -6.4981e-02,
        -3.1473e-05,  2.0522e-05,  1.8256e-05, -2.1539e-02, -2.6978e-05,
         2.1301e-06, -1.6589e-05, -3.5189e-02, -7.7018e-06,  6.7400e-06,
         4.3030e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8908e-04, -1.0040e-01, -4.8015e-05,  9.1559e-05, -3.9108e-05,
         5.6628e-05,  5.6874e-05, -7.0838e-02,  1.2469e-05, -9.9146e-05,
         1.8499e-04,  3.7221e-05, -8.9838e-02,  9.5839e-05, -1.7522e-03,
        -1.6590e-02,  2.3861e-05,  9.0314e-07,  4.1441e-06, -9.6258e-03,
        -4.2120e-05,  4.3731e-05,  2.0566e-05,  1.8129e-05, -7.0821e-06,
        -8.9428e-03,  4.2235e-05, -1.4921e-06, -3.1729e-05, -5.4715e-05,
        -4.6989e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1168e-05, -3.7042e-02, -4.0460e-06,  1.0616e-05, -3.5546e-06,
        -4.6926e-05,  7.6219e-06, -7.6187e-03,  6.6101e-06,  1.8719e-05,
        -2.2616e-05, -3.3727e-05, -9.0555e-02, -7.3309e-06, -4.9208e-04,
        -2.2433e-02, -1.2904e-04,  1.9591e-05, -2.9039e-06, -3.3690e-02,
         1.6612e-05,  3.3600e-05,  3.2626e-05, -4.3228e-02, -4.0349e-06,
         6.0510e-06, -2.4882e-03,  6.2339e-07, -1.7946e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6064e-05, -6.8849e-02,  2.6754e-05, -1.9834e-05,  4.4730e-06,
         3.3699e-06, -4.6521e-05, -1.9138e-02, -9.6120e-06, -2.5797e-05,
         5.9217e-05,  1.4597e-05, -2.4589e-02,  2.2139e-05,  6.9567e-05,
        -2.7101e-02, -3.7843e-05, -3.7706e-05, -7.3910e-05, -5.6341e-03,
        -7.1021e-06, -2.0867e-05,  2.3158e-05, -4.3518e-03, -2.8357e-05,
        -1.8618e-05, -2.5392e-05,  1.9895e-04, -3.5643e-05, -3.8329e-05,
         7.8139e-06, -4.5537e-05, -3.9017e-02, -4.9328e-05, -1.0108e-06],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1500: [tensor([-8.1230e-05, -3.8805e-05,  2.8222e-05,  6.1099e-06, -3.8895e-06,
        -1.5066e-02, -1.2987e-05,  7.9564e-06, -7.0763e-06,  1.2690e-05,
         1.3300e-05, -8.0917e-03, -1.6124e-03, -2.0159e-05, -5.3758e-03,
        -1.2520e-05, -6.2663e-06, -3.9515e-05,  2.4739e-05,  1.4553e-04,
        -4.1092e-05, -1.3809e-05,  3.3641e-05,  7.0030e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5751e-04, -1.0107e-01,  6.3407e-05, -7.0688e-05, -6.3669e-05,
         3.5416e-04,  4.7680e-07, -6.5451e-06,  2.6242e-05, -1.0452e-05,
        -9.5363e-04,  1.0828e-05, -4.2071e-05, -2.6442e-05, -4.8588e-05,
         4.2208e-04, -1.3784e-05, -4.6800e-05, -3.5748e-05,  1.0423e-05,
        -7.1186e-03, -2.1141e-06, -1.7988e-05, -2.3339e-05,  2.5417e-05,
         2.6029e-05, -4.6933e-05,  7.6139e-06, -3.1143e-02, -2.8745e-05,
         1.0059e-05,  1.2385e-05, -3.2402e-05, -1.7248e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0352e-05, -6.0090e-02, -2.1980e-05, -6.9740e-05,  2.4870e-05,
        -7.8617e-03,  9.6713e-06, -6.7776e-05,  2.1786e-05, -3.9748e-05,
        -1.3332e-02, -1.3386e-06,  3.2973e-05, -6.4870e-05,  7.0630e-06,
        -1.9333e-04,  8.9835e-06, -4.3556e-05, -4.3600e-05,  5.7740e-06,
        -1.8256e-02, -4.8933e-06,  3.2094e-05, -1.1341e-05, -2.4807e-05,
        -2.3359e-05,  3.6581e-07,  1.2545e-05, -4.9306e-02, -3.5282e-05,
         3.6599e-06, -1.8138e-04, -1.1357e-05, -3.1467e-05,  1.3589e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7909e-04, -5.5576e-02, -1.1593e-05, -6.8110e-05, -3.5045e-05,
        -9.7866e-03, -3.9814e-05, -4.1538e-05,  1.9502e-05, -3.3747e-05,
        -4.1568e-02, -1.8853e-05, -6.1844e-06, -3.6110e-05,  2.6021e-06,
         3.2324e-04, -3.4340e-05, -9.3249e-06, -1.1800e-04, -9.9774e-06,
        -1.6029e-02, -2.0591e-05,  1.9637e-05,  2.4235e-06, -1.3851e-06,
         2.4724e-05, -2.8194e-05, -3.0191e-05, -4.6958e-02,  5.1008e-05,
        -4.9283e-05,  3.1418e-05,  5.9144e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4271e-05, -2.7430e-05, -3.7296e-04,  1.3922e-05, -1.8925e-05,
         2.1474e-05, -3.7656e-06,  3.4101e-05,  1.9699e-04, -4.0512e-02,
         2.1161e-05, -4.1998e-05, -7.4409e-06, -8.0047e-06, -2.1000e-02,
         2.7160e-05,  1.0431e-05,  5.3840e-05, -1.7984e-05,  2.7998e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9319e-05, -1.8115e-05, -2.3380e-03, -1.0747e-05,  2.6786e-06,
         6.7627e-06,  1.0965e-05,  3.8899e-05,  5.2199e-05, -3.3303e-02,
        -2.8291e-06, -9.4362e-06,  4.9721e-05, -1.0622e-05, -1.4122e-02,
         1.1363e-05, -2.5920e-05, -1.8892e-05, -9.2412e-03, -7.0595e-06,
         6.2406e-06,  1.2013e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0832e-04,  1.7715e-06, -8.5098e-03,  1.8325e-06, -2.3623e-05,
        -3.0569e-07, -1.7195e-05, -7.4585e-06,  6.1468e-04, -2.8943e-02,
         1.8052e-05, -1.8731e-05,  5.1733e-05, -1.3992e-03,  3.7663e-05,
        -2.0089e-02, -6.1370e-06,  5.5652e-05, -1.4766e-06, -3.6188e-06,
         2.6042e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1292e-05, -7.8390e-02,  2.4971e-05, -1.8169e-04, -1.8119e-05,
        -5.7029e-05, -5.2336e-02, -1.0780e-05,  1.2519e-04,  2.4176e-05,
        -3.8292e-02, -1.0762e-04, -6.0713e-05, -6.6127e-05, -7.0707e-05,
        -2.0233e-05, -5.3784e-03,  6.0944e-06, -2.4538e-05, -1.1921e-04,
        -1.3775e-05,  1.4563e-06, -3.1393e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7884e-05, -4.1651e-02,  1.0467e-04, -7.3040e-05, -2.3944e-05,
        -3.7156e-05, -4.5959e-02,  2.7059e-06,  4.5107e-05,  6.3839e-07,
        -5.2274e-02, -3.0103e-05, -6.7746e-05, -1.7109e-05, -1.5550e-05,
         3.2642e-05, -5.8985e-03,  4.7633e-05, -4.2215e-05,  1.6764e-04,
        -5.2474e-06, -1.9491e-02, -3.6412e-05,  7.6978e-05,  9.0823e-06,
        -2.4987e-05, -1.9822e-05, -2.8396e-05, -3.9048e-06, -2.2383e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1322e-04, -1.0345e-01,  1.2735e-04, -9.2317e-05, -4.6281e-05,
        -6.8200e-06, -7.6608e-02,  1.3219e-05, -3.2255e-04,  1.1750e-04,
        -3.5285e-02, -6.7885e-05, -1.6735e-04, -1.6848e-04,  5.3113e-06,
        -5.9682e-06, -4.5747e-02,  1.1423e-04, -1.4534e-04, -2.2659e-03,
        -4.1760e-05, -4.6924e-02,  1.0014e-05, -1.8025e-05,  5.8705e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3457e-04, -1.0678e-04,  2.6801e-05, -6.3332e-05,  9.0982e-06,
        -4.8190e-05, -1.7668e-01,  8.0212e-05, -4.5880e-05, -1.0476e-04,
        -6.9372e-05,  1.7142e-05,  6.5831e-06, -1.5180e-04,  9.0354e-05,
         5.9645e-05,  6.2943e-05, -5.4052e-05,  6.0972e-06,  4.5725e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3511e-04,  7.1654e-05, -9.6037e-06, -3.6586e-05,  5.5047e-05,
        -6.5960e-05, -1.2125e-01, -9.6117e-05, -2.2386e-05, -1.3557e-05,
        -2.8744e-03,  4.3019e-05,  4.9414e-05, -3.1725e-02, -1.5876e-04,
        -1.0046e-04,  3.9513e-05, -8.3961e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1550: [tensor([-7.8054e-05,  4.7649e-05, -1.5272e-04, -3.4543e-05,  4.3607e-05,
        -1.4843e-05,  1.6079e-07,  8.2289e-02,  3.7061e-05, -3.6713e-05,
        -4.6006e-05, -2.0511e-05,  5.0979e-02,  3.7032e-05,  2.7035e-04,
        -4.8313e-05,  1.0958e-05,  1.7216e-02, -8.3288e-06, -5.2196e-05,
        -1.2420e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1829e-05, -7.8505e-05,  6.0238e-05, -1.3053e-04, -7.7282e-05,
        -1.3841e-04, -9.2338e-05, -8.9561e-02,  3.2596e-06, -1.6379e-05,
        -2.9021e-05, -2.9261e-05, -6.4687e-02,  6.6059e-05,  1.8031e-05,
        -8.2471e-02, -4.0221e-06,  4.4171e-05, -1.3940e-04, -2.2391e-05,
        -6.9388e-02, -2.9761e-05, -6.6249e-05,  7.5368e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5108e-04, -6.4960e-06,  3.8167e-05, -4.4651e-05, -1.2192e-02,
         5.5408e-07, -1.0634e-01,  4.3217e-05, -8.9498e-02, -7.3159e-05,
        -2.5924e-05, -3.4780e-05, -8.4998e-05,  1.7175e-05, -5.8257e-06,
        -1.3943e-05, -3.9875e-05, -1.1868e-04, -1.4304e-05,  8.3435e-05,
         8.8657e-05, -3.1701e-02,  5.0107e-05,  5.9097e-05,  6.1747e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2564e-04, -1.3697e-05, -1.6842e-04,  5.1549e-05,  7.3521e-02,
         4.9362e-05,  2.4284e-02,  8.7208e-05,  9.3304e-02,  7.9266e-05,
         2.3080e-05,  5.2371e-05,  1.0848e-04, -3.4338e-05,  7.0069e-05,
         2.3137e-05,  7.9344e-06, -7.0927e-04, -6.7225e-05,  9.9796e-05,
         2.3270e-03, -2.9559e-05,  7.6172e-06,  5.1435e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4401e-05, -1.8218e-05, -1.9838e-06, -7.4835e-05, -1.0988e-01,
        -2.9552e-05, -3.4574e-02, -5.0372e-05, -6.8862e-02, -7.5022e-05,
        -1.2384e-04, -6.5276e-05, -8.2158e-05, -2.0546e-04, -1.2696e-04,
        -6.7933e-06, -1.3057e-04, -2.2730e-02, -2.0781e-05, -7.7377e-05,
        -7.8505e-05, -2.1711e-03,  1.9728e-05,  5.9989e-05, -2.4029e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7703e-04, -2.7533e-02,  2.1389e-05, -1.3246e-02, -1.2872e-01,
        -1.9774e-05, -7.0009e-05, -8.9423e-05, -4.2849e-05, -1.1346e-04,
         9.0001e-05, -4.1478e-05,  9.0086e-05,  3.1949e-05, -1.0717e-01,
        -7.8700e-05, -6.2973e-05, -3.6062e-05, -3.7171e-02,  4.1746e-05,
        -9.5763e-05,  1.3287e-05, -7.6863e-05, -1.0832e-05, -1.0282e-04,
         8.4797e-05,  4.0750e-05,  5.2126e-05, -7.3795e-03, -5.2853e-05,
         3.5949e-06, -3.8157e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3684e-04, -6.9996e-02,  2.9321e-05, -2.4839e-02, -5.3377e-02,
        -1.0142e-05, -2.8343e-05, -7.6013e-05, -4.2238e-05, -3.8700e-05,
        -2.7180e-05, -7.1523e-06,  6.7243e-05, -4.0957e-06, -3.6011e-02,
         3.6148e-05, -5.2683e-05, -1.2672e-05,  2.4352e-05,  3.4630e-04,
        -6.3289e-04,  2.9418e-06,  4.0360e-05, -3.3480e-02,  4.8960e-05,
        -3.1183e-05, -1.9006e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.8680e-05, -3.0451e-02,  3.4811e-05, -5.5847e-02, -7.9909e-03,
        -2.0490e-05,  3.1643e-05, -1.5474e-05,  1.9502e-05, -1.0568e-04,
         7.5270e-05, -4.4843e-05, -5.9934e-05, -5.2671e-06, -5.9604e-02,
        -1.0294e-05, -2.6418e-05,  2.1551e-05,  8.7772e-05,  3.1655e-05,
        -2.9841e-06, -7.2504e-05,  1.8423e-06, -1.8350e-05,  7.5689e-05,
        -4.9798e-05, -5.3737e-03,  4.8860e-05,  3.0291e-06,  3.5418e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7889e-05, -1.0798e-03, -2.9905e-05, -1.7253e-02, -3.9554e-05,
         2.4480e-05, -1.8254e-03, -1.1861e-01, -7.4482e-05,  3.2981e-05,
        -8.2070e-05, -4.1985e-05,  2.7987e-05, -4.7580e-05, -7.4879e-05,
         8.8429e-06,  6.9056e-05, -1.5037e-03,  1.4831e-05,  2.2925e-05,
         9.3645e-05, -1.1521e-02,  3.4273e-05, -4.3611e-05, -1.7525e-05,
        -6.4547e-02,  4.0059e-05,  4.6722e-06,  2.3269e-05, -4.4748e-05,
        -1.9555e-03,  1.0040e-05,  9.9816e-05, -1.4333e-05,  1.3833e-05,
        -2.4143e-05, -6.1721e-03, -9.2718e-06,  4.3971e-05, -3.4964e-06,
        -3.8800e-05,  2.6373e-04, -2.7160e-05, -2.9813e-05,  1.3182e-07,
        -1.9381e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9252e-04, -4.3250e-03,  5.0978e-06, -2.2375e-02, -8.2471e-06,
         5.4976e-06, -2.3773e-02, -6.3581e-02,  2.6179e-05, -1.1737e-05,
        -1.5375e-05, -1.4200e-05, -1.9269e-05, -5.9104e-05, -2.5251e-05,
         1.9129e-05,  3.4887e-06, -5.8400e-03, -1.4241e-05, -6.9031e-05,
         1.3613e-04, -3.8739e-02,  2.5565e-05,  2.5592e-05, -3.1328e-05,
        -9.6250e-03,  2.6661e-05, -1.1128e-05, -5.6648e-07, -3.4589e-05,
         1.7775e-04, -1.9186e-05,  9.6878e-05, -2.7349e-05, -1.3051e-02,
         2.9729e-05,  8.3625e-06,  1.0801e-05,  6.8250e-06,  1.1321e-05,
         1.5034e-05, -1.3150e-02, -1.6002e-02, -1.6690e-05, -9.3646e-06,
        -3.5811e-05, -5.5812e-06, -1.7027e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9867e-05, -3.7942e-02, -1.5063e-05, -3.8160e-03, -3.6334e-06,
        -1.6655e-05, -1.6942e-02, -1.1057e-02, -1.3861e-05,  6.3947e-06,
         1.2995e-05, -1.4330e-05, -8.8165e-06, -3.3452e-05, -4.7483e-05,
         2.4824e-05, -1.0929e-05, -1.0952e-03, -5.7753e-06,  2.2869e-05,
        -3.0787e-04, -5.0099e-02,  1.2689e-05,  1.4587e-05,  1.0468e-05,
        -7.8238e-04,  7.5911e-06,  1.6064e-05, -1.1332e-05, -1.0965e-05,
        -2.6915e-04,  1.3644e-05,  4.1344e-04, -2.7442e-02, -1.7844e-05,
        -4.7098e-02,  8.7769e-06,  4.2374e-04, -2.2336e-04, -3.8975e-06,
        -9.9916e-06, -4.0579e-05, -3.2680e-06, -2.5120e-05, -2.4752e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0824e-04, -1.3419e-03, -2.3698e-05, -2.3046e-02,  1.0862e-05,
         7.3423e-06,  6.4337e-04, -5.1110e-02, -1.8149e-05, -2.7311e-05,
         1.7638e-06,  2.3900e-05, -8.8303e-06,  2.8376e-06, -3.4446e-05,
         4.3952e-05,  1.7907e-06, -3.1502e-05, -1.3367e-05, -1.1470e-02,
         1.2463e-05, -1.2954e-05, -5.9667e-05, -1.1909e-05, -5.4239e-06,
        -1.1367e-05, -4.5755e-02, -2.5921e-05, -3.0629e-04, -1.7780e-05,
        -2.6752e-06, -2.0039e-05, -2.3671e-06, -2.5123e-05, -1.2570e-05,
         1.5746e-05,  1.8176e-07, -1.8566e-05, -6.8696e-06,  1.6131e-05,
        -1.2436e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1600: [tensor([-3.9147e-05, -6.9331e-05, -7.6278e-06, -1.7425e-03, -7.0337e-02,
         7.7658e-05, -3.8561e-05, -1.1303e-04,  2.3642e-05, -1.4854e-01,
         3.3847e-05,  2.3467e-05, -6.7316e-05, -5.0284e-05, -3.4008e-06,
        -5.4968e-05, -1.0482e-04,  2.1569e-05, -1.4394e-05,  1.1583e-05,
        -1.2483e-03, -8.8585e-05,  6.4671e-05, -2.0324e-05, -7.3557e-07,
         1.6626e-05, -6.7438e-03, -1.5297e-05, -6.4719e-05, -8.9825e-05,
        -2.1817e-05, -8.3023e-03, -2.0881e-07, -1.0950e-04,  6.3561e-05,
        -1.5170e-05, -1.5194e-02,  4.0716e-05, -9.0897e-06,  1.0681e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0250e-04,  7.3484e-05,  4.1789e-05,  7.5697e-05, -1.4127e-04,
        -3.7045e-05, -1.4422e-04, -8.0833e-05,  1.5488e-02, -8.8220e-05,
         6.6577e-06,  5.7789e-05,  2.0632e-05, -4.0614e-05,  9.5697e-05,
        -3.6251e-05,  1.0829e-01, -1.4506e-04,  8.7425e-05,  4.2184e-06,
        -1.5481e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3318e-04,  1.0108e-05, -5.2842e-05,  9.5804e-05,  6.4634e-05,
         2.5732e-05,  1.1917e-04, -1.1597e-05,  7.7467e-02,  8.6750e-07,
        -2.0211e-05,  2.8816e-05,  7.9219e-06,  4.4015e-02,  4.6812e-05,
         3.2247e-05,  1.0641e-04, -2.5948e-05,  2.6496e-05, -1.2713e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2480e-04, -2.1278e-06,  1.1507e-05,  2.6414e-05,  2.2896e-05,
        -1.2911e-05,  5.7355e-05, -5.1255e-05,  4.8413e-02, -4.9621e-05,
        -3.6635e-05,  6.8198e-07,  4.7469e-05,  6.0769e-06, -3.7492e-05,
        -2.1118e-04, -5.4763e-05,  2.5034e-05,  2.8788e-05, -4.3039e-05,
         2.3057e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3489e-05, -6.5789e-06, -3.7384e-05,  1.5999e-04, -4.6216e-05,
        -1.2551e-05,  1.2553e-04,  2.5910e-05,  9.0078e-02, -5.3404e-06,
         6.0321e-05,  5.5810e-06, -2.9550e-05,  3.7495e-05, -6.5387e-05,
         4.1529e-02, -2.9490e-05,  2.1805e-05, -2.5712e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5982e-04,  4.2792e-05,  4.4623e-05,  3.0955e-06, -5.1561e-05,
         4.8371e-05,  2.4200e-05,  2.0247e-05,  3.2470e-02, -3.8819e-05,
        -3.3915e-05,  3.2860e-05,  7.6655e-06,  3.3189e-02,  9.4794e-05,
         1.4597e-02, -1.8987e-05, -1.0773e-04,  8.6611e-02,  6.6492e-05,
         1.7678e-05,  2.4708e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6761e-04,  1.0810e-04, -9.1566e-06, -1.5122e-04,  6.7641e-05,
         1.6334e-05, -9.3141e-05, -1.9727e-04, -1.3162e-01,  2.2561e-05,
        -1.3546e-04,  9.8394e-05,  4.9503e-05, -4.0997e-02,  6.5884e-05,
        -4.3232e-02,  8.6058e-05,  2.1924e-05,  1.0501e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4972e-04, -5.0105e-05, -3.1626e-05, -3.3530e-05,  7.1082e-05,
         1.1508e-04, -2.3291e-07, -8.8821e-06,  1.1732e-02, -1.5998e-05,
        -6.9377e-06,  8.1268e-06, -1.8648e-05,  6.8586e-02, -3.8902e-05,
         5.1368e-05, -1.4135e-05, -1.7007e-05,  2.7665e-05, -1.1876e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1831e-04, -1.1851e-05, -3.1896e-05,  9.6195e-05, -6.1141e-05,
         5.3003e-05,  9.4887e-05, -1.4795e-04,  1.2777e-01,  3.0994e-05,
        -8.3879e-05,  1.0254e-04,  1.7888e-05,  1.7422e-02,  2.9747e-05,
         3.9684e-05,  3.7771e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6622e-05, -1.1062e-05, -2.9597e-05,  1.1681e-04,  1.4094e-04,
         6.6180e-05, -5.4640e-05, -1.2086e-04,  2.4921e-02, -2.1728e-05,
        -1.6405e-05,  2.9367e-07, -1.5584e-04,  1.0796e-01,  3.6558e-05,
        -7.3801e-05,  6.0224e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0779e-04,  2.3622e-05, -4.3902e-05,  9.8255e-05,  6.4929e-05,
         1.5630e-04, -3.6577e-05, -4.3903e-05,  5.7030e-05, -1.4341e-06,
         7.3304e-05,  3.7289e-05, -3.1185e-05,  1.3202e-04,  6.5518e-05,
         3.9988e-05,  1.5864e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1147e-06, -1.0969e-04,  2.5892e-05,  2.6021e-05, -2.7217e-05,
        -3.8284e-05, -4.6758e-05,  8.5022e-05,  1.3530e-02, -1.9790e-05,
        -2.6475e-05, -7.5847e-05, -8.2861e-05, -2.8895e-05, -5.5926e-04,
        -2.6124e-05,  8.0201e-02, -4.9244e-05, -1.1958e-05,  5.8484e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1650: [tensor([-9.7152e-05,  1.8058e-04,  4.1510e-05, -1.7908e-04, -4.1974e-05,
        -1.0088e-04,  7.4672e-05,  7.4791e-05,  7.4122e-05, -2.4718e-04,
         2.0351e-01, -1.8164e-04,  1.7844e-04, -5.9200e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8990e-04, -3.0527e-05, -4.0686e-05, -4.1120e-05,  3.9042e-05,
         1.1150e-01,  7.1257e-05,  6.5629e-05,  1.0606e-04,  2.8080e-05,
         7.2346e-05, -5.1755e-05, -2.8671e-05, -1.5189e-03,  4.1606e-02,
         8.8872e-06,  1.1154e-04,  2.0333e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8316e-05, -4.7427e-05, -3.4435e-05, -2.8282e-05, -5.7074e-06,
         6.2027e-02, -2.1050e-05,  4.1956e-05,  7.1983e-07,  2.4801e-05,
         2.0459e-02, -3.0148e-05, -1.5300e-04, -7.9849e-06, -3.6723e-05,
        -8.4047e-05,  8.1074e-03, -3.4640e-05,  4.7260e-06, -3.8443e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2813e-06, -1.5224e-05, -2.3014e-05,  4.8705e-05, -2.5016e-05,
         1.0255e-01,  1.5717e-06,  6.1466e-06, -1.6404e-05,  6.8781e-05,
         3.6399e-02, -5.9154e-06, -2.2575e-04,  1.4088e-05,  4.4486e-05,
        -1.8895e-04, -1.6929e-04, -4.1448e-05,  1.1012e-05, -1.6513e-05,
         4.0197e-05, -2.1498e-05, -8.1342e-05, -1.0042e-05, -2.4122e-05,
        -2.8204e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4451e-04, -2.6101e-02,  2.8156e-05,  1.3317e-03, -1.3451e-05,
         7.2533e-05,  2.1026e-05, -7.5743e-02,  9.9498e-05,  3.8607e-05,
        -3.2812e-06, -1.0813e-06,  6.5592e-05,  4.3877e-05,  6.4065e-03,
         3.2682e-06, -4.5134e-05, -4.9474e-04,  5.1578e-05, -3.3885e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9243e-04, -4.1600e-02, -6.7999e-07, -5.1745e-03, -1.2167e-05,
         2.1890e-06,  2.7655e-05, -6.4378e-03,  4.8867e-05,  8.6941e-06,
         3.4006e-05, -5.0980e-05,  2.4966e-05,  4.7297e-05,  2.4813e-05,
         1.8601e-05,  9.9646e-06,  1.2532e-05,  1.3007e-03, -3.3467e-06,
        -7.6577e-05, -2.6917e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4153e-04, -7.3095e-02,  2.4768e-05, -8.9082e-04, -1.3465e-05,
         3.7101e-05, -1.7375e-05, -3.2794e-02,  6.0795e-05, -5.3111e-05,
        -2.5106e-05,  5.5358e-05,  2.8201e-05,  1.7681e-05, -3.2751e-03,
         3.2249e-05, -2.8713e-06, -4.6019e-03, -4.7345e-05,  5.7062e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6878e-04, -6.5724e-02, -3.0227e-05,  4.7310e-05, -2.5215e-02,
         1.6591e-05,  3.7912e-04,  7.2318e-05, -2.4429e-05,  4.9341e-05,
        -3.1748e-05,  3.5717e-05,  3.6784e-05, -5.3335e-02,  4.6525e-05,
         6.7275e-05,  2.2635e-05,  5.1921e-05, -2.5792e-03,  2.3328e-05,
         2.7564e-04, -1.0988e-05, -2.2914e-02,  3.6887e-05,  7.3964e-06,
         4.1649e-05, -3.6923e-05,  2.5264e-05, -5.1745e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6240e-04, -6.3641e-02,  1.6194e-05,  9.6437e-04, -3.2174e-02,
         3.3932e-05,  7.6800e-04,  3.7355e-05,  1.4254e-06,  3.0511e-05,
        -7.9999e-06, -5.2265e-05,  5.2211e-05, -3.9934e-02,  1.9905e-05,
         8.1516e-05,  1.6975e-05,  8.1005e-06, -9.6606e-03,  1.5288e-05,
         8.1178e-04, -1.5618e-05,  2.8113e-05,  1.7401e-03,  5.2761e-05,
        -1.7019e-05,  4.7554e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3632e-04, -1.5946e-02, -5.5396e-05, -6.7475e-03, -4.0968e-02,
        -4.2185e-05,  4.3505e-04, -8.2421e-06, -4.8077e-05,  8.2225e-05,
        -1.5477e-05,  1.2430e-05,  2.2065e-05, -4.5026e-02, -2.0643e-05,
         3.3984e-05, -5.2169e-06,  3.2412e-05, -1.3452e-02,  4.4938e-05,
         8.0952e-04, -9.1953e-06, -3.7454e-02, -7.3786e-06,  2.2894e-04,
        -2.8501e-02, -2.2625e-05,  6.4331e-06,  3.5081e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3702e-04, -4.9043e-01, -8.6619e-05,  6.6463e-04,  7.4913e-05,
         2.9014e-05,  3.1289e-03, -1.4966e-04, -5.6776e-05, -2.6026e-05,
        -2.2119e-04, -1.6741e-03,  9.2575e-05,  2.3031e-04,  8.0960e-04,
         1.2151e-04,  7.9948e-05, -1.6773e-05,  4.6297e-05,  2.6702e-05,
        -9.2534e-05,  3.8923e-05,  2.9406e-05, -5.3343e-02,  9.5038e-05,
         1.0796e-04, -2.4199e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5652e-04,  9.1853e-02,  7.0050e-05,  2.0972e-03, -3.4676e-05,
         3.0262e-06, -4.4789e-04,  5.4966e-05, -1.5913e-05,  3.4948e-05,
         1.1975e-05,  7.6461e-04,  2.8237e-05, -2.8118e-03, -2.6282e-05,
        -2.3048e-06,  8.3054e-06, -3.3176e-05, -6.2189e-05, -7.1393e-05,
         1.7948e-05,  2.0098e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1700: [tensor([ 1.3744e-04, -6.8918e-02, -3.2081e-05, -6.5556e-05, -2.1470e-05,
        -6.7187e-05, -7.5122e-05,  2.3399e-05, -3.0621e-05,  3.2755e-03,
         4.4705e-05,  4.1573e-05, -8.0672e-05,  3.3011e-05, -5.5077e-02,
        -1.7177e-05, -3.7282e-06,  1.3100e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0282e-04, -6.3515e-02, -4.9125e-05, -1.2236e-05, -2.2055e-05,
        -1.5326e-06, -3.6570e-05,  3.5648e-05,  1.3076e-05,  3.8219e-03,
         1.0447e-05, -9.3029e-06, -8.5192e-06, -8.3241e-06, -2.5942e-02,
        -3.7272e-06, -1.4287e-04, -7.6632e-06, -6.3736e-06, -3.2947e-05,
         1.8043e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4397e-05,  7.2466e-07, -3.5651e-05,  9.5213e-06, -5.6962e-05,
        -1.5506e-01, -4.3123e-05, -5.6501e-03, -1.4223e-05,  8.3788e-04,
         3.8933e-05,  4.2186e-05, -1.4601e-06,  2.4393e-05, -5.7897e-02,
        -1.0029e-04,  6.5146e-06,  4.0849e-05, -2.3980e-05,  3.6715e-04,
        -1.9003e-05,  2.5908e-06, -3.1907e-02, -2.3520e-05, -4.9572e-05,
         6.2081e-04, -1.7770e-03, -8.3208e-05, -5.7608e-05, -1.3914e-04,
        -3.4565e-05, -9.6513e-05, -6.1703e-05,  3.2215e-06, -3.5101e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9628e-04,  2.1570e-05,  4.2514e-06,  1.5106e-05,  8.0421e-05,
        -6.6490e-02, -2.5621e-06,  2.1946e-03, -1.8250e-05,  2.2271e-03,
        -2.4138e-05,  3.2717e-05, -1.5825e-05, -9.7955e-06, -2.0337e-02,
        -2.0904e-05, -3.3389e-05,  7.5548e-06, -5.7424e-05,  7.7569e-04,
        -7.1108e-05,  3.2839e-05, -8.1333e-06,  5.2664e-06, -5.0139e-05,
         1.2002e-05, -4.4003e-05, -1.3223e-06,  7.5996e-04,  5.5272e-06,
        -2.8013e-05, -1.0505e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0709e-04, -7.4899e-05, -9.2135e-05,  9.8018e-05, -3.0760e-05,
        -1.3268e-01, -4.8362e-05,  1.6890e-03, -4.8188e-06, -9.3467e-04,
         2.0204e-05,  4.4061e-05, -1.0725e-05, -3.6996e-05, -5.8194e-02,
        -3.5441e-05,  2.3501e-05, -5.1237e-06, -1.4985e-05,  4.7797e-05,
         6.2899e-06,  1.1323e-03, -6.2675e-05, -2.7281e-05, -8.8541e-06,
        -1.0033e-04,  7.3834e-04, -3.9641e-05,  1.1263e-05, -6.2713e-05,
        -1.3811e-02, -5.4155e-05, -4.6260e-05, -1.7917e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6825e-05, -5.5725e-03, -2.0890e-05, -2.6840e-05, -9.7878e-06,
         1.3594e-05, -8.7353e-03, -3.2684e-06,  3.7721e-05,  4.0228e-06,
        -6.4037e-06, -4.1560e-02, -4.4567e-05, -7.3794e-05,  1.5770e-05,
         1.4254e-05,  1.4188e-03,  8.4051e-06, -4.0076e-05, -5.3744e-06,
        -7.6427e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5928e-04, -4.5900e-02,  1.8187e-05, -1.3407e-05, -3.0393e-05,
         1.3683e-05, -3.6529e-03, -3.9139e-06, -3.4481e-05, -2.6061e-05,
        -2.5173e-06, -4.2440e-02, -2.7444e-05, -3.1273e-05, -3.9169e-05,
         1.1986e-05, -4.5845e-05, -5.7184e-05,  3.5078e-05,  1.0228e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3989e-04, -5.6164e-02, -3.0703e-05, -5.6602e-05,  3.8398e-06,
        -1.9073e-05,  2.8232e-03, -2.8267e-05,  6.7318e-05, -6.9089e-05,
        -1.4888e-05, -3.6698e-02, -8.0408e-06, -7.5667e-05, -2.4526e-05,
         2.1951e-05,  8.4406e-04, -1.0060e-05,  1.5690e-05,  3.0617e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0750e-04, -1.6266e-05,  7.1394e-06, -6.6305e-05, -3.0791e-05,
         3.4499e-06, -2.2612e-05, -1.2933e-05,  6.1796e-05, -3.2121e-06,
        -3.0528e-05,  3.1726e-05,  1.2862e-01, -1.6483e-05, -4.9111e-05,
        -1.7616e-05, -3.4299e-05,  3.2658e-04, -8.5586e-05, -3.9680e-05,
         4.6520e-03, -2.1879e-05, -3.7873e-06, -9.0827e-06, -8.1505e-08,
         7.6770e-05,  6.5767e-04, -3.0404e-05, -4.2845e-05,  1.4830e-05,
         3.3858e-05, -6.9853e-06,  1.4536e-05, -7.5276e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1022e-04,  3.0018e-05, -1.6663e-05,  7.3610e-05, -4.0238e-06,
         2.0486e-05,  9.1268e-08,  1.7720e-05, -1.9254e-05, -3.3373e-06,
         1.0056e-05,  5.3203e-02, -7.8868e-05,  1.0673e-05, -2.4638e-05,
         1.0277e-05, -4.4754e-05, -2.2689e-05, -1.2248e-05,  3.1318e-05,
        -6.0410e-06,  4.4997e-02, -1.1260e-05, -1.6902e-04,  2.0150e-02,
         5.6135e-06, -1.4355e-06,  2.4075e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4517e-04,  1.2534e-04, -7.7957e-05,  1.8564e-04, -1.7896e-05,
        -1.6270e-05, -8.4418e-06,  1.1682e-06, -1.5257e-04, -8.0408e-05,
        -9.6836e-05, -1.1849e-01, -6.6500e-05, -7.8022e-06,  3.8999e-05,
        -1.1209e-04, -7.5496e-05, -2.8341e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5323e-04,  6.5209e-05, -1.1225e-04, -5.4587e-05,  9.2132e-05,
         1.3501e-04,  2.4419e-04, -2.1649e-01,  5.5703e-05,  4.3734e-06,
         1.1659e-04, -1.6223e-05, -1.2961e-04,  6.8841e-03,  1.5640e-05,
         4.7890e-05,  2.0677e-04,  1.1381e-03,  2.2606e-05, -3.4041e-05,
        -2.1784e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1750: [tensor([-3.2270e-05, -1.5877e-02,  3.6423e-06,  2.7958e-05, -1.6020e-02,
        -4.6622e-06,  1.6340e-05, -3.4262e-06, -2.8812e-06,  4.0710e-06,
         7.2639e-05, -3.7725e-06, -1.9257e-02,  1.9702e-06, -2.1057e-05,
        -8.4219e-06,  4.5352e-04,  2.0352e-05, -1.6133e-05, -1.3500e-05,
        -9.3252e-06,  1.7341e-03, -3.5564e-06, -1.6457e-05,  1.0210e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8897e-04, -2.1963e-02, -1.5611e-06,  2.3707e-05, -2.0862e-02,
        -2.6085e-05,  1.9617e-05,  1.2169e-05, -2.1795e-05, -3.9735e-07,
         5.1968e-06, -1.0184e-05, -2.5077e-02, -1.1844e-05, -2.3655e-05,
        -2.9785e-05,  2.1817e-03,  6.4984e-06,  3.3467e-06, -8.0683e-06,
        -2.0592e-05,  6.1497e-06,  4.3623e-06,  2.2660e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1833e-04,  2.0964e-05,  7.1648e-06, -1.7044e-05, -1.2699e-03,
         3.8060e-05, -6.1182e-03, -1.7817e-02,  2.2247e-05,  1.9733e-05,
        -5.6796e-03, -5.2508e-03,  3.0004e-05, -2.0123e-05, -1.0451e-05,
         1.3446e-05,  1.0936e-05, -2.1871e-06,  1.6915e-05,  2.0427e-03,
         2.1466e-03,  1.4655e-05,  6.1554e-03, -8.5883e-06,  9.9225e-06,
         2.4845e-03, -8.2013e-03,  2.2095e-05,  2.1054e-05, -6.2558e-06,
        -1.6052e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.1784e-05, -4.1669e-06, -4.0950e-06,  3.6881e-06,  7.7478e-04,
         7.0785e-06,  4.9877e-04, -1.0117e-02,  1.8223e-05,  8.2547e-06,
         1.2506e-03, -2.3255e-02,  2.4708e-06, -9.5145e-06, -1.4402e-05,
        -1.1881e-05,  1.3991e-05,  1.8952e-04, -5.0229e-05,  1.1900e-05,
         3.1684e-03,  1.8047e-05, -5.2203e-06, -8.1006e-06,  1.0318e-05,
        -6.6611e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7204e-05, -2.5846e-05,  3.3642e-05, -3.5585e-07,  8.1584e-04,
         4.6584e-06,  3.2470e-03, -2.7027e-02,  1.2048e-05, -3.3370e-06,
         5.7632e-04, -1.5617e-02,  3.4950e-05,  1.4600e-05, -4.3059e-06,
        -6.8359e-07, -1.6985e-06,  1.4209e-04,  7.7375e-06,  2.6457e-03,
        -2.2039e-02,  9.5208e-06,  2.9316e-06,  9.2571e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1419e-04, -1.3133e-02,  9.2071e-06,  1.5839e-06,  1.3216e-03,
         1.7695e-06,  1.6839e-05,  7.5649e-06,  1.1307e-03, -1.0851e-05,
         3.6571e-06, -1.3963e-05, -1.5791e-02,  2.0782e-05,  5.2443e-06,
        -1.2927e-05, -1.3539e-03,  1.1261e-05,  9.8676e-04, -2.7475e-05,
         1.9649e-05, -6.8630e-06, -8.8643e-03,  1.3693e-05,  1.3644e-05,
        -6.5002e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3760e-05, -3.4686e-02, -8.8963e-07, -2.1306e-06, -7.5472e-04,
        -2.2032e-05,  3.2133e-05, -7.9079e-06, -9.8077e-04, -5.4297e-05,
         3.8433e-05, -2.5132e-05, -3.7383e-02, -1.0945e-05, -4.3208e-05,
        -2.5769e-05, -4.5847e-03,  1.3410e-05,  9.6124e-05,  5.8583e-06,
        -5.8788e-05, -4.5089e-05, -1.7340e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5822e-05, -3.5450e-02,  1.1088e-05,  1.9696e-05,  1.3231e-03,
        -1.9370e-05,  5.8563e-06, -4.5895e-06,  8.4305e-04, -1.8072e-05,
         1.4516e-05, -2.5713e-05, -1.3222e-02,  1.6406e-05, -1.3935e-05,
        -7.9681e-06,  1.8284e-03, -3.4024e-05,  4.2050e-04, -2.4085e-05,
         1.6627e-04,  7.2828e-03, -1.1877e-05,  2.9422e-05, -1.2262e-05,
        -5.3225e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1211e-05, -6.8912e-05, -2.5594e-03, -1.8471e-05,  7.9492e-05,
        -3.8902e-05, -1.3277e-02,  1.2020e-04, -3.9958e-05, -3.7827e-05,
        -7.0344e-05, -1.2515e-01,  1.6537e-06, -3.3224e-05,  5.9274e-05,
         3.2438e-06,  1.9332e-03,  1.0637e-05,  6.0882e-06, -1.0652e-05,
        -9.9220e-02, -1.8966e-05, -9.3197e-06,  2.0723e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6158e-05, -4.4261e-05, -4.9797e-03, -2.4050e-06,  2.8036e-05,
        -3.3877e-06, -8.7146e-03,  6.7285e-05,  1.7724e-05,  3.3622e-05,
         2.9786e-05, -5.1406e-02, -5.8312e-06, -4.1850e-06, -1.2679e-05,
        -5.7898e-05,  1.0999e-04, -4.2316e-05, -1.5664e-03,  4.8881e-05,
         2.6273e-05, -7.5421e-06,  4.5444e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3546e-04, -4.5790e-05,  1.0572e-03, -1.3345e-05,  1.9987e-05,
        -6.6718e-05,  2.5684e-03, -1.6587e-05, -1.1862e-05, -3.4649e-06,
        -1.8524e-05, -7.2843e-02, -4.5605e-05,  3.8635e-06, -1.1284e-05,
         6.4037e-05, -5.7692e-06,  2.4174e-03, -8.9108e-05, -6.4442e-06,
        -5.0858e-02,  8.2190e-06,  5.2822e-05, -2.4525e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9728e-05,  9.5408e-06,  5.3247e-06, -3.9995e-05,  2.6386e-03,
         7.5945e-06,  4.1841e-06, -1.4125e-05, -1.1920e-05, -1.3986e-05,
        -2.0907e-02, -2.4918e-05, -5.0325e-05, -2.9496e-02, -2.3160e-06,
         1.7302e-06, -2.2582e-05,  4.3453e-06, -2.0124e-02, -2.1734e-05,
         3.3184e-03,  1.1123e-05, -1.0839e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1800: [tensor([-1.3405e-04, -7.4324e-02,  1.9313e-05,  5.3302e-06,  3.8809e-05,
         2.6749e-05,  1.2999e-05,  5.1565e-05,  2.5820e-05,  5.9512e-05,
         3.2913e-03,  1.3489e-05,  1.7321e-05,  1.2489e-05,  6.0612e-05,
         1.5904e-04,  3.5489e-05,  3.4913e-03, -2.0990e-02, -2.4668e-05,
        -2.1924e-05,  2.0942e-05, -4.8656e-02, -3.4604e-05, -3.5305e-06,
         1.8309e-05,  1.8138e-05,  6.7340e-06,  7.5283e-05,  5.3701e-06,
         1.2575e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9713e-05, -8.4910e-02,  1.1859e-05,  9.4125e-05, -9.8559e-06,
         1.3882e-04,  1.0997e-05,  6.0726e-05,  1.1026e-04,  1.7159e-05,
         2.3268e-03, -4.3405e-05,  6.6608e-05, -2.4032e-07,  1.4587e-04,
         2.5584e-04,  7.8901e-05,  2.1482e-03, -3.4940e-02,  5.3133e-05,
        -5.1913e-05,  2.6208e-06, -1.2617e-01, -6.8247e-05,  8.8061e-05,
        -2.3517e-05,  5.3352e-05, -5.6485e-05, -6.5148e-05,  6.1558e-05,
         6.2983e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5577e-04, -7.4563e-02,  4.5606e-05,  2.1711e-05,  3.0052e-05,
         8.4840e-06,  4.6560e-05,  1.0946e-05,  3.5064e-03,  4.1760e-05,
        -3.5850e-05,  2.2800e-05,  1.0521e-05, -5.0799e-02,  3.1393e-07,
        -1.5547e-05, -1.8446e-05, -3.0899e-02, -1.7492e-05, -9.4783e-05,
         2.4070e-05, -5.4095e-05, -2.4140e-05, -5.9116e-03, -1.4421e-05,
         2.1238e-04, -5.1306e-05,  1.7016e-05,  6.9418e-05,  2.7754e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3465e-05, -4.3947e-02,  6.4108e-05, -1.1789e-05, -9.5621e-06,
         9.4294e-06,  1.4638e-06,  1.2156e-06, -2.1798e-02,  2.6985e-05,
         1.9142e-05,  5.9052e-06,  5.7305e-05, -2.1631e-02,  6.0975e-06,
         2.1527e-05, -1.9400e-05, -1.1331e-02,  4.6319e-05, -1.3136e-04,
         3.5807e-05, -2.5894e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4079e-04, -2.9602e-02,  3.6247e-06, -2.1266e-05, -2.4809e-05,
         3.7181e-05,  5.4678e-06,  2.7084e-05, -4.0133e-02,  3.5886e-05,
         2.5443e-05, -1.8600e-05,  2.3074e-05, -3.3273e-02,  5.5024e-05,
         6.4037e-05,  4.9687e-05, -1.8075e-02, -2.9747e-05,  2.2777e-03,
        -1.5856e-04, -1.0289e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6504e-05,  1.3016e-04, -4.3316e-05,  1.1238e-05,  1.7048e-03,
         3.4372e-04,  1.6518e-04, -1.0193e-04,  8.9977e-05, -1.2756e-01,
         4.3213e-05,  1.1767e-04,  8.5810e-05,  7.0934e-05,  1.0610e-05,
         4.2069e-03, -6.0195e-05, -4.6125e-05, -1.4715e-01, -1.0803e-04,
        -8.0751e-05, -1.9196e-05,  2.9052e-03, -6.6554e-02, -4.2165e-05,
         7.3706e-05, -1.4972e-05, -3.7004e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5884e-04, -1.6628e-05,  1.2130e-05, -9.9806e-06, -9.5767e-03,
         5.0872e-04, -2.6180e-05,  4.1965e-05, -7.0485e-05,  7.3579e-02,
        -2.5142e-05, -1.8159e-05, -2.0178e-05,  6.0675e-05, -1.8203e-05,
        -5.0518e-03,  4.0515e-05, -2.0245e-06,  3.2138e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6504e-05, -1.2391e-05, -1.1943e-05, -3.4521e-05, -2.2233e-04,
         5.0487e-04, -1.7413e-05,  6.9717e-05, -1.3805e-04,  1.6439e-01,
        -1.0143e-05, -4.8724e-05,  1.7018e-06, -6.3367e-05, -9.1534e-05,
         3.8610e-05, -4.3899e-05, -6.2021e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9566e-06,  4.9195e-05,  4.8803e-05, -2.1189e-05, -6.8547e-06,
        -4.6420e-05,  3.4085e-05, -1.2798e-02,  2.4020e-05,  2.9932e-05,
        -1.9505e-05,  1.5985e-05,  1.4507e-05, -2.5468e-06,  1.5476e-03,
         3.4186e-05,  2.8961e-05, -3.6821e-02, -3.7625e-06,  2.1408e-06,
        -1.0440e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7375e-04,  5.7017e-05,  6.8920e-05, -4.2370e-06, -1.1827e-05,
        -8.3189e-05,  2.3029e-05, -6.7827e-02, -1.3087e-05,  1.9898e-05,
         5.3096e-05,  2.9044e-05,  4.4404e-05, -1.2327e-05,  1.5590e-03,
        -1.5026e-05,  3.1138e-03, -3.7613e-02,  3.4459e-07,  3.1342e-05,
        -1.1993e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0787e-04,  4.1158e-05,  2.5725e-05,  3.3619e-05, -1.9638e-05,
        -4.3003e-05,  3.7671e-05, -1.9502e-02,  4.7393e-05,  3.4124e-05,
        -1.4061e-05, -1.4545e-05,  5.7673e-05,  3.9246e-05,  6.3972e-03,
         4.2709e-05,  3.5064e-05, -2.4059e-02,  1.5518e-05, -2.1404e-05,
        -1.8297e-03,  2.5046e-05,  1.2778e-05, -2.1166e-05,  1.2373e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5591e-05, -1.7245e-01,  4.0261e-06,  7.4843e-05, -1.2769e-04,
         1.5793e-04,  1.0181e-04,  2.7930e-04, -1.0170e-01, -4.0856e-05,
        -2.0178e-04,  4.3911e-05, -3.1583e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1850: [tensor([-6.4520e-05, -3.8126e-01,  7.2074e-05,  1.5731e-05,  7.7341e-05,
         2.5981e-03,  2.0335e-04,  8.8624e-06,  2.1223e-05,  6.9314e-05,
         8.8655e-04,  2.4877e-04,  9.7434e-05,  4.2662e-05,  1.5257e-04,
         5.6077e-05, -1.1226e-05,  1.9040e-04, -1.2306e-01,  6.1617e-05,
         3.9548e-05,  3.1721e-03, -1.1324e-05,  9.8516e-05, -5.7754e-05,
        -1.0002e-04, -1.8253e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4197e-05,  5.3451e-02, -5.2325e-05, -1.8343e-05,  4.9961e-06,
         7.7220e-04,  8.3707e-05,  1.4654e-05, -1.1391e-04, -1.0738e-05,
         1.2434e-05,  3.2440e-06, -1.0831e-04,  6.2408e-02, -1.5631e-05,
        -2.5565e-06, -2.8685e-06,  9.8260e-06, -4.4003e-03, -7.0652e-05,
         9.6263e-06, -5.8717e-05, -1.6411e-05, -5.0898e-06,  1.3597e-05,
         2.0167e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0525e-04,  7.2259e-03,  3.2630e-05,  1.5627e-04, -6.4077e-05,
         1.7465e-04, -2.0063e-05, -7.7710e-06,  2.7181e-04,  1.9421e-05,
        -6.7712e-05,  1.9283e-05, -1.4847e-05,  4.3369e-03,  4.9751e-05,
        -9.2795e-07,  1.5653e-05,  6.9045e-05, -1.1899e-01,  3.1444e-05,
        -9.6812e-06, -7.0819e-02, -5.3973e-05, -1.9520e-05,  3.4600e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1943e-05, -2.3163e-03,  5.7634e-05,  2.4361e-05, -1.7792e-04,
        -5.6065e-04, -1.7164e-04,  5.3131e-05,  1.8077e-04,  9.3160e-05,
         1.3495e-04,  1.3213e-04, -1.2208e-04, -2.7089e-01,  9.6254e-05,
        -6.8440e-05,  1.2393e-04,  2.8294e-04, -9.7206e-02, -7.0137e-05,
        -2.5088e-05, -5.2842e-05,  1.4609e-04,  4.8707e-05,  6.7276e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9815e-05, -1.5782e-01,  3.3397e-05, -6.2477e-05, -7.3428e-02,
        -2.2779e-05,  3.2851e-05,  5.0796e-05,  1.1522e-04,  1.4672e-04,
         1.1683e-04,  8.7065e-03,  4.7013e-05, -1.2347e-05,  4.4401e-05,
         2.2266e-06, -9.1683e-03,  2.0079e-05, -2.6827e-05,  1.1293e-04,
        -9.5476e-05,  6.2373e-03, -1.9877e-05,  1.2577e-06,  1.7634e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4104e-06, -2.2164e-01,  9.5724e-05, -1.5726e-04, -8.2447e-02,
        -1.1787e-04,  6.7247e-05,  4.5894e-06,  1.6844e-04,  4.5630e-05,
         1.5291e-04, -4.3218e-02,  6.2016e-05, -9.6612e-05,  3.8157e-05,
         8.7719e-05, -5.8851e-02,  6.2679e-05,  4.5326e-05,  2.3743e-04,
         7.1955e-03, -5.8766e-02, -7.2123e-05, -1.6944e-04, -9.0612e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5300e-04, -1.3429e-01, -1.3590e-04, -1.0547e-05, -4.4309e-02,
        -4.6774e-05,  1.6257e-04,  1.0299e-05,  1.7460e-04,  1.3657e-04,
         3.7356e-06, -1.0380e-01, -1.4981e-05, -5.6016e-05,  6.5736e-06,
        -6.0742e-05, -1.3307e-01,  3.1919e-05, -1.1127e-04, -7.0155e-05,
        -3.3765e-05,  2.6722e-04, -3.1049e-05,  1.1097e-04, -8.2924e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8858e-04, -2.0124e-01,  1.2137e-04,  1.0087e-04,  1.7547e-05,
         1.2186e-04, -1.3787e-02,  1.4066e-04, -1.1449e-04, -6.5129e-05,
        -1.9717e-05,  4.5569e-06, -7.0748e-02,  2.4337e-05,  2.1948e-04,
         2.3671e-05, -1.1298e-01,  9.4630e-05, -1.4074e-04, -1.5922e-05,
        -2.1800e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9486e-04,  1.0650e-01, -2.9831e-05, -3.1115e-05,  2.0040e-04,
         9.9659e-06,  7.2100e-02, -2.5999e-05,  6.9903e-06,  8.9880e-05,
        -4.5240e-05,  3.0967e-05,  1.7093e-05,  1.3454e-06, -4.4326e-05,
         8.1479e-06, -5.3335e-05, -1.2410e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3069e-05,  5.7819e-02,  1.0591e-05, -1.0615e-04, -5.1064e-05,
        -3.4007e-05,  6.6929e-02, -8.7974e-05,  8.6891e-05,  9.5108e-05,
        -2.5804e-05, -3.3838e-05,  4.4004e-02,  6.1563e-05, -9.0255e-05,
        -1.7451e-05,  5.8513e-05,  4.1405e-02,  1.2096e-04, -1.0864e-04,
        -2.0042e-05,  1.1913e-04,  2.6180e-05,  1.9919e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0228e-04,  2.0676e-01,  6.8783e-06, -5.1872e-05,  1.0809e-04,
        -1.7351e-04,  4.5781e-02,  1.5934e-04, -4.1679e-05, -4.6793e-05,
        -4.2304e-06,  7.6923e-06, -2.2926e-04, -1.6358e-04, -1.1635e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3078e-04, -5.2907e-02,  1.6410e-05, -1.2147e-05, -3.2902e-06,
         4.7190e-05,  1.9004e-02,  1.0438e-05, -2.7701e-05,  5.9540e-05,
        -4.5096e-02,  1.1304e-05,  6.3793e-05,  1.3009e-04,  5.5409e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1900: [tensor([-1.3168e-04,  1.6171e-05,  8.3604e-06, -1.5680e-05, -3.2578e-05,
         3.1309e-05, -1.9998e-05,  3.1628e-02,  8.2157e-06, -1.3620e-05,
         6.2710e-06,  3.2691e-02, -4.0524e-05, -4.4151e-05,  1.1075e-05,
         1.1613e-02,  1.8531e-05,  1.7312e-05, -2.0875e-05,  1.5160e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3788e-05, -7.8240e-02, -7.0846e-06, -6.1390e-05, -8.5227e-06,
         1.4403e-03, -5.6141e-05, -3.6290e-05,  3.7667e-03,  6.2406e-05,
        -4.0214e-05, -3.4986e-05, -4.7989e-02,  3.5621e-05, -3.7025e-05,
        -3.6020e-02,  3.8815e-05, -3.1032e-05,  7.1442e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2323e-04, -2.9550e-02,  5.2520e-06,  4.0014e-06, -1.9689e-05,
         1.8755e-04,  1.5709e-05, -3.6785e-05, -4.5291e-02,  1.7126e-05,
        -5.9609e-06,  1.6223e-05, -2.6071e-02,  1.1205e-05, -6.0828e-05,
        -3.3310e-02, -2.4284e-05, -1.0551e-05,  5.6420e-07,  5.5743e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6071e-05,  7.1736e-02, -1.2651e-05,  8.8156e-06, -1.8294e-05,
         2.4961e-04, -2.1246e-05, -2.3930e-05,  4.8874e-02, -2.8556e-06,
        -2.7065e-05, -5.5292e-05,  1.8433e-02, -7.9311e-06,  6.1363e-05,
         1.7576e-02,  1.1338e-05,  3.5840e-05,  2.9841e-02, -1.0888e-05,
        -5.4001e-05,  8.2251e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6208e-04, -1.3038e-05, -6.8616e-05,  1.7235e-05,  8.9981e-02,
         8.0218e-06, -2.8041e-05, -2.5165e-05,  2.3819e-05, -5.3636e-05,
         8.8921e-05,  4.6940e-02,  2.9914e-05,  1.5244e-04, -1.3740e-05,
        -4.3290e-05,  9.1362e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2102e-05,  2.7871e-06, -2.0435e-05, -4.0705e-05,  2.2217e-02,
        -2.9097e-06, -8.8111e-06,  1.1213e-05, -9.6121e-06, -8.5843e-06,
         2.0135e-06, -3.2082e-03,  6.7265e-05, -1.5973e-05,  2.6667e-05,
        -7.5393e-06,  2.8241e-02, -4.5260e-05, -1.2100e-06, -1.8246e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8221e-04, -3.8178e-05, -8.7826e-06,  1.1769e-05, -6.4581e-02,
        -7.4621e-05, -2.5728e-05, -4.4714e-05, -8.0398e-06, -7.2564e-02,
         9.3903e-05, -2.2384e-05,  5.1421e-05,  3.6423e-05, -3.8590e-02,
        -4.5804e-05, -6.5664e-06,  1.4043e-05, -7.7555e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7954e-04, -2.9791e-05,  6.4499e-06, -2.2183e-05, -6.9012e-04,
         5.1687e-05, -1.8632e-05,  5.8962e-02, -3.7619e-05,  3.2037e-06,
        -1.7963e-05,  2.8138e-05,  8.5235e-03, -4.1217e-05, -1.0658e-02,
         2.3493e-05,  9.3599e-06, -7.2312e-03, -1.8423e-05, -1.5062e-05,
        -5.7889e-05,  4.1876e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4080e-05,  9.8602e-05,  9.2078e-05,  1.0319e-04,  8.9768e-04,
        -2.5011e-05, -5.3901e-05, -8.7845e-02, -1.8397e-05, -1.1980e-06,
         4.1953e-05, -3.0346e-05, -9.2994e-02, -2.1967e-05,  7.1171e-05,
         1.9544e-04, -4.0877e-05, -6.6481e-02, -8.6992e-05,  7.6861e-05,
         6.6437e-05, -3.1695e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4606e-05, -1.1776e-05, -8.4563e-05, -7.8470e-05, -1.9106e-04,
         6.5313e-05, -9.4646e-05,  5.2023e-02, -2.2585e-05,  1.5029e-05,
        -1.2895e-05,  2.9293e-05,  5.5778e-02, -5.8519e-05, -2.9216e-05,
        -9.4341e-05,  1.0914e-02, -2.5549e-05, -2.5914e-05, -1.6373e-05,
         6.1427e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9150e-05,  1.0270e-01,  2.1534e-05, -2.6144e-05,  5.8105e-04,
         2.1242e-05,  5.8953e-05,  6.8978e-05,  1.8221e-05, -1.3412e-05,
        -5.4391e-06, -6.2681e-06,  8.9008e-07, -1.7072e-05, -2.2548e-05,
         5.2318e-03,  1.6442e-05, -3.6982e-05,  1.0738e-04, -2.4151e-06,
         6.0704e-02,  5.2491e-05, -4.0914e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3662e-05,  7.8444e-02, -2.8996e-05, -2.4680e-05,  7.7173e-02,
         1.2699e-05, -7.4458e-06,  2.6034e-05,  5.9220e-05, -1.4695e-04,
        -1.1393e-06,  3.5153e-05,  3.0737e-05,  8.7817e-06,  1.8416e-05,
        -1.4619e-03, -3.5014e-05, -9.6878e-06,  2.5174e-05,  4.4567e-05,
         2.1007e-02, -1.4074e-05, -4.7131e-05, -1.5840e-05, -1.4552e-05,
        -1.3895e-05, -7.4878e-05,  2.6814e-05,  1.3046e-05,  1.2606e-05,
         1.9746e-05,  3.3733e-06], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1950: [tensor([-8.3737e-05, -6.4616e-02, -5.4744e-05,  2.1876e-05, -3.8648e-05,
         1.8722e-05, -4.9227e-05, -3.5709e-05, -1.5270e-02, -4.6176e-05,
        -6.2646e-06,  4.5169e-05, -2.9519e-05, -6.3083e-02, -9.3919e-05,
         8.1062e-05, -6.9539e-05,  1.3139e-05, -9.6706e-02, -8.9405e-06,
         4.9781e-05,  2.0474e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9869e-04, -8.9497e-02,  8.3686e-05,  2.1976e-05, -9.8511e-02,
         1.2125e-04,  2.4710e-05, -5.6363e-06,  5.0662e-06, -1.9812e-06,
         2.3503e-04,  4.9729e-05,  3.6733e-05,  1.2574e-05, -3.4554e-06,
        -7.2556e-02,  1.3979e-04,  2.2357e-04,  4.3340e-05,  3.0355e-05,
         1.0435e-04,  5.3247e-03, -8.1292e-05,  7.6004e-05, -1.0869e-01,
         1.1809e-04,  6.7542e-04, -3.6375e-05,  9.6817e-05,  3.0884e-05,
         5.2522e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5040e-04,  7.3468e-02, -2.8823e-05, -1.2570e-04,  6.5726e-02,
        -8.1495e-05,  8.3751e-06, -2.9112e-05, -7.3292e-06,  3.8251e-05,
        -2.3345e-04,  5.8962e-05,  2.7855e-05, -4.1347e-06,  2.0026e-05,
        -1.9296e-05, -3.2961e-05,  1.8797e-06, -4.2275e-03, -1.4568e-05,
        -2.2813e-05,  3.3889e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1341e-05, -1.4562e-01,  1.3120e-05,  1.4360e-04,  3.4093e-03,
         8.4829e-05, -8.3951e-06,  1.5448e-05,  1.8470e-06, -1.2831e-05,
         2.4464e-04,  1.9346e-05,  4.3992e-05,  4.4791e-06,  4.1958e-06,
        -4.3250e-02,  4.9787e-05,  7.0435e-05,  7.1152e-05,  1.0549e-03,
         3.6966e-06,  4.5708e-05, -6.4543e-02,  1.1654e-05,  6.6710e-04,
        -1.0120e-05,  6.1939e-05, -9.2004e-07,  4.4062e-05,  1.2425e-05,
         7.3836e-06,  7.4258e-05, -1.5591e-05,  2.8670e-05, -4.3691e-05,
         2.9316e-05, -2.0202e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5684e-04, -2.1650e-01,  6.4735e-05,  6.1740e-05,  1.2443e-04,
         3.2062e-04, -3.2023e-02, -1.2044e-04,  1.1807e-04,  9.0261e-05,
        -4.5434e-05,  1.0835e-04,  3.6427e-05, -2.3582e-05,  1.3690e-04,
         2.8026e-05,  1.1839e-05, -9.6468e-02,  6.4268e-06,  1.8681e-06,
         2.3073e-04, -2.5746e-05,  9.3787e-05,  1.4494e-04,  9.0737e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2146e-04, -1.5949e-01,  6.1653e-05, -2.9651e-06, -9.8157e-05,
         2.5602e-03, -4.2997e-02, -3.7993e-05,  1.9721e-05, -2.7719e-05,
        -7.3670e-06, -3.2251e-06,  1.2877e-05, -6.6559e-05,  2.7503e-05,
        -2.6066e-05, -1.8420e-04, -7.1197e-02, -1.4928e-07, -6.4760e-05,
         4.7347e-05,  3.4443e-05,  6.4283e-05, -1.2810e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1536e-04,  7.9774e-02,  1.8711e-05,  6.2350e-05, -4.4794e-05,
        -6.5971e-04,  9.2095e-02, -1.5106e-05,  4.0480e-05,  1.4294e-05,
        -5.4739e-05, -2.9195e-05,  1.2919e-05,  6.2326e-05,  2.2811e-05,
         6.9582e-05,  1.0677e-05,  3.8080e-02,  2.8573e-05, -1.2882e-05,
        -1.1402e-04, -4.2203e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7109e-04,  1.3010e-02,  3.2135e-05, -6.2032e-05, -1.4633e-04,
         2.9710e-03, -2.4403e-05, -1.1845e-04,  1.6521e-04, -1.5288e-04,
        -4.5382e-05,  1.0183e-05, -1.0792e-04,  3.4485e-05,  1.1449e-07,
        -4.7978e-06, -2.1912e-03, -2.9575e-01, -1.6120e-05,  1.8457e-04,
         1.5860e-04,  4.8792e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5211e-05,  1.4643e-01, -2.2129e-05,  4.7914e-05, -1.0517e-05,
        -5.0071e-04, -2.2163e-06,  2.9126e-05, -8.1638e-05,  3.4235e-05,
         7.4079e-05,  4.1121e-05,  2.7297e-05, -7.5756e-05, -3.4550e-05,
         3.2269e-05, -8.2425e-05,  8.5139e-05, -9.6634e-04,  7.3434e-02,
         4.8496e-05,  4.1551e-05,  4.9849e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5057e-04, -2.6710e-01,  1.5667e-05,  4.5613e-05,  9.8053e-05,
         2.0523e-03, -2.7860e-05, -1.2261e-04,  1.2709e-04, -1.4718e-05,
        -2.4301e-05, -6.4709e-05, -6.4790e-05,  9.2268e-05,  5.5753e-06,
        -6.1637e-05, -7.9726e-05, -8.6788e-02,  1.0986e-04,  4.9976e-05,
        -4.9669e-05,  3.0082e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6242e-05,  2.9783e-03,  2.1955e-06,  9.5911e-06, -6.8149e-03,
        -1.5949e-05,  1.5555e-03, -1.1965e-05,  2.8977e-05, -3.6128e-02,
         2.1884e-05,  6.2463e-06, -8.3998e-06, -1.3901e-02,  2.9153e-06,
         2.1331e-05, -2.0494e-05,  4.6089e-05,  2.8310e-03,  2.4626e-05,
         1.4394e-05, -2.4645e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7628e-05, -4.6834e-02, -9.1104e-06,  4.4273e-06, -3.2096e-02,
        -2.7701e-06,  1.0235e-04, -2.9349e-05, -2.6905e-06, -1.6907e-02,
         2.6598e-05, -7.6635e-06, -4.4006e-06, -1.2038e-02,  1.0115e-05,
         8.5946e-05, -1.6567e-06, -6.2573e-06,  3.0978e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2000: [tensor([ 3.1368e-04,  7.8369e-06,  5.8183e-05, -1.1409e-05,  1.8503e-05,
        -3.2098e-05, -5.5482e-05,  1.8378e-06, -5.2707e-04,  7.2242e-02,
         3.6399e-05, -2.0464e-05, -2.4444e-05, -8.0726e-06, -2.9218e-06,
        -3.7198e-05,  5.7766e-05,  2.1822e-05,  1.3174e-06, -2.4905e-05,
         4.8614e-02, -4.5675e-05,  7.6086e-06, -3.1587e-05,  2.3153e-05,
        -9.6656e-05, -5.2814e-06, -2.7725e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1335e-05,  5.8169e-06,  4.8880e-07,  1.5861e-05, -1.6792e-05,
        -1.2513e-05, -2.5679e-05,  8.1195e-06, -3.3744e-04,  5.1224e-02,
         1.7925e-05, -6.1515e-05,  8.5939e-06, -2.3914e-05, -3.2589e-05,
        -1.6083e-05,  3.0761e-06, -8.7757e-06, -2.8117e-06, -2.3429e-07,
         7.4779e-02, -2.2352e-05, -3.8912e-04, -4.8298e-05, -3.1431e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4355e-04,  8.2811e-06,  5.0634e-05, -3.5802e-04, -2.4312e-05,
        -9.4727e-05,  2.8066e-05, -3.0925e-02, -2.8414e-04,  3.9972e-05,
        -2.1488e-05,  5.2087e-06,  4.2584e-05, -3.4244e-05, -9.7763e-05,
        -3.5284e-05, -1.8324e-05, -1.3320e-05, -5.0456e-02, -1.8654e-05,
        -1.9033e-05, -7.6733e-06,  3.9705e-05,  2.6736e-04,  3.5499e-05,
        -1.7119e-05, -6.6430e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1647e-04, -3.8494e-05, -8.0230e-06, -8.5657e-05,  1.4571e-06,
        -7.2212e-05, -4.1063e-05, -4.4808e-04, -1.4972e-04, -8.9952e-06,
         4.9111e-06, -2.5981e-05, -3.5768e-05,  1.1006e-05, -1.8795e-04,
        -1.7804e-05, -4.2335e-06,  1.1079e-05,  2.4655e-03,  1.2713e-05,
         1.4805e-05, -7.3027e-06, -3.6663e-05, -1.2657e-03, -4.7041e-05,
         8.4845e-02, -3.1828e-05, -7.5473e-06, -1.9629e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8335e-04,  1.0198e-04,  7.3994e-05,  1.8979e-04, -3.5666e-05,
        -1.9157e-04,  9.7402e-05, -2.7665e-02,  3.6956e-04,  4.5941e-04,
        -2.0268e-04,  1.1761e-04,  2.4746e-04, -1.1186e-04,  3.5938e-04,
         2.0797e-05,  5.3550e-05, -2.3780e-04, -1.4180e-02,  2.4305e-05,
        -2.7858e-05,  4.7183e-05,  3.7471e-04,  8.4544e-04, -3.1534e-05,
        -1.4455e-05, -5.6616e-01, -7.2419e-05,  4.7838e-05,  5.0004e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3396e-04, -9.3686e-04, -7.6192e-06,  5.7290e-06,  1.6651e-05,
         1.1153e-05,  5.4119e-06, -1.4288e-02,  1.4504e-05, -7.0365e-07,
        -4.3405e-06, -8.9819e-06,  2.3070e-05, -1.4798e-02,  5.8698e-07,
         6.4620e-06, -1.4231e-05, -7.5536e-03,  3.9404e-06, -1.4171e-06,
        -3.4867e-06, -4.5481e-06, -7.9785e-03,  7.4467e-06,  1.1912e-05,
        -7.9543e-07, -1.2758e-02, -6.5771e-06,  6.2574e-06, -2.0628e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9524e-04,  3.4766e-03, -4.7545e-06,  1.0386e-06, -3.1584e-05,
        -1.9173e-05, -3.2767e-05,  5.6484e-03,  7.2948e-06, -1.4970e-05,
        -3.3435e-05, -2.5447e-05,  1.9254e-05, -4.2257e-02, -2.7597e-06,
         1.9531e-05, -1.7129e-05,  5.1100e-03, -1.8218e-05,  1.2559e-05,
        -4.7268e-06,  3.9367e-06, -1.3239e-06, -2.3493e-02, -1.2022e-05,
         1.8404e-05,  7.2435e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8881e-04,  3.0991e-03, -1.7713e-05,  3.5484e-05,  1.3019e-04,
         3.4599e-05,  2.4976e-05, -2.9033e-02,  1.4491e-05,  1.9333e-06,
        -3.0720e-06,  5.1807e-06,  6.9592e-06, -3.9901e-02, -1.3997e-05,
        -1.9830e-06, -1.2139e-05, -1.5234e-02, -4.5513e-05,  3.9812e-05,
         1.9956e-05,  2.8720e-05, -2.4372e-02, -1.5282e-05, -3.4620e-05,
        -8.9679e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.3467e-05,  6.1917e-05, -1.0932e-05,  7.0506e-05,  2.6926e-03,
        -5.4896e-05,  3.8458e-05, -5.3312e-02,  5.2405e-05,  4.0950e-05,
        -6.3493e-06, -1.6193e-06, -4.6329e-05,  4.7599e-05,  8.0184e-05,
         3.1519e-05, -6.5936e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1290e-04,  2.2719e-05,  8.6002e-06,  5.1226e-05, -4.8295e-02,
        -3.2356e-05,  4.8531e-05, -3.9407e-02,  6.2984e-05,  3.8724e-05,
         4.6369e-07,  4.4208e-05,  2.2203e-05,  6.4429e-04,  7.4197e-05,
         3.3403e-05,  4.7463e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8746e-05, -6.1384e-06, -5.5036e-05,  6.4818e-06, -1.1222e-02,
         2.4122e-05,  8.2689e-05, -1.0679e-01,  5.0556e-05,  7.8010e-06,
         3.3636e-05,  1.3535e-04,  5.1237e-05,  4.0871e-05,  4.3781e-05,
         6.6730e-05,  1.6270e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3029e-04, -1.3214e-02, -1.9557e-05, -8.0844e-06,  1.6232e-03,
         4.0082e-05,  1.8668e-05, -8.7099e-05, -1.5487e-04,  7.0704e-05,
         3.9403e-03,  3.7663e-05, -1.2077e-04, -1.1449e-04, -1.4874e-01,
         6.7333e-05,  5.8990e-05, -1.5113e-04, -2.8280e-05, -6.4672e-05,
        -1.0609e-01,  1.3709e-05,  2.7615e-05, -9.4931e-02, -2.5763e-05,
        -1.3254e-04, -7.9968e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2050: [tensor([ 2.5317e-04, -1.2367e-01,  1.2666e-04,  3.0550e-03, -3.9206e-06,
         6.0416e-05,  1.1511e-05, -5.8708e-06,  8.2447e-05,  6.8240e-06,
         8.5307e-06,  5.0387e-03,  5.5626e-05,  6.8689e-03, -1.3385e-05,
         1.2542e-02, -1.1008e-04, -1.5843e-06,  5.9733e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5855e-05, -8.9492e-02, -3.4592e-05, -4.5832e-05,  1.9497e-06,
         2.6603e-05, -5.9276e-05, -1.9074e-05,  2.0978e-03, -3.8990e-05,
         4.9459e-05, -3.0479e-05, -3.0218e-02,  1.3489e-04, -3.8588e-05,
         3.2379e-05,  3.3484e-04, -5.6971e-05,  1.7968e-06, -2.8898e-02,
        -5.6382e-05,  4.3026e-05,  3.6089e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0507e-04,  5.5805e-02, -3.7953e-05, -1.0464e-04,  6.2627e-06,
        -5.4533e-07,  2.8643e-05,  1.7729e-05, -2.1131e-03,  7.1558e-05,
         5.0613e-05, -6.2442e-06,  7.7341e-02, -9.3975e-05, -9.0948e-05,
         6.7935e-05, -1.3442e-04, -2.3740e-05,  7.5051e-06, -3.4353e-03,
         4.6853e-05, -3.5502e-05,  4.4623e-05, -5.1689e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6191e-04,  1.9015e-01, -8.1648e-05, -1.5639e-04,  1.4727e-05,
         1.5544e-05, -4.2089e-05, -1.5919e-05,  9.9697e-03, -1.6689e-05,
         6.4185e-05, -8.6511e-05, -2.9251e-03, -5.0832e-05, -1.9090e-04,
        -5.8884e-06, -6.7529e-04, -8.6932e-05, -3.2637e-05, -4.8404e-03,
        -7.0254e-05, -7.4941e-06, -6.2637e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1808e-05, -1.0911e-02, -5.6789e-06, -5.8136e-06, -7.1914e-04,
        -1.5454e-05,  7.6564e-06,  5.5628e-06,  3.5337e-06, -1.0576e-02,
        -2.5339e-06,  7.6258e-06,  2.4703e-06, -9.3541e-07,  3.2870e-05,
        -1.4880e-02, -1.5924e-05,  1.1793e-05,  6.1042e-04,  4.5855e-06,
         3.8411e-06, -5.8676e-06, -6.4205e-03, -6.8762e-06,  3.0761e-04,
        -8.1587e-06, -5.6838e-03, -4.8672e-06, -1.2870e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0304e-04, -2.1892e-02,  7.8144e-07, -6.5498e-06, -2.1045e-02,
        -1.2594e-05,  7.2396e-06, -1.3918e-05, -1.3359e-05, -1.3510e-02,
         3.6192e-06,  5.0346e-06, -2.3709e-05, -1.4366e-05,  7.0744e-05,
        -1.4097e-02, -1.5521e-05, -1.3308e-05, -9.5296e-03, -6.4891e-06,
        -8.0610e-06,  2.3508e-05, -8.3482e-03,  1.0780e-05,  8.5641e-08,
        -1.7151e-05, -6.9595e-03, -3.6754e-06, -2.0844e-05, -1.6358e-05,
         1.9997e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1532e-04, -1.6193e-02, -2.6384e-05, -8.9037e-06, -1.5296e-02,
        -2.5705e-06,  2.8352e-07, -1.8993e-05, -1.3301e-05, -1.2827e-02,
        -1.2643e-05,  1.6897e-07, -8.4526e-06, -1.1244e-05,  1.7607e-04,
        -1.8243e-02, -3.4846e-05,  6.8088e-06,  1.2306e-03, -1.2501e-05,
         2.2881e-06, -1.1162e-05, -4.8031e-03, -6.0187e-06, -4.0659e-05,
        -1.2051e-05, -2.5006e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5474e-04, -3.0300e-02,  2.3839e-06, -3.2943e-06, -2.0790e-05,
         1.4442e-04, -5.2512e-06, -1.3846e-06, -1.8528e-02, -1.9505e-02,
        -1.3222e-05, -1.2578e-05, -1.9181e-05, -1.0857e-05,  3.5282e-03,
        -1.6337e-05,  6.4547e-06, -1.0147e-05, -2.7715e-02,  3.7834e-05,
        -5.4791e-05,  7.6815e-06, -1.6119e-05, -6.5820e-06,  2.1495e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8242e-04, -8.2768e-02,  2.6316e-05, -5.5240e-05,  4.2221e-05,
         6.9352e-04, -2.8650e-05,  2.2461e-05, -2.8800e-02, -6.1846e-02,
        -3.1518e-05, -4.5158e-05,  3.4014e-05,  4.3808e-05, -1.9874e-02,
         1.0416e-05, -8.6329e-06,  2.3348e-05,  2.2017e-03, -4.5033e-05,
         1.4310e-05,  3.3624e-05, -4.2326e-05,  3.7088e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4543e-05, -4.4868e-02, -2.5790e-06, -1.3203e-05, -1.5346e-05,
         9.1208e-04,  1.9602e-05, -1.8441e-05,  1.8752e-03, -1.1384e-02,
         3.1420e-06, -1.8890e-05,  3.9083e-05,  1.2986e-05, -2.3055e-02,
        -2.2036e-05,  1.2028e-05,  1.1303e-05, -1.3836e-02, -2.0766e-05,
         8.5968e-06, -2.7006e-03, -1.6123e-05,  1.8514e-05,  3.6836e-05,
         1.2206e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1326e-04, -5.2936e-02, -4.1856e-05,  3.9110e-06, -1.0194e-05,
        -1.3671e-04, -2.9557e-05, -1.1196e-05,  5.6881e-05, -1.1859e-05,
        -6.8873e-06, -2.4264e-02, -3.0311e-05, -2.1413e-05,  8.0169e-06,
        -2.8926e-02,  9.0700e-06, -3.7010e-05, -3.2639e-05,  3.3768e-07,
        -2.4238e-02,  6.5572e-06,  6.6645e-06, -1.4043e-02, -2.0770e-06,
        -2.4062e-05, -2.2467e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7209e-04, -7.9096e-02,  2.3645e-05, -8.5201e-05,  1.3700e-05,
         3.3228e-03, -5.2780e-05, -5.3044e-06,  4.0607e-05,  1.6327e-05,
        -3.1744e-05,  8.8785e-04, -4.4195e-05, -1.7833e-05, -2.4643e-05,
        -2.1688e-02,  1.3499e-05, -4.0730e-05, -4.9970e-05, -2.8230e-05,
        -3.8419e-02,  1.8345e-05,  2.3878e-06,  4.7415e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2100: [tensor([ 6.2547e-06, -1.6206e-04,  3.6488e-06,  4.6787e-05, -7.8621e-06,
        -7.4480e-04,  9.9750e-06, -2.3354e-05,  1.5033e-05, -4.7669e-05,
         1.1859e-02, -3.9704e-05, -3.8185e-04,  6.8254e-02,  5.3739e-06,
        -1.8561e-05, -7.2221e-06, -4.4727e-05, -4.0446e-05,  2.7081e-05,
        -4.3388e-05, -8.5562e-06, -4.7581e-04,  6.3091e-03, -1.5770e-05,
        -9.1267e-04, -5.3555e-05,  2.9008e-05, -1.0406e-05,  3.0432e-02,
        -7.6574e-06, -2.3304e-08, -1.1507e-05,  2.2292e-05,  1.5448e-05,
        -1.1706e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0662e-04, -1.2677e-05,  8.6717e-05,  1.9124e-05,  1.4044e-04,
         3.5813e-05, -6.6492e-05,  2.9805e-04, -1.3539e-01,  8.0816e-05,
         1.1074e-05, -1.2037e-04, -2.9381e-05, -4.8290e-05, -8.1857e-02,
         7.9822e-05,  7.5263e-04, -1.1069e-05,  1.5458e-02,  5.4299e-05,
        -1.8571e-05,  4.7660e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4617e-04,  6.7794e-05,  5.5826e-05, -7.7595e-05, -1.1175e-03,
         2.5050e-06, -6.3795e-06, -1.8973e-04,  1.6689e-01,  9.9607e-05,
         1.0790e-04,  1.0524e-04,  1.1968e-05,  3.7815e-05,  5.1208e-05,
         5.2892e-05,  1.0047e-04, -4.5523e-06, -1.7714e-06,  1.1550e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0127e-04,  9.9141e-05,  9.9389e-05,  1.8257e-04,  1.9622e-04,
         1.0800e-04, -8.9619e-06,  2.0416e-04, -1.7769e-01,  1.0262e-05,
        -5.3067e-05, -1.3511e-04, -8.1667e-06,  7.3958e-05, -2.0605e-05,
         8.5004e-05, -5.3393e-05,  4.1466e-05, -5.3675e-05,  1.9833e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0116e-05, -6.7661e-02, -3.5031e-05, -2.5055e-06, -8.0312e-02,
         3.8329e-06,  1.3709e-04,  6.2590e-05,  3.2309e-05, -9.2503e-02,
         2.8642e-06,  5.7788e-05, -1.5833e-05,  9.4172e-05,  2.6204e-05,
         3.9074e-05, -1.8462e-02, -1.9539e-05,  7.6907e-05, -4.4401e-02,
        -1.1112e-05,  5.5026e-05, -7.2397e-05, -4.2618e-05, -4.0705e-06,
        -6.0444e-05,  4.5387e-06, -1.6671e-06, -9.2671e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0289e-04,  9.3165e-03,  4.8036e-05,  6.1571e-05, -1.8703e-05,
         5.1447e-06, -7.7078e-06, -5.1585e-04,  1.9911e-05,  1.3575e-02,
         5.2208e-06,  2.1201e-05,  2.4013e-05, -6.9957e-05, -1.5434e-06,
        -6.6705e-06,  5.3253e-03,  9.4249e-07,  6.2764e-05,  4.7416e-02,
         1.7855e-05, -8.8286e-06, -2.6256e-05,  3.8498e-06,  4.5548e-06,
         5.1156e-05,  4.1767e-02,  1.9688e-05,  3.3188e-05, -1.7172e-05,
         7.6790e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7238e-04, -1.0270e-02,  1.1985e-05,  9.4259e-07, -2.6677e-02,
        -2.8230e-05, -2.0695e-06,  1.6239e-04,  2.0744e-05, -3.0930e-02,
         6.2749e-06,  2.2309e-05,  1.0598e-05,  3.2451e-05,  3.0320e-05,
         2.1565e-06,  7.0483e-03,  2.1897e-05,  4.6211e-05,  1.5373e-05,
        -4.3171e-02, -1.0863e-05, -3.4549e-06,  2.9034e-06, -2.7511e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7127e-05,  4.6166e-05,  3.6069e-05,  9.5422e-02,  4.2317e-05,
         2.5579e-05, -5.1718e-05,  3.8132e-05,  2.5173e-05, -6.0911e-05,
         1.0375e-04,  1.4968e-05, -4.4813e-05,  2.8951e-05,  1.0462e-04,
         5.5749e-05,  3.9417e-05,  2.8290e-05,  2.6376e-05,  8.5494e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5942e-04,  4.1315e-05,  3.3035e-05,  7.2324e-02,  1.8465e-05,
        -1.7291e-05,  8.1932e-06,  6.3294e-05, -5.5941e-05,  5.9729e-05,
         2.4162e-05,  4.2444e-05,  3.3598e-05,  3.5400e-05, -1.5309e-05,
         3.8029e-05, -2.4721e-05, -4.6981e-05, -1.6563e-05, -4.4045e-05,
        -5.9330e-05,  2.8396e-05,  8.6275e-08,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6555e-04,  4.0373e-06,  1.2136e-05,  6.9859e-02,  4.7527e-05,
        -5.9763e-05,  2.2511e-05, -4.9886e-05, -2.6178e-06,  2.5817e-05,
         7.4538e-05,  1.2909e-05, -2.0954e-05,  3.0954e-05,  7.7659e-05,
         1.7835e-05,  9.2917e-05,  1.2980e-05,  2.9054e-05,  7.9693e-05,
         1.7099e-06, -2.0531e-05, -7.5442e-06,  4.9183e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3412e-05,  8.5409e-03, -1.9043e-05,  4.4874e-05,  1.6642e-05,
         4.9652e-05,  1.8040e-05, -5.7220e-02, -2.4291e-06,  2.3360e-05,
         1.7054e-05,  4.8633e-06, -9.2708e-06,  6.3616e-05, -4.5370e-02,
        -3.1148e-06,  2.8395e-05,  3.5697e-05,  1.7268e-05, -8.0725e-05,
        -2.8278e-03,  1.0548e-06, -5.2553e-05,  3.3281e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6922e-05, -2.0509e-02,  9.7529e-06,  4.5846e-05,  1.1235e-05,
        -1.7634e-05, -1.5260e-05, -2.7789e-02,  1.2805e-06,  6.1990e-05,
         3.2872e-05,  9.9957e-07, -3.7553e-05,  1.1828e-05, -7.5305e-02,
         1.3467e-05,  2.9814e-05,  6.2104e-05,  4.7284e-06,  3.7425e-06,
        -3.4329e-05,  5.7944e-05, -1.4977e-05, -5.0394e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2150: [tensor([ 8.0105e-05, -4.7359e-02,  7.7031e-06, -1.2622e-05, -3.5684e-02,
        -2.9149e-06, -4.5728e-05, -1.5039e-02, -5.4113e-05, -5.1721e-05,
         2.2625e-04, -3.3518e-05, -1.9058e-05, -3.5330e-05, -2.0366e-02,
        -1.3317e-06, -3.5860e-05,  4.9858e-06,  1.4872e-05,  1.3052e-05,
        -4.4259e-05,  2.2972e-06, -8.4432e-06, -4.0931e-05, -4.9355e-05,
         4.6206e-06,  2.0615e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4514e-04,  1.0109e-05,  6.2773e-05,  3.8778e-05,  2.5467e-05,
         1.6224e-05, -2.5537e-05,  4.3523e-05,  4.6167e-05,  5.2777e-05,
        -1.1274e-05,  3.4936e-05,  2.4987e-05,  3.1215e-05,  1.8191e-05,
         1.1002e-05, -3.2297e-06,  4.6711e-05,  1.2876e-01,  2.2389e-06,
        -1.2517e-05, -2.4073e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4910e-05,  6.3491e-05,  7.0776e-05,  8.3944e-05, -7.3075e-06,
        -9.8890e-06,  3.2580e-05,  9.2111e-05, -1.4889e-05, -1.5668e-05,
         7.8773e-05, -3.2544e-06,  3.1604e-05,  4.9759e-05,  1.5818e-05,
         6.3571e-02, -5.6343e-05, -4.1155e-05,  5.1143e-05,  3.5352e-05,
         6.8766e-05, -1.8996e-05, -2.8855e-05,  4.5258e-02, -2.5319e-05,
         3.3858e-05,  1.2028e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5657e-05,  6.3876e-06,  3.9466e-05,  4.8861e-05, -8.7441e-06,
         3.0510e-05,  3.9636e-05,  2.0885e-05,  3.6398e-05,  2.9607e-05,
         7.1070e-05, -1.7502e-05,  4.0841e-05, -1.9969e-05,  2.4982e-05,
         4.7611e-02, -2.8762e-05,  2.1223e-05,  7.0877e-02,  4.3069e-05,
        -6.6651e-05,  2.5782e-05, -1.7154e-05, -2.4301e-06, -1.5508e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5580e-04,  4.2438e-05, -9.8088e-06, -4.6176e-06, -4.8848e-07,
         1.6914e-05, -3.0987e-02,  1.0034e-05,  1.2873e-05,  3.0332e-05,
        -3.0888e-02,  1.6480e-05,  3.5326e-05, -1.3129e-02,  7.7242e-05,
         1.3613e-05,  4.7536e-06,  1.0491e-06,  6.2718e-04, -2.3901e-06,
        -1.0898e-05,  1.4402e-05,  1.4859e-05,  3.3399e-05, -1.7941e-06,
         6.0016e-07,  2.6011e-05,  3.4252e-06,  1.1470e-06, -1.9143e-02,
         4.2635e-06, -3.0837e-06,  6.0457e-07, -1.3675e-02, -1.8055e-05,
         1.0680e-04,  2.0008e-06, -2.3020e-05,  1.3839e-05,  1.2597e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9909e-04,  1.4164e-05, -8.1502e-06,  6.0249e-06, -8.5983e-07,
         7.6181e-06, -3.0413e-02, -3.4533e-06,  3.3860e-06,  9.5995e-06,
         5.8060e-03, -1.4736e-05,  1.9976e-05, -1.7419e-02, -3.1403e-05,
         3.4298e-06,  4.3156e-06, -4.3101e-06,  1.5156e-03, -2.0381e-06,
         2.3023e-05,  7.7725e-06,  4.6697e-06, -2.5694e-05, -1.2661e-05,
         8.6266e-06,  4.9346e-07,  1.3510e-05, -2.2076e-06, -1.4161e-02,
        -6.0215e-06, -2.3024e-05, -4.7947e-06, -1.2090e-02,  1.1453e-06,
        -6.2469e-06, -3.6042e-06,  5.9899e-06,  3.5851e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6421e-04, -3.7460e-05, -2.3222e-05, -2.8667e-06,  3.6521e-06,
         1.2469e-05, -3.5660e-02, -1.7124e-05, -7.1191e-06, -2.8960e-05,
        -4.6961e-02, -7.5509e-07,  1.9169e-05, -3.0786e-02,  1.9154e-05,
        -3.0972e-06, -4.4497e-05, -1.8084e-05,  4.5801e-04, -3.0837e-06,
        -8.8826e-06,  1.7069e-05,  1.6268e-05, -4.2586e-05,  1.9599e-05,
         5.1422e-06, -8.7345e-07, -3.5876e-06, -1.7250e-05, -2.9714e-02,
         1.0847e-05, -2.8340e-05,  1.8219e-05,  2.7546e-03, -1.1362e-05,
        -1.6224e-05, -3.5993e-05, -1.5916e-05,  2.6575e-05, -1.1222e-02,
        -1.3933e-05, -1.8578e-06, -1.2986e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0111e-04,  7.8828e-02,  8.7781e-06, -1.5608e-05, -1.6284e-05,
        -2.6124e-04,  1.2731e-05,  4.4063e-05, -3.5677e-06, -7.0543e-06,
         2.8557e-02, -1.1684e-05, -5.3165e-05,  1.3452e-02,  2.5225e-05,
        -2.4714e-05, -8.2083e-06, -4.5994e-05,  3.7816e-02,  2.6263e-05,
        -1.0667e-05, -2.2612e-05,  1.7410e-02, -2.2553e-05,  5.4515e-05,
         1.8747e-05, -3.6520e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1749e-04, -6.2138e-02,  1.7753e-05, -4.5971e-05, -4.1307e-05,
         5.0007e-05,  3.0729e-06, -6.3456e-05,  2.0174e-05,  3.4698e-05,
        -5.2844e-02, -2.4824e-05,  6.2343e-05, -1.6390e-02, -4.8372e-05,
         1.8063e-05, -6.3564e-06,  4.8292e-06, -2.9495e-02, -1.0060e-04,
         1.8105e-06,  3.1366e-05, -1.7155e-05,  1.4081e-05,  1.7470e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4719e-04, -1.3855e-01,  8.0301e-06, -5.0703e-05,  3.3571e-05,
         2.4770e-04, -5.5808e-05, -8.6649e-05, -6.3228e-05, -1.0772e-04,
        -8.6949e-02, -2.7163e-05,  4.9815e-05, -2.4994e-02, -5.6749e-05,
         1.7306e-06,  6.8827e-06,  1.3291e-05, -9.7399e-02,  1.7108e-06,
        -1.6423e-05,  6.9943e-06, -1.5071e-04,  7.7430e-05,  1.2524e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0415e-04,  4.5967e-05, -1.2049e-05, -8.7463e-06,  1.7661e-05,
         6.7865e-06,  4.3229e-05, -3.7688e-02,  1.3041e-05,  2.3735e-05,
        -2.8622e-05, -8.2314e-06,  9.3140e-06, -1.3861e-05,  2.5223e-05,
        -3.9781e-05, -3.7745e-02, -8.2659e-07, -2.4626e-05, -1.2622e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6587e-04, -1.3927e-05,  2.3192e-05,  7.0686e-06, -3.1818e-05,
         1.2231e-05,  4.4390e-05, -4.4870e-02,  2.0481e-05, -6.2988e-07,
        -2.4050e-05,  2.7468e-05,  7.3279e-06,  2.7056e-05,  1.0786e-04,
        -1.8086e-02,  1.2022e-05,  3.2483e-06,  4.5063e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2200: [tensor([-2.2609e-04,  1.0386e-04,  5.5982e-05, -5.9179e-02, -1.3668e-04,
         1.4381e-05, -3.5348e-05, -5.7497e-05, -2.4911e-02,  2.4807e-05,
         5.9966e-03, -8.2304e-02, -5.9504e-06,  1.0082e-04, -1.8581e-05,
         2.6843e-05, -5.1782e-05,  1.2102e-05,  7.7629e-06, -3.1658e-05,
         3.0672e-05, -1.3624e-05,  6.7735e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1842e-05,  5.8836e-07,  1.2033e-05, -2.6983e-02, -3.7420e-05,
         2.7718e-05, -2.1369e-05, -8.2396e-06, -1.0663e-02,  1.9991e-05,
         1.9062e-03, -3.4878e-02,  1.9517e-05,  1.7480e-04, -1.7273e-05,
         7.3539e-06,  1.4382e-05,  1.2439e-05, -1.6360e-05, -1.8604e-02,
         1.5680e-06, -2.7126e-06, -9.1443e-06, -7.2634e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3167e-04,  2.3204e-05,  1.5467e-05,  3.3494e-02,  3.6138e-05,
        -1.6473e-05,  9.8211e-06, -9.4191e-05,  5.5535e-02, -4.3369e-06,
        -3.4114e-03,  4.5675e-02,  1.9548e-06,  2.8904e-07,  2.8234e-05,
         1.5755e-05,  4.0574e-05,  2.9326e-05,  3.1326e-05,  1.6963e-02,
         2.2706e-05, -2.8341e-05,  1.2655e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4663e-04,  2.0654e-05, -1.4802e-05,  6.5350e-06,  1.5996e-04,
         5.1402e-06,  3.5006e-05, -1.4129e-05, -3.1701e-06,  1.9312e-05,
         3.7656e-06, -1.1986e-06,  1.4921e-05,  1.0077e-01,  3.2779e-05,
         3.1749e-05,  2.1034e-02,  2.9279e-05, -3.4117e-05, -8.0544e-06,
        -1.1175e-05, -2.6423e-05, -5.2714e-06, -6.7593e-06, -1.5250e-06,
         1.3856e-05,  1.5906e-05, -1.1527e-05,  1.3448e-05, -1.9300e-06,
        -2.9745e-05,  3.9322e-05, -2.3740e-05,  2.2597e-02, -1.5029e-06,
        -9.8681e-06, -2.9957e-06,  1.3040e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3738e-04,  1.2006e-05, -3.1631e-05, -6.3752e-07, -1.7541e-02,
         3.8775e-06, -1.0345e-05,  7.7380e-06, -1.6988e-05, -1.2180e-05,
        -3.1218e-06,  1.4495e-05, -2.2397e-05, -5.7308e-02,  7.1159e-06,
        -2.9063e-05, -1.0992e-02, -3.0291e-06,  4.2663e-06,  9.9531e-06,
        -1.1047e-05,  1.1106e-05, -1.8790e-05, -2.7193e-06, -1.1530e-06,
         3.6957e-05, -1.2953e-05,  1.6907e-05, -1.2971e-05,  2.6704e-06,
        -2.2243e-02, -1.7944e-05, -3.0082e-02,  2.3479e-06,  6.0454e-05,
         1.7569e-04,  8.7773e-06,  1.6051e-05, -5.7535e-02,  2.8540e-05,
        -7.6873e-06, -1.1570e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9700e-06, -1.0617e-05, -1.7919e-05, -1.4600e-05, -1.5705e-03,
        -1.4583e-05, -1.8321e-06, -2.2483e-06, -3.8035e-05, -1.3928e-06,
        -9.0535e-06, -1.5152e-05, -4.7430e-05,  3.9849e-02, -2.8178e-05,
        -1.7644e-06,  1.0532e-02, -4.4581e-06, -4.0285e-06, -2.1154e-05,
        -7.1283e-06, -7.4492e-06, -1.0276e-05,  7.2592e-06,  2.3342e-05,
        -2.4825e-05,  1.4494e-05, -4.8650e-06, -1.9198e-05, -1.6195e-05,
         6.3428e-06,  9.8031e-06,  5.1018e-06,  1.0726e-02, -2.2171e-05,
        -3.7925e-05,  3.2864e-02,  2.2051e-06, -2.2383e-05,  1.8370e-05,
         3.5034e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0795e-05,  2.1360e-05,  1.3815e-02, -6.6694e-06,  5.9144e-06,
         5.8124e-06, -6.8300e-02,  6.5443e-06, -5.6514e-06,  1.1267e-04,
        -3.3510e-05,  4.9131e-05,  1.5492e-05, -5.5113e-06,  3.6642e-06,
         1.9632e-04, -1.3016e-06, -9.5564e-06,  3.6358e-06, -1.2705e-05,
         3.9043e-05,  7.3554e-03, -2.2130e-05, -1.9938e-05,  1.4932e-05,
        -3.2785e-05, -2.1750e-05, -1.2466e-05, -1.0786e-06, -1.0561e-05,
        -3.4664e-02,  3.9966e-05,  6.5210e-05, -7.9401e-06,  1.4611e-05,
         6.1218e-06, -1.7476e-05, -1.3994e-05, -9.6777e-06,  8.1796e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4650e-04,  1.8279e-05, -2.0020e-02,  5.2679e-07,  5.8066e-06,
         5.7130e-06, -5.1523e-02,  1.9829e-07,  2.2083e-06,  4.2470e-05,
         4.8965e-06, -1.0313e-05,  3.0613e-05, -5.0440e-06, -5.1733e-06,
         3.6327e-04, -1.1725e-05,  5.6522e-06,  1.0416e-05,  2.0017e-05,
        -2.2875e-06,  2.0440e-03, -4.8304e-06,  9.5350e-06,  1.6976e-05,
        -1.2075e-05,  7.5262e-06, -5.0823e-06,  7.1854e-06,  4.9784e-06,
        -2.4044e-02, -6.1450e-06,  7.9361e-05,  2.3501e-05, -3.7574e-06,
        -1.0424e-05, -1.0198e-05,  3.5919e-04, -7.6691e-06, -3.5281e-06,
         2.0119e-05, -2.3730e-06,  7.6903e-07, -4.7127e-06,  1.5257e-05,
        -1.0422e-02, -1.0147e-05,  9.2237e-06, -1.0331e-05, -1.7228e-02,
         4.9380e-06, -1.7258e-05,  1.6792e-05, -5.2623e-06, -5.5748e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5522e-05,  2.6024e-05, -8.6354e-03,  1.2367e-05,  1.6532e-05,
         6.0900e-08,  3.0161e-02, -9.6057e-06,  7.2742e-06, -2.3954e-05,
         1.2480e-05, -3.7775e-05, -9.7739e-06,  4.2261e-06,  1.5318e-06,
        -4.3788e-05,  1.9632e-05,  1.1852e-06,  7.5538e-06, -1.9546e-05,
        -1.2568e-05,  9.1451e-03,  1.6872e-06,  2.8018e-06, -1.3869e-05,
        -9.1368e-06, -1.5491e-05, -1.3346e-05,  3.8548e-06,  1.4385e-05,
         2.4909e-02,  9.1947e-06, -7.3921e-05, -9.7675e-06,  3.2778e-05,
         1.3886e-02, -4.3285e-06, -2.4586e-07, -3.3601e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2168e-05,  8.3493e-02,  1.3725e-06,  4.2606e-06,  9.3172e-06,
        -5.5963e-04,  7.4826e-06,  5.8352e-06,  4.9398e-06, -6.3300e-05,
        -8.8285e-06,  1.9222e-05,  2.7764e-02,  1.4920e-05,  3.8223e-06,
         7.0490e-08,  1.6340e-06, -1.5610e-05,  1.3623e-05,  6.9030e-03,
        -9.3380e-07, -2.4908e-05,  2.0143e-06, -3.2409e-06, -1.1891e-06,
         1.1007e-05,  3.4428e-05, -8.4699e-07, -2.9813e-05, -3.4309e-05,
        -7.4000e-06, -1.9131e-06,  2.7274e-02, -8.5223e-06,  2.0367e-06,
        -1.1020e-04,  2.8277e-06,  2.7225e-05,  6.2399e-03,  1.2252e-05,
        -7.7506e-06,  1.8542e-05,  8.4088e-06,  1.3936e-02,  1.1487e-02,
         1.0504e-05,  1.2785e-05, -3.6755e-05,  5.0699e-07, -1.5148e-05,
        -6.7777e-07,  4.7262e-06, -5.8153e-06, -3.4234e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6109e-04, -8.3162e-02,  1.3304e-05, -2.7241e-05,  1.3195e-05,
         3.3455e-03,  3.5400e-06, -1.4920e-04, -5.7675e-05,  2.1800e-04,
         1.8486e-05, -5.3008e-05, -4.6870e-02, -2.7489e-05,  3.3505e-05,
         1.1405e-05, -1.6538e-05, -6.3563e-07,  1.1627e-04, -1.7315e-01,
         7.9885e-06,  8.3685e-05,  1.0257e-05, -9.0272e-06,  8.9835e-05,
         1.0177e-05,  4.4792e-05, -5.2538e-05,  7.4084e-05,  4.7578e-05,
         4.4685e-05,  4.4859e-05, -1.0446e-05,  2.5190e-05,  5.1779e-06,
        -1.2067e-01, -3.6017e-05,  6.1169e-05, -5.1306e-02,  4.6705e-06,
        -4.8820e-06, -5.0661e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4393e-04, -1.0964e-03,  8.6108e-06,  9.5625e-06,  8.4407e-06,
        -2.8816e-04, -1.1762e-05,  7.8066e-06,  3.3201e-05, -6.9528e-05,
        -8.7167e-06,  3.4512e-05,  1.7378e-02,  1.0971e-05, -2.2263e-06,
         2.7052e-06,  1.2441e-05,  5.0516e-06,  3.3859e-05,  6.2520e-02,
         2.8126e-05, -9.6639e-06,  1.3182e-05, -3.1233e-05,  1.8356e-05,
         1.5027e-07,  3.9704e-05, -8.7491e-06,  2.1012e-05, -1.4769e-05,
         9.5205e-06,  1.7900e-05,  9.9022e-03, -3.1919e-05, -1.4678e-05,
        -1.5392e-04, -1.4513e-05, -2.6787e-05,  4.6841e-02,  7.8000e-06,
        -7.3063e-07, -1.7661e-05,  7.4794e-07, -7.8349e-06,  1.5653e-05,
        -3.7739e-03,  4.5750e-06, -2.7633e-05, -2.1435e-06, -1.0250e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2250: [tensor([ 1.7117e-05, -4.7798e-02, -7.3411e-06, -1.7593e-02,  4.1872e-06,
         9.2567e-06, -2.5931e-02, -4.9855e-06,  3.2052e-06,  5.5815e-06,
         2.5682e-05,  1.5976e-05, -2.4967e-05,  5.8624e-06,  1.6867e-05,
        -1.5569e-05,  7.4365e-05, -1.9985e-03,  1.4208e-05,  3.5441e-06,
         1.3273e-05,  1.5093e-07, -1.0225e-02,  9.2369e-07,  9.2917e-06,
         7.9420e-06,  9.0124e-07,  8.6252e-06, -2.1621e-02, -2.6969e-06,
         1.3459e-05,  6.9761e-06, -1.9742e-05,  3.1009e-04, -5.3639e-06,
        -1.9440e-06,  4.8107e-06, -9.1148e-03, -4.9347e-06, -6.2823e-06,
        -1.0928e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0547e-04, -2.7805e-02, -8.9914e-06, -2.0031e-02, -2.9827e-06,
         1.0532e-05, -2.4958e-02,  1.6868e-05,  6.1886e-06,  5.4128e-05,
         4.4916e-06,  1.8960e-05,  1.8287e-05,  7.3282e-06,  4.1576e-06,
         6.2067e-06,  5.3643e-05, -2.2394e-02,  2.5106e-06,  3.5214e-06,
         2.4326e-05,  1.1569e-05, -1.1541e-02, -6.1442e-06,  1.8606e-06,
        -3.5110e-06, -1.2729e-05,  1.0003e-05, -2.5058e-02, -3.1827e-05,
        -3.3503e-06, -1.1631e-05, -4.4023e-06, -7.7151e-06,  1.5020e-05,
        -1.3889e-02,  1.9075e-05,  8.5450e-05,  5.5937e-06, -7.2225e-06,
         1.5286e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7413e-04, -1.6560e-02,  8.5842e-06, -2.6903e-02,  2.3681e-06,
        -2.8468e-06, -4.7464e-02, -8.6643e-06,  2.1120e-06,  4.0467e-05,
         1.0451e-05,  1.1113e-05,  1.4803e-06, -5.3114e-06,  1.0930e-05,
         2.2798e-05,  1.0311e-05, -4.0182e-02,  1.2432e-05,  1.0089e-05,
         1.1727e-05,  9.1768e-06, -3.6526e-02,  2.9732e-05, -2.1148e-06,
        -1.1077e-05,  7.0004e-06,  1.4391e-05, -2.4375e-02, -5.1310e-07,
         4.2814e-06,  7.7552e-06,  3.0783e-06,  7.2597e-06, -3.3241e-05,
        -8.9115e-06, -1.3060e-05,  5.6903e-06, -5.5484e-03,  3.2589e-06,
        -2.4266e-05,  2.5576e-05,  1.4766e-05, -2.0869e-05, -2.2597e-02,
         1.0365e-05,  4.1784e-05,  9.1939e-06,  9.7233e-06,  1.1043e-05,
        -1.8085e-05, -1.9069e-05, -9.0537e-06, -1.7700e-02, -2.4911e-05,
         8.6585e-06,  1.6347e-05,  8.2526e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7719e-05,  2.6746e-02,  4.2228e-06, -3.3372e-03, -1.2358e-05,
        -1.4465e-05,  3.4533e-02,  2.6953e-05,  1.0524e-05, -5.8181e-05,
        -2.0046e-05, -9.9696e-06, -1.3670e-05,  8.9064e-07, -2.5927e-06,
        -1.6036e-05, -2.3661e-04,  1.6574e-02,  1.0721e-05,  2.5087e-06,
        -2.0432e-06, -1.2972e-05,  3.9648e-02,  6.6085e-06, -2.4329e-05,
        -9.3779e-06,  4.7852e-06,  1.7612e-05,  2.4369e-02,  1.2036e-05,
        -2.6278e-05,  1.5374e-06, -2.0467e-05, -1.2582e-05, -8.9595e-06,
        -7.8152e-07, -1.5908e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8970e-04, -3.5563e-02, -6.8309e-06, -1.6352e-02, -2.9612e-05,
        -2.0457e-06, -2.6893e-02, -2.4912e-05, -5.4413e-06,  2.5301e-05,
        -1.8170e-05,  8.9370e-06,  4.7151e-07, -2.8738e-05, -1.5114e-05,
        -2.8447e-05, -2.7031e-05, -3.5324e-02,  6.1915e-06, -7.5247e-06,
         1.1974e-07, -1.8367e-06, -1.9327e-02, -2.3937e-06,  9.6371e-07,
         1.6302e-06,  4.7877e-06, -5.6397e-06, -7.4769e-03, -1.2196e-05,
        -1.3549e-06,  2.0014e-03, -2.3753e-05, -9.1836e-06,  1.3647e-06,
         1.2792e-05, -1.2678e-02, -9.0691e-06,  5.1054e-06, -1.1218e-06,
         1.4264e-03, -1.2369e-05,  8.7354e-06,  2.2887e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4760e-06, -2.4280e-02, -2.3707e-05, -1.8588e-02, -4.3819e-07,
         2.1558e-06, -3.6852e-02, -2.5669e-05,  4.1920e-06,  4.1098e-05,
        -2.5219e-05,  3.4065e-05, -2.6996e-05, -7.8546e-06,  1.2413e-05,
        -3.9259e-05,  1.4397e-05, -5.5871e-02,  1.4639e-05, -1.2705e-06,
         1.1716e-05,  7.3785e-06,  8.8129e-03,  1.0826e-05, -2.7448e-08,
        -3.6553e-05, -1.4387e-05, -1.3304e-05, -1.7776e-02,  2.1344e-05,
         1.6108e-05,  1.9442e-05, -7.2394e-06, -2.5043e-02, -1.4909e-05,
         2.7974e-06, -2.0195e-05, -2.8302e-06,  2.8881e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4129e-05, -2.6271e-02, -1.6156e-05,  5.8789e-03, -2.4639e-05,
        -9.4937e-07, -1.4737e-02, -9.1772e-06, -3.3420e-06, -1.8009e-05,
         1.4002e-05,  2.0132e-05, -1.6680e-05,  4.3111e-06,  8.7394e-06,
        -3.0268e-05, -1.7135e-05, -1.7966e-02,  1.2248e-05, -2.1182e-05,
        -2.1959e-05,  5.8094e-06, -1.4086e-02,  1.6303e-05, -1.9158e-06,
        -3.0836e-06,  4.3616e-06, -5.9863e-06, -1.5059e-02, -1.5035e-05,
         9.8415e-06, -8.5353e-06,  3.6432e-06, -9.5021e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7196e-04, -3.5173e-02, -7.5588e-06,  3.4368e-03, -1.3304e-06,
         1.6817e-05, -3.9576e-02, -9.3902e-06, -1.2974e-05,  1.0007e-05,
         1.6916e-05,  2.9912e-05, -7.3552e-06,  2.8048e-06, -7.1683e-06,
        -3.4129e-05,  7.3387e-05, -4.6732e-02,  1.9313e-05, -3.2364e-06,
         8.2425e-06, -3.4676e-05, -2.4017e-02,  8.4797e-06,  1.5281e-05,
         2.1086e-05, -1.0045e-05, -5.3705e-06, -4.1216e-02, -3.5019e-05,
         1.5543e-04, -2.7206e-05, -3.1308e-02,  9.1711e-06,  1.4096e-07,
         1.0060e-05,  8.5078e-06, -1.8946e-05, -5.9992e-07, -3.3008e-06,
         1.7337e-07,  2.2774e-05, -1.7197e-05,  3.0545e-05, -1.5075e-05,
        -3.5807e-05,  2.6093e-06,  1.2106e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4590e-04, -1.7285e-02, -4.0613e-05, -8.6047e-03,  1.7547e-05,
         4.4919e-06, -5.3311e-02, -1.0050e-05,  5.1909e-06,  1.4379e-06,
        -3.4027e-05,  8.9501e-06, -2.5866e-06, -5.0102e-06, -6.1833e-06,
        -9.4945e-06,  5.1046e-05, -3.3118e-02, -2.5830e-05,  1.0720e-05,
         4.1477e-05,  2.9905e-06, -3.9773e-02,  6.7314e-06, -1.6593e-05,
         1.1204e-05,  1.1256e-05,  9.3808e-06, -3.3498e-02, -4.0979e-05,
         8.3556e-05, -2.8021e-06, -2.5996e-05,  1.4348e-05, -2.4395e-05,
        -3.1274e-06,  2.4822e-05, -1.2724e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7594e-04,  1.0997e-05, -6.3508e-06,  1.3966e-04,  6.1410e-05,
        -6.3992e-06,  1.1547e-04,  1.5720e-06,  3.0583e-04, -1.1629e-02,
         9.2674e-06, -4.2192e-06, -6.6926e-07, -4.3339e-02, -2.9355e-05,
        -2.7571e-05, -1.2112e-06, -3.9743e-02,  2.3219e-05,  5.3217e-05,
        -5.2778e-02,  1.3912e-05, -8.2748e-07,  1.4116e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1397e-04, -2.3730e-05, -1.3827e-05,  1.7480e-03,  3.4625e-06,
        -4.6835e-05,  1.4865e-04,  3.6942e-05,  3.7736e-03, -6.0243e-02,
         4.0901e-05,  1.4070e-05, -3.1661e-05, -5.7516e-02, -1.0090e-05,
         4.0497e-05, -3.6668e-02, -2.4738e-05, -5.1675e-05,  4.3090e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9619e-05, -1.1080e-05, -3.0233e-06,  1.0190e-04, -2.6461e-05,
        -1.0970e-05,  7.3029e-05,  2.0910e-05,  1.2727e-03, -4.6435e-02,
        -1.1590e-05,  3.9679e-06,  8.6656e-06, -1.7489e-02,  7.9318e-06,
         1.3428e-05, -3.2120e-02, -6.9631e-07, -5.5215e-06,  7.8857e-06,
        -2.1223e-03,  1.5886e-06, -1.5112e-05, -5.1543e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2300: [tensor([-6.4141e-05, -6.0771e-06, -1.7164e-05, -1.9980e-05, -8.0153e-06,
        -2.9529e-05, -1.2295e-05,  2.5172e-06, -9.6134e-04, -1.4451e-05,
         6.8878e-07, -6.4660e-03, -2.5301e-05,  1.5141e-07, -6.6394e-03,
         1.3565e-05,  2.8417e-06, -7.3377e-03,  7.9222e-08, -4.1424e-06,
        -4.2771e-05, -2.0813e-05, -5.9182e-06, -1.6932e-05, -1.0258e-05,
        -8.4638e-03, -4.8862e-05, -8.2335e-06,  6.7203e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8961e-04, -7.1054e-06, -2.1915e-06,  5.0229e-06, -7.7907e-06,
        -1.5730e-05, -1.5150e-05, -5.6637e-06, -6.9527e-04, -9.9025e-06,
         5.3260e-07, -5.0907e-03, -1.7038e-05,  6.2757e-06, -7.6330e-03,
         4.4653e-06, -7.9586e-06, -9.7948e-03, -1.9170e-05, -6.9664e-06,
        -1.6361e-05, -1.3842e-05, -5.6140e-06,  6.1709e-06, -5.2810e-06,
         2.6787e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8890e-05, -1.6627e-02, -1.8791e-05, -9.3096e-06,  4.8165e-05,
        -4.2738e-06, -1.1878e-05, -4.1100e-06,  3.9630e-05, -2.5079e-02,
         1.7840e-05,  3.1892e-04, -1.3667e-05,  8.6276e-06,  1.5252e-06,
        -8.9781e-03, -1.6022e-05,  2.0912e-05,  1.5504e-05, -1.5212e-02,
         2.6241e-05, -9.8337e-06, -4.7667e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2993e-04, -8.1482e-03, -3.0758e-05, -5.3195e-06,  7.4738e-06,
         7.8725e-06, -4.0316e-06,  9.9549e-07,  1.0821e-04, -2.7855e-02,
         6.8058e-06,  7.7353e-04, -2.9196e-06,  7.4552e-06,  2.9614e-06,
        -1.0888e-02,  1.4870e-05, -2.8067e-05,  5.3442e-09,  7.5130e-06,
         3.4134e-06,  1.0803e-05,  1.2716e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9165e-04, -4.2438e-02, -1.2473e-05, -1.0766e-06,  4.0332e-05,
        -4.4967e-07, -4.2914e-05, -1.9368e-05, -3.0105e-05, -2.1811e-02,
        -2.3520e-06,  1.1791e-03, -7.9394e-06, -2.4273e-05,  9.9685e-06,
        -2.3827e-02,  6.7558e-06, -3.0425e-05, -3.6486e-05,  2.8054e-04,
         4.0647e-05, -2.7712e-06,  2.3933e-05,  5.2210e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1227e-04, -2.0249e-02, -9.9824e-06,  4.1668e-06,  6.9365e-07,
        -6.0365e-05,  2.6395e-06, -2.7331e-05, -2.3418e-03, -1.1689e-05,
         1.5689e-05, -3.1609e-06, -8.6622e-03,  8.8023e-06, -3.0790e-05,
        -3.9569e-06,  6.5007e-06, -4.3138e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7989e-05, -1.6825e-02,  5.1059e-06,  4.0727e-06, -7.2843e-08,
        -6.8517e-05,  6.8167e-06, -2.5422e-05, -2.6938e-03, -5.7009e-06,
         3.2416e-07,  5.9190e-06,  3.3618e-03,  1.7289e-06,  8.1326e-05,
         1.9501e-06, -6.2316e-03,  7.0158e-06, -6.7672e-06, -1.1463e-02,
        -1.9173e-05,  1.7407e-06,  1.3509e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9242e-05, -1.6055e-02,  2.3352e-06,  2.5732e-06,  5.1777e-06,
        -3.6668e-05, -1.4693e-05, -2.1387e-05, -3.7770e-03, -1.5036e-05,
         9.3422e-06,  1.1368e-05, -1.0626e-02,  5.9587e-06, -1.4853e-05,
        -2.1483e-06,  8.0022e-07, -2.0026e-03,  1.2957e-05, -1.5533e-05,
         1.6768e-05,  8.4110e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0733e-04, -1.4035e-02, -1.5287e-06,  6.0297e-05, -1.3944e-05,
        -8.5110e-06, -1.0787e-05, -5.3056e-03, -1.2975e-05, -4.0799e-07,
        -8.5848e-03,  9.6992e-05, -1.0257e-05, -6.6606e-06, -2.2939e-03,
         1.1442e-05, -5.3634e-05,  4.0219e-06,  1.3310e-06, -1.3248e-02,
        -9.6312e-06,  2.7793e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3373e-05, -1.2353e-02, -7.8805e-06,  3.8580e-04,  3.7559e-06,
        -4.8777e-07, -4.0053e-05, -6.3350e-03,  2.4596e-05,  6.7434e-06,
        -6.3222e-03, -2.8904e-05,  9.4406e-06, -3.0480e-06,  7.9298e-03,
         6.2086e-07, -1.1924e-04, -4.6763e-05,  5.5011e-03, -1.5187e-05,
         1.0245e-05, -1.3188e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2078e-05, -2.1119e-02,  1.5238e-05,  8.6579e-04,  3.0005e-06,
        -5.0068e-06, -1.6961e-05, -1.1212e-02,  2.0308e-05,  1.0196e-05,
        -3.7970e-03,  1.3274e-05,  4.3046e-06,  1.1132e-05,  6.8575e-03,
        -9.7822e-06, -1.0391e-04,  8.8022e-06, -4.7548e-05,  8.8114e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5491e-05, -6.3516e-02, -5.1591e-05, -1.9302e-05,  1.5874e-04,
        -6.1643e-06,  1.0834e-06, -1.8568e-02,  4.7394e-04,  1.4922e-06,
        -2.2339e-05, -3.5078e-03, -2.5077e-05,  4.2447e-05,  3.1599e-05,
         7.2723e-05, -1.8233e-02,  2.5448e-05,  2.9626e-05,  3.2950e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2350: [tensor([-1.5753e-04, -3.7875e-02,  2.1705e-05, -7.4746e-03, -2.9002e-02,
        -4.0646e-05,  7.6511e-06,  5.0709e-05,  1.6510e-05, -2.0547e-05,
        -1.8403e-02,  1.9000e-05,  3.2133e-03, -1.2885e-02,  6.1841e-05,
         1.0585e-05,  2.1370e-05, -2.9314e-02,  3.5782e-05,  7.8793e-04,
        -1.2389e-05, -9.3082e-06,  1.1676e-05,  2.9065e-05, -1.0056e-05,
         4.2377e-03,  7.6796e-06,  8.3238e-06, -1.9833e-02, -2.9148e-05,
         2.9525e-05,  1.9471e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2938e-04,  3.0381e-02, -3.6249e-06,  9.2296e-03,  5.8710e-03,
         1.8571e-05,  4.0688e-06, -1.3774e-04, -6.4483e-06, -6.4614e-07,
         1.3013e-02, -4.0025e-06,  6.0651e-03,  1.6919e-02,  1.4935e-05,
        -9.3439e-06,  6.8481e-06,  7.7659e-03, -1.4838e-05, -1.3429e-03,
        -1.9235e-06, -6.8618e-06, -2.3595e-06, -6.7372e-06, -8.0015e-07,
        -2.2443e-03, -1.5937e-05, -3.0064e-06, -3.4288e-03, -1.4236e-05,
        -1.4684e-05, -2.2511e-05, -6.2412e-06, -1.3347e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.7886e-05, -3.5933e-02, -5.9525e-06, -1.5925e-02, -6.6822e-03,
        -1.9189e-05,  1.1927e-05,  1.0318e-04, -2.9041e-06, -1.5130e-05,
        -2.1740e-02,  4.7490e-07,  4.8256e-03, -9.0457e-03, -5.8836e-06,
        -8.3765e-07, -1.1204e-05, -1.0022e-02,  4.8968e-05,  1.2131e-04,
         2.6090e-05,  2.9391e-06,  2.9924e-05, -2.5825e-05,  1.7612e-05,
         3.0223e-03, -3.7336e-07,  5.6341e-07, -3.4060e-05,  2.6603e-06,
        -4.9808e-06, -2.0280e-05, -1.6353e-02,  3.3637e-06,  2.0812e-06,
        -1.7410e-05, -5.2959e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0682e-06, -1.0520e-02, -4.8142e-06,  1.9817e-03, -4.7655e-03,
        -1.9090e-05, -2.8937e-06,  2.5312e-04,  8.3758e-06, -1.1503e-05,
        -2.2067e-02,  1.2946e-05, -3.7141e-03, -2.4222e-02, -1.4399e-05,
        -1.7590e-05,  2.1313e-06, -3.4362e-02,  4.7849e-05,  1.8581e-04,
         1.0659e-05, -1.0454e-05,  7.9358e-06, -2.9786e-02, -2.3866e-06,
         1.8118e-05,  3.0215e-05, -1.6849e-05,  2.2563e-05,  1.5881e-05,
        -1.3983e-05,  9.6944e-06,  2.4805e-05,  1.3860e-06, -1.1672e-05,
         5.0283e-06,  1.0065e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6707e-04,  8.8986e-02, -2.0707e-05,  9.2537e-06,  5.6427e-05,
         3.1463e-05, -9.0894e-06,  5.4382e-05,  9.7692e-06, -2.3571e-05,
         4.5852e-02,  3.4252e-05,  6.6469e-05,  3.3073e-05, -6.7871e-05,
         2.6834e-02, -3.1072e-05, -4.7971e-04, -7.5312e-05, -2.1709e-05,
         2.1571e-05, -3.1880e-06, -3.2090e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4178e-05, -8.6495e-02,  2.1261e-05,  3.5612e-05, -7.7719e-06,
        -4.3406e-06,  1.9403e-05, -1.5315e-06, -1.9012e-05,  7.3648e-06,
        -2.5616e-03, -3.1925e-05, -1.0404e-05, -4.1798e-05,  5.1786e-07,
        -4.7247e-02,  2.8531e-05,  2.5158e-05,  4.4687e-05, -9.6589e-07,
        -2.9405e-05,  1.2298e-05, -1.8091e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1622e-04,  1.3999e-01,  1.7553e-05, -4.0034e-06,  1.0384e-04,
         8.8041e-05, -3.1826e-05,  3.9735e-05, -9.4511e-05, -1.1009e-04,
         1.1282e-02, -5.9698e-05,  5.7033e-05,  7.6647e-06, -7.0146e-05,
         1.1838e-01,  7.4869e-05, -3.4832e-04,  1.4306e-04, -2.4682e-05,
         1.1550e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6937e-04, -1.0484e-01, -7.6561e-05, -9.7801e-06, -6.4637e-06,
        -6.5563e-05, -3.8491e-05, -1.5715e-04, -9.6344e-05, -3.8510e-05,
        -1.1173e-02,  4.9286e-05, -1.0875e-04, -9.1274e-05,  1.3978e-05,
        -5.1127e-06,  8.4264e-05,  1.3456e-04,  6.7796e-05, -1.5656e-01,
         5.0379e-05,  2.7190e-06,  8.4988e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8044e-05, -9.1918e-02, -1.8902e-05,  1.7835e-05, -3.2912e-06,
        -3.5151e-05,  3.0066e-05, -4.3293e-05,  5.2679e-05, -2.4486e-06,
        -1.0084e-02, -3.8314e-05, -5.4959e-05,  2.1058e-05,  9.6229e-06,
         8.1099e-06, -3.2630e-05,  5.8338e-05, -3.4790e-02, -4.5032e-05,
         4.5636e-05,  1.9856e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2894e-04,  2.9858e-02,  2.1303e-05,  1.0250e-05,  7.1432e-05,
         1.5130e-05, -1.9303e-05, -3.3967e-05,  2.4261e-05,  7.4221e-06,
         1.7796e-02,  1.8974e-05, -2.6573e-05, -1.0284e-05, -1.6291e-05,
         4.1246e-02, -7.9126e-06, -2.9508e-04, -6.1258e-06, -6.4001e-05,
        -1.4728e-05, -3.7900e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6590e-06,  5.1031e-02,  3.5377e-05, -3.8686e-05,  7.2281e-05,
         2.4908e-05, -4.8076e-05,  1.1303e-04,  1.3554e-04, -4.1512e-05,
         4.4337e-02,  4.5400e-05,  5.4167e-05,  1.9401e-05, -1.3176e-05,
        -1.0722e-02,  4.2696e-05,  2.9641e-05, -1.1200e-05, -5.0642e-05,
         4.7958e-02,  2.2958e-05, -2.5136e-06, -2.7855e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2803e-04,  2.7580e-02,  1.3882e-05,  1.3234e-05,  3.4460e-05,
         1.8620e-05, -1.2239e-05, -2.3157e-06,  1.6523e-05,  2.1261e-05,
         1.1400e-02, -8.0014e-06, -1.0405e-05, -1.5282e-05, -6.2912e-06,
        -8.1270e-03,  1.1390e-05,  9.4708e-06,  9.9966e-06,  1.5119e-05,
         2.2513e-05, -9.4116e-06, -1.1868e-05,  1.3287e-05,  1.4959e-05,
         1.3800e-05,  3.8247e-05,  1.9297e-02,  1.1680e-05,  5.1847e-06,
         5.4214e-06,  1.0880e-05, -1.4152e-05,  8.8471e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2400: [tensor([ 6.1958e-06,  7.8902e-04, -5.7499e-06, -8.7039e-06, -1.3368e-06,
         2.0960e-07, -5.4066e-06,  1.3549e-04, -3.9545e-03, -1.2788e-05,
         5.1497e-06, -5.4955e-03, -8.0554e-06, -1.6299e-06, -9.9988e-06,
        -1.4548e-03, -2.0051e-05, -7.5100e-06, -5.9267e-06, -2.8266e-03,
        -1.5599e-06, -4.2611e-06, -9.0188e-07,  1.8889e-05, -4.5946e-06,
        -2.2326e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9639e-05, -5.0979e-06, -1.0084e-05, -1.4541e-05, -5.3040e-06,
         5.3808e-07, -1.2542e-05,  2.4696e-04, -1.9557e-03, -1.3941e-05,
        -3.0785e-06, -6.0663e-03, -8.1352e-06, -3.0130e-06, -1.5590e-06,
        -2.7327e-03, -1.7825e-05, -8.5363e-06,  2.0893e-06, -4.7588e-06,
         7.0222e-07, -1.3389e-06, -1.8577e-06, -4.8300e-06, -9.9386e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3196e-05,  5.6125e-05,  1.4754e-05, -6.5013e-06, -9.8138e-06,
        -4.0953e-06, -1.2504e-05,  1.0326e-04, -2.0649e-03, -4.0825e-06,
         2.6045e-06, -2.6192e-03, -1.2006e-06, -6.5123e-06, -5.0215e-06,
        -1.1210e-03, -1.1376e-05,  4.7885e-06,  5.5296e-06, -2.6215e-03,
         2.6733e-06,  6.5767e-07, -1.0954e-05,  2.6482e-06, -6.7370e-06,
         1.1208e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5381e-05, -2.4451e-05, -1.6070e-05, -1.0314e-05, -6.9929e-06,
        -9.9167e-06,  7.7627e-06,  7.7685e-04, -1.9571e-03, -8.5319e-06,
        -6.2504e-06, -7.0035e-03, -1.3184e-05, -3.6914e-06,  4.5787e-06,
        -1.0929e-03,  3.3233e-06, -8.3511e-06, -5.1161e-06, -4.9181e-03,
        -1.1190e-05, -1.3664e-07,  6.7928e-07, -4.7646e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3060e-04, -7.4289e-06,  3.1779e-06, -7.5411e-06, -4.7455e-06,
         4.7230e-06, -7.6578e-06,  3.0321e-05, -2.4623e-03, -8.1879e-06,
        -1.8269e-06, -1.1678e-02, -1.4420e-05, -2.0004e-06,  2.1830e-06,
        -1.7041e-03, -6.0858e-06,  1.5848e-03,  1.4597e-06,  9.0324e-06,
        -6.0644e-06,  1.6631e-06,  1.9740e-03, -2.7096e-05, -2.9801e-06,
        -1.6826e-06,  7.7202e-07,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.1220e-05,  2.1258e-04, -8.6751e-06, -9.2657e-06, -1.3728e-05,
        -5.6246e-06, -1.1267e-05,  5.3058e-04, -4.2795e-03, -1.6707e-05,
         4.3676e-06, -8.2839e-03, -1.8885e-05, -9.0110e-06, -9.5269e-06,
        -1.9050e-03, -3.9925e-06,  2.2129e-03, -1.5928e-05, -2.5427e-05,
         3.7101e-06, -4.1512e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0051e-05, -1.3685e-05, -6.8141e-07, -9.8408e-06,  3.0773e-06,
        -4.4906e-06,  1.2567e-06,  1.1660e-04, -2.0979e-03, -1.1108e-05,
         2.4280e-06, -5.6903e-03, -2.6035e-06, -2.8417e-06,  4.0456e-07,
        -5.5167e-04, -7.7259e-06, -7.8274e-06, -7.2886e-03,  5.1844e-06,
        -1.8599e-05,  1.5032e-05, -5.1323e-06, -5.5195e-06, -3.7748e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1412e-05, -2.1944e-05, -5.4398e-06, -8.0411e-06, -5.0569e-06,
        -5.2302e-06, -2.8371e-06,  2.2586e-04, -2.3286e-03, -1.4948e-05,
         2.5954e-06, -4.7393e-03, -5.0639e-06, -9.7475e-07, -7.3705e-07,
        -1.2014e-03, -1.2963e-05, -5.9448e-06, -2.6050e-06, -1.3891e-06,
         5.1536e-06, -1.0980e-05,  3.7504e-04,  8.6002e-05, -4.0897e-06,
        -1.4934e-05, -1.3171e-06,  8.3287e-07,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1901e-06, -1.4160e-05, -8.4776e-06, -2.9996e-06, -4.9304e-06,
        -1.0836e-05, -8.2336e-06,  1.4579e-03, -1.2358e-03, -7.7521e-06,
         2.9341e-07, -2.9418e-03, -6.1583e-06, -5.7851e-06, -5.7386e-06,
         6.0157e-04, -2.7784e-05, -1.0572e-05,  2.5488e-05, -4.0985e-03,
        -2.5613e-06, -3.3225e-06, -1.2149e-03, -4.4073e-06,  3.6047e-06,
        -3.9705e-03, -6.2977e-06, -8.6456e-06,  9.3267e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9877e-05,  1.2732e-04,  1.8343e-06, -6.3735e-06, -5.6564e-06,
        -4.1216e-06, -5.3267e-06,  1.6258e-04, -3.4943e-03, -7.8379e-06,
        -4.6779e-06, -4.4261e-03, -3.1995e-06, -4.2023e-06, -7.4998e-06,
        -9.2154e-04, -5.1305e-05, -5.6855e-06,  8.0292e-04, -5.5565e-06,
        -4.1660e-03, -1.4103e-06,  1.2269e-07, -9.3265e-07, -5.7415e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8543e-05,  1.6298e-04,  1.4763e-04, -1.3417e-05, -1.3178e-05,
        -4.8162e-06, -1.1195e-05,  1.0681e-03, -4.1462e-03, -1.4737e-05,
         2.7023e-06, -6.6129e-03, -1.3343e-05, -5.4835e-06, -3.1885e-06,
        -1.4647e-03, -1.1354e-05, -8.5763e-06, -3.2110e-07, -6.0948e-06,
         1.3898e-06, -1.0951e-05, -3.6237e-05, -1.5572e-05,  2.3980e-03,
         4.5989e-06, -1.6389e-05,  1.5002e-06,  6.9880e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2461e-05,  1.8881e-05, -3.1545e-05, -8.4613e-06, -8.4288e-06,
        -1.7074e-06, -7.2695e-06,  4.4610e-04, -4.1528e-03, -1.3897e-05,
         8.8890e-06, -6.1990e-03,  1.2247e-06,  6.0922e-07,  2.7617e-06,
        -3.1259e-05, -3.4920e-05, -4.5003e-06,  1.7509e-04, -1.1735e-02,
        -1.2163e-05, -5.2384e-03, -1.0115e-05, -8.6589e-07, -1.8769e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2450: [tensor([ 7.8960e-05, -6.6265e-02,  4.4036e-06,  3.8038e-05,  3.1938e-06,
        -5.4257e-05,  3.9615e-05, -4.1725e-02,  3.0675e-05,  7.6508e-03,
        -3.6981e-07,  1.9329e-05,  7.0109e-07, -3.6213e-05, -7.7026e-05,
         1.1005e-05,  7.6059e-06, -5.2149e-05, -5.3974e-05, -3.4512e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.6482e-05, -3.2769e-02,  1.3116e-05,  4.6292e-05,  6.8226e-06,
        -9.5122e-06, -1.0951e-05, -2.1258e-02,  4.2780e-05,  2.0783e-03,
         1.1408e-05, -1.3991e-06, -4.3558e-06, -1.3128e-06, -1.3600e-02,
        -3.6968e-06,  2.0139e-06,  2.1404e-04,  3.3350e-06, -4.8427e-05,
        -1.9781e-05, -1.5908e-02,  1.4012e-05,  1.8551e-06, -1.5210e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0102e-04,  1.1873e-05,  1.1166e-06,  7.9961e-06, -9.9123e-03,
        -1.3928e-06,  1.1472e-06,  2.2396e-05, -4.6369e-03,  6.5855e-06,
         1.9046e-06,  2.9119e-06,  2.5350e-06, -5.7569e-06, -1.8009e-02,
        -1.2208e-05, -7.5312e-06,  1.6064e-05, -1.9294e-06,  2.9245e-07,
        -1.0218e-06,  1.1325e-05,  8.4187e-05, -5.9029e-06, -4.5120e-06,
         2.8391e-06, -3.0661e-06, -7.2029e-06, -3.0572e-03, -5.4342e-06,
         1.1337e-05,  7.6653e-06, -1.1560e-02, -1.6238e-05, -1.1364e-07,
        -4.8853e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1963e-05, -2.7516e-06, -3.8555e-06,  1.4300e-05,  2.3207e-03,
        -6.0277e-06,  9.6253e-06,  1.8511e-05, -9.2926e-03,  9.5408e-06,
         6.5993e-06, -6.1271e-06, -2.0272e-06,  1.5319e-05, -1.0201e-02,
         5.7402e-06,  1.3824e-06, -1.0501e-06, -1.0899e-05,  4.2942e-06,
         6.4211e-07, -7.4745e-06,  4.8241e-05,  2.8985e-06,  3.5707e-06,
         8.4272e-06, -9.8254e-06, -9.1880e-03, -1.1449e-05,  1.6430e-04,
        -1.3584e-05,  2.1413e-04,  9.0176e-06,  9.1521e-06,  4.2298e-06,
         2.0039e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4723e-05,  9.9785e-06,  8.1572e-07, -8.1873e-06, -1.0054e-02,
        -4.2526e-06,  5.3473e-06,  7.5674e-06, -3.6920e-03,  2.5323e-06,
         1.0099e-06,  7.8785e-06,  2.6016e-06,  1.9390e-05, -7.3284e-03,
         1.7600e-05,  7.1240e-07,  3.5361e-06, -3.8803e-06, -2.2322e-06,
         8.6377e-07, -3.9602e-06,  5.0523e-05,  1.0543e-05, -6.5180e-06,
         9.1404e-06, -2.6108e-06,  2.9200e-06, -1.1456e-05, -3.5944e-03,
        -5.4758e-06, -1.6141e-06,  8.9915e-04, -4.3894e-03, -5.0230e-06,
         9.5345e-06, -3.3247e-07,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1458e-04, -2.2905e-02,  4.3609e-06, -2.3924e-06, -9.8428e-03,
         1.1843e-05, -1.5201e-06,  1.5883e-05, -1.3968e-05, -7.0121e-06,
         8.4129e-05,  1.9954e-06,  3.0018e-03, -1.6000e-02,  8.9524e-06,
         1.0519e-05,  4.1173e-06,  2.4704e-05,  9.9951e-06, -2.3752e-02,
         1.3043e-05,  7.9126e-05, -1.1947e-05, -1.0409e-05, -7.7354e-06,
        -5.4548e-08, -1.4475e-02,  8.9279e-06,  1.3214e-03,  3.7331e-06,
        -1.1610e-05, -2.9420e-06,  1.5322e-05,  1.9909e-05, -1.4705e-05,
         2.7804e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5845e-05, -3.0735e-02, -1.6828e-05, -5.4135e-06,  3.4623e-03,
         7.4017e-06, -1.3379e-06,  2.1021e-06, -1.5181e-05, -1.4043e-05,
         3.3353e-05,  1.7464e-05,  2.3149e-04, -1.2893e-02,  1.6838e-05,
        -4.0544e-06,  6.2266e-06, -2.7221e-06,  5.0323e-05, -1.9952e-02,
        -1.7177e-05,  7.1679e-06,  2.4054e-05, -7.3349e-06,  4.4324e-06,
        -1.8428e-05, -1.4196e-02,  9.5563e-06,  1.7165e-03,  2.9685e-06,
        -5.0590e-03, -5.6755e-06,  1.0612e-05, -3.4866e-06, -1.2599e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7449e-05, -7.7008e-03,  1.2991e-05,  8.5311e-06,  3.3379e-03,
         1.5013e-05, -2.2341e-06,  1.6936e-05, -1.4077e-05, -4.9242e-06,
         3.3181e-05,  1.4597e-05,  3.9474e-03, -3.0105e-02,  1.2368e-06,
         2.3581e-05,  2.2427e-06, -7.0733e-08,  4.0081e-05, -3.2128e-02,
         4.9256e-06,  2.0884e-04,  2.3673e-05, -2.4846e-05, -8.6568e-06,
        -3.1153e-06, -2.5438e-02, -8.1052e-08,  1.0673e-03,  4.5550e-06,
         1.4583e-05, -1.4227e-05, -1.4026e-05, -3.1940e-06,  2.1564e-05,
        -9.6374e-06, -2.0435e-06,  1.6690e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8832e-05, -1.1274e-05,  3.3604e-05, -2.2259e-02, -1.2821e-05,
         7.8150e-06, -2.4460e-05,  7.3766e-06, -3.6889e-06,  3.1326e-03,
         3.6202e-07, -1.0973e-05, -2.2183e-05, -6.7005e-06, -2.3009e-02,
        -2.3892e-05,  4.1695e-06,  2.5047e-07, -3.0165e-05,  8.9067e-06,
        -3.5989e-02,  9.8485e-06, -1.7319e-05,  4.3775e-06,  1.3601e-05,
         2.4465e-05,  2.5039e-05,  1.2074e-05, -3.7151e-06,  2.0081e-06,
         2.8473e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3878e-04,  6.4691e-08,  1.3699e-05,  5.8964e-03,  4.4261e-07,
         1.0360e-06, -5.0002e-06, -2.5463e-06, -5.4846e-06, -2.8885e-02,
         5.6006e-06, -5.2475e-06, -9.6402e-06,  1.8360e-05,  3.9620e-03,
         8.0494e-07, -9.0349e-06, -8.9706e-06, -9.2925e-06, -1.4221e-05,
        -1.8279e-02, -1.8812e-05, -7.4760e-06, -8.7284e-06, -1.1044e-05,
         1.3094e-05, -1.2050e-05, -9.2465e-03, -5.6224e-06,  1.6073e-04,
         7.2641e-05,  3.1033e-06,  3.2458e-06, -3.8142e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1857e-05, -4.6192e-06, -1.3297e-05, -2.3118e-02, -3.8115e-06,
        -4.5223e-06, -7.5368e-07,  6.6060e-06, -6.5126e-07, -1.4059e-03,
         8.0096e-07, -8.3242e-07,  6.0132e-06,  1.1167e-05,  1.5957e-02,
         2.1146e-06,  2.8007e-06,  1.6606e-06, -1.5945e-05,  1.2370e-05,
        -1.3225e-02, -8.2156e-06, -4.8280e-06, -9.2354e-06, -1.2992e-02,
        -9.2273e-06, -6.0625e-06, -3.3371e-06, -1.2006e-05, -1.2393e-05,
         4.8155e-06, -1.0892e-05,  2.1287e-05, -3.8086e-06,  3.1668e-06,
        -4.9324e-06,  2.1311e-05,  7.9635e-06, -7.0337e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4919e-05, -5.1804e-03, -7.3875e-06,  5.7089e-06,  4.2137e-06,
         1.4963e-04,  5.8663e-06,  1.0987e-05, -3.7606e-06,  2.8454e-06,
        -3.5977e-06, -5.2182e-06, -5.8975e-06,  2.9076e-06, -1.0917e-05,
        -1.4331e-06, -4.5153e-06, -1.8282e-02,  4.0612e-06,  8.5716e-06,
         3.5865e-06, -1.0380e-06, -2.8183e-06, -1.3441e-02,  1.1820e-06,
        -6.8633e-06,  8.9403e-06,  8.4240e-06,  9.0179e-04,  5.2794e-06,
         8.9183e-06, -1.0153e-02, -1.2312e-06, -8.8158e-06,  6.5795e-06,
         4.6226e-06, -1.0707e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2500: [tensor([-2.2402e-04,  4.3967e-05, -3.3121e-06, -1.2345e-05,  1.3305e-04,
         4.9502e-03, -4.1678e-05,  1.6320e-05,  3.9920e-06,  1.8206e-05,
        -1.1848e-05,  4.5423e-07, -4.3352e-02,  3.6374e-05, -1.0029e-05,
        -2.2452e-05,  4.2193e-05, -1.6097e-02, -3.1704e-05, -5.4276e-06,
         7.4449e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6702e-05,  9.5590e-06, -1.8301e-05,  3.5962e-06,  2.8025e-04,
         3.6299e-03,  1.8638e-06,  6.2245e-06,  1.4903e-05, -2.0443e-05,
        -9.6638e-06,  8.7497e-07, -1.5305e-02,  5.1518e-06,  3.0881e-06,
        -2.7987e-05,  5.5810e-03, -1.1089e-05, -9.2379e-07, -4.3340e-05,
         3.5392e-04, -1.8596e-02,  1.6838e-05, -2.0094e-05,  1.6104e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5593e-04, -1.7678e-02, -5.8442e-06,  5.6963e-06, -3.4883e-03,
        -5.8987e-06,  2.7743e-05,  1.4309e-05, -1.5072e-05,  6.8289e-05,
         4.2875e-05, -7.3620e-06,  2.6649e-05, -8.3293e-06,  1.3692e-05,
        -9.3269e-06,  1.1624e-04, -4.0380e-02,  2.1571e-05, -2.3829e-02,
         1.0954e-05,  4.0971e-04,  1.5326e-05, -9.9690e-06, -2.8184e-05,
        -1.5954e-02,  1.3995e-05,  1.3522e-03, -8.0023e-07, -4.1176e-05,
         8.1131e-06, -3.8222e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4302e-04, -1.3695e-02, -4.9341e-06, -3.7219e-05, -4.1638e-02,
        -2.3483e-05, -6.8055e-06,  1.1224e-05, -5.4164e-06,  2.9768e-05,
         2.2529e-05,  1.7562e-06, -1.8437e-05, -7.0748e-06, -8.7036e-06,
         9.6125e-06,  1.3494e-03, -3.6634e-02,  1.4892e-05,  1.5908e-03,
         6.0199e-06,  3.9767e-05, -3.8839e-06, -4.9736e-06,  6.0388e-07,
        -8.1638e-03,  2.3838e-05,  2.5523e-03, -1.3803e-05,  2.0747e-03,
         3.4076e-05, -1.4968e-06, -1.2511e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7657e-04, -5.8279e-02, -1.2633e-05, -7.6458e-07,  3.1368e-03,
        -3.0352e-05,  2.3307e-05, -2.7655e-06,  1.6485e-05,  1.1129e-04,
         1.0822e-05, -1.4500e-05, -6.7191e-07, -1.0321e-05, -3.3322e-05,
         1.1541e-06,  9.0018e-05, -1.3738e-02, -1.0165e-05,  3.4375e-03,
         1.6972e-05,  1.4945e-04,  2.9291e-05, -6.2571e-06, -6.3981e-06,
        -1.5017e-02, -1.9671e-05,  2.5543e-04,  6.3554e-07, -2.4801e-02,
         2.8921e-05, -2.8567e-06, -5.9728e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8719e-04,  1.0295e-03, -1.1438e-05, -5.5978e-06,  3.2856e-03,
        -9.3578e-06, -1.6294e-05, -1.2792e-05,  3.7820e-06,  3.4028e-06,
        -2.6726e-05, -7.0374e-07,  8.6839e-06, -7.9055e-07, -6.8546e-06,
        -1.4854e-02, -5.2692e-06, -1.6378e-05, -8.1600e-06, -6.0110e-07,
        -2.3805e-02, -5.8412e-06,  2.5668e-03, -1.0588e-05,  2.7126e-04,
         1.1628e-05, -9.7783e-03, -1.8786e-05, -6.7860e-06,  1.2096e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3030e-04, -2.8540e-02, -1.0065e-06,  3.1657e-06,  3.3690e-03,
        -2.1279e-05,  3.3859e-06,  1.5687e-05, -2.2820e-06,  8.3609e-07,
         1.7629e-05,  1.8778e-05,  1.4850e-04, -8.9569e-06,  2.5663e-05,
        -1.0794e-02, -2.7442e-05, -1.6289e-05,  1.3978e-05, -1.2691e-08,
        -1.3502e-02,  1.0971e-05, -2.6030e-05,  2.6688e-05,  1.7268e-03,
         7.1810e-06, -2.4272e-02, -2.0189e-05, -1.7725e-05,  3.6327e-06,
         2.5769e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6342e-05, -2.2737e-02,  1.8698e-05,  9.3048e-06, -1.5344e-02,
        -3.2311e-06, -4.9419e-06, -8.0484e-06, -7.4896e-06, -1.8680e-06,
        -7.6867e-06,  1.5562e-05,  4.6872e-05, -3.4626e-06,  8.6784e-06,
        -5.5035e-03, -1.0345e-05, -6.1318e-06,  4.7614e-06, -2.6392e-06,
        -2.6831e-03,  1.4339e-05,  2.0207e-03,  8.2784e-07,  6.0983e-06,
         6.7505e-06, -3.9537e-06,  6.3622e-06, -2.9393e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3079e-04,  1.2390e-01,  1.9452e-06,  1.2147e-04,  1.6920e-01,
        -2.7940e-05, -1.2186e-04, -5.6646e-05,  2.8970e-05,  7.7706e-05,
        -1.4273e-04, -5.7047e-05, -1.4425e-03,  3.2100e-05, -2.4096e-06,
         7.4508e-07, -1.8212e-04,  7.5623e-05, -1.7799e-05, -3.3584e-05,
        -3.9605e-04, -4.4907e-05,  3.9178e-05, -1.5704e-03,  4.3341e-05,
         1.2357e-04, -1.2304e-04, -1.9461e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2206e-04, -1.3941e-01,  4.6881e-05, -6.7655e-05, -5.7963e-03,
        -5.1751e-06,  1.2554e-04,  3.3475e-05, -1.0620e-05, -7.8452e-05,
        -3.4845e-05,  1.0725e-04,  2.6423e-04, -1.0351e-05,  5.8618e-05,
         6.7558e-06,  1.6005e-04, -2.0871e-05, -3.8852e-05, -5.3161e-05,
        -5.0376e-02,  9.9500e-05,  1.5868e-03,  3.8662e-05,  6.3474e-05,
         5.7070e-05, -1.3295e-06,  5.8361e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.0577e-05,  5.2153e-02,  1.2927e-05,  8.0803e-05,  3.7044e-02,
        -1.2135e-05,  4.2586e-06, -4.8360e-05, -5.3657e-06,  4.2275e-05,
        -9.2892e-05, -2.8944e-05, -3.3577e-04, -1.3158e-05,  4.8566e-05,
         2.7608e-06, -1.0287e-04, -2.7837e-05,  2.5570e-05,  2.2079e-05,
         2.9373e-02, -4.3107e-06, -4.2762e-03, -1.0966e-05,  1.8623e-05,
         4.5597e-05,  6.2128e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9791e-04, -1.0007e-05, -4.2175e-05,  1.7833e-05,  3.1916e-05,
        -7.9935e-06, -2.5098e-05, -3.7121e-05, -1.6439e-05, -8.6819e-05,
         5.1880e-03,  5.3451e-06, -1.3531e-05,  1.2103e-05,  4.0544e-06,
        -1.2846e-02, -2.9798e-05,  5.7585e-03,  4.3841e-06,  2.0525e-06,
        -1.5031e-05,  1.0564e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2550: [tensor([-8.7984e-05, -2.2589e-02,  7.1179e-06,  5.3435e-05,  3.1760e-05,
         2.6352e-06, -1.3273e-05, -8.4874e-06, -1.1304e-02, -6.0106e-06,
         3.7030e-03,  6.3560e-05,  2.2798e-05,  2.8883e-06,  8.4549e-06,
         1.3946e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8381e-04, -2.3516e-02, -4.9644e-06,  6.8760e-06,  6.5814e-06,
        -2.8529e-05,  1.4268e-06,  3.4871e-06, -5.8556e-03,  1.3980e-06,
         1.3920e-04, -3.5090e-05, -1.3183e-05, -1.9184e-02,  1.5724e-05,
         1.9832e-05, -2.0221e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1471e-07, -2.8312e-02, -5.7237e-06,  2.6813e-04, -1.4022e-05,
        -1.1114e-05,  2.5528e-05, -9.8294e-06,  4.0203e-06, -1.4294e-05,
        -9.7365e-03, -1.9473e-05,  1.3341e-03, -1.5826e-05, -1.9871e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0745e-05, -2.3085e-02, -7.8990e-06,  9.0939e-05,  2.5388e-05,
        -3.6879e-05,  8.4494e-06, -1.1027e-05, -1.1576e-06,  3.8093e-05,
        -9.8057e-03,  1.3811e-05, -2.8507e-05, -2.3887e-05, -2.9561e-02,
         1.2171e-05, -1.7828e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9600e-04, -3.6555e-02, -3.7818e-05,  2.4626e-04, -3.6595e-05,
        -1.2909e-05,  2.9422e-05,  3.3712e-05,  1.5527e-06,  3.8171e-06,
        -2.5470e-02, -6.2541e-06,  5.7068e-04, -1.3051e-05,  2.3104e-05,
         3.9609e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9732e-05, -4.3722e-02,  1.1024e-05, -9.3557e-06, -3.2654e-05,
         3.4341e-03,  4.0535e-06,  1.0027e-06,  1.7092e-05,  8.5729e-06,
         2.3213e-05,  1.6022e-05,  8.1019e-06,  1.9367e-05, -1.1509e-05,
        -1.1688e-05, -7.6398e-06,  1.4026e-05, -2.3454e-02,  6.3344e-06,
        -7.4298e-06,  1.6171e-05,  1.0183e-05,  7.1294e-06, -5.0941e-06,
        -1.9796e-05, -2.3785e-05,  6.9088e-06, -2.8467e-02,  1.2271e-05,
         2.9463e-05,  1.1908e-03, -1.6835e-06, -1.4542e-06,  2.1892e-07,
        -1.9773e-06, -3.1544e-07,  1.0232e-06, -1.1933e-02,  5.2548e-06,
        -6.7043e-03, -8.1621e-06, -6.2274e-06, -2.0063e-02,  2.4641e-06,
        -3.2885e-06,  1.9364e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2769e-05, -5.0090e-02,  9.7265e-06, -1.9849e-05,  2.3566e-06,
         1.4843e-04,  1.6378e-05,  2.4947e-05, -1.4758e-05,  1.4057e-06,
         4.9931e-05, -6.4531e-06,  1.7218e-05,  3.5990e-05, -7.1989e-06,
        -4.4843e-06,  1.8441e-05,  1.1815e-05, -2.8104e-02,  1.3732e-05,
         5.2385e-05,  1.0714e-05,  9.0241e-06,  2.4103e-05,  2.9440e-06,
         3.0513e-06,  1.9401e-05,  2.4747e-05, -2.8128e-02, -2.9382e-06,
         3.4573e-06, -5.9499e-03, -6.3873e-06,  5.8391e-05,  5.9698e-06,
        -2.0248e-05,  1.5983e-05, -7.1396e-06, -2.1353e-02, -2.6413e-06,
        -6.7108e-03,  1.6677e-05, -6.1404e-04, -1.2432e-02,  2.9915e-05,
         5.0259e-05,  2.9893e-05,  8.4095e-06,  5.4791e-06,  3.3760e-07,
        -4.3609e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5781e-05, -7.9129e-02,  1.5197e-05,  1.8391e-05, -1.6581e-05,
         1.2686e-03,  1.0084e-05,  1.2047e-05,  2.2672e-06,  1.2723e-05,
         3.5043e-05, -8.7261e-06,  1.3918e-05,  9.9736e-05, -1.3719e-06,
        -1.4772e-05,  3.0117e-06, -4.1964e-06, -3.6016e-02, -1.3540e-06,
         3.3594e-05,  1.4954e-05,  1.0954e-06,  2.8166e-05,  1.7909e-05,
         1.6922e-05, -2.8001e-06, -1.5958e-06, -5.3848e-05,  5.9345e-06,
         2.2803e-05, -6.1403e-03, -2.4613e-05,  7.7265e-05,  2.0521e-06,
         2.3380e-05,  2.0814e-05, -4.1886e-06, -2.2323e-02, -1.1427e-05,
         2.4970e-03, -1.0098e-06, -2.6153e-05, -2.3639e-02, -4.1275e-06,
         9.5836e-06, -1.4471e-05,  8.8163e-04, -1.3843e-05, -3.1541e-05,
        -8.6648e-06,  1.4354e-05, -1.2087e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1250e-04, -1.4068e-02, -2.6810e-06,  2.8018e-04,  1.6927e-07,
         2.1012e-04, -4.3709e-02,  6.2968e-05, -5.2080e-05, -2.9135e-05,
         2.2281e-05,  1.8676e-05,  2.2520e-06, -1.9471e-02, -2.9640e-05,
         2.5114e-03, -2.0734e-05, -2.9026e-05, -2.8970e-02,  5.7004e-05,
         5.7444e-06,  4.2520e-05, -1.6000e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6526e-05, -1.2893e-02,  1.1497e-05,  6.4945e-05,  1.1384e-05,
         5.0588e-04, -4.2356e-02,  4.1645e-05, -3.7547e-05, -4.1473e-05,
         6.2924e-06, -1.3011e-05,  1.7679e-05, -1.2066e-02, -4.3104e-06,
        -6.1613e-03,  1.7040e-06, -2.3455e-02, -3.5504e-05, -1.1196e-06,
        -2.8811e-02,  6.2549e-05, -2.8108e-05, -5.2101e-05,  1.8614e-06,
        -2.6512e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3371e-05, -2.5067e-02, -1.7713e-05,  5.9409e-05,  1.5928e-05,
         1.5278e-03, -2.3010e-02,  1.8414e-05, -3.5218e-06, -1.2817e-05,
         6.4921e-06, -2.4737e-07, -3.4820e-06, -1.6940e-02,  2.2681e-05,
        -7.3628e-03,  2.3562e-05,  2.5927e-05, -3.2536e-05,  1.5639e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8828e-05, -3.0938e-02,  1.7700e-06,  2.7037e-05,  5.7161e-07,
         3.2966e-06, -2.8908e-06,  3.5928e-06, -1.6744e-05, -1.1405e-02,
        -1.5025e-05,  8.6039e-07, -5.6223e-06, -1.9379e-02, -1.3546e-05,
        -1.4682e-05, -9.1767e-06, -9.1118e-03, -1.2684e-05,  9.0909e-06,
         3.0127e-05, -3.8412e-06,  4.1749e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2600: [tensor([-1.8958e-04, -4.2593e-02, -1.0712e-05,  1.4351e-05,  4.4523e-06,
        -2.9720e-05, -9.6652e-06, -2.5079e-02,  1.2749e-05, -7.0193e-06,
        -1.2660e-06, -1.1816e-05,  4.2111e-06,  2.1153e-03,  2.4022e-05,
         1.4278e-05, -1.0394e-05, -2.4642e-03, -4.1211e-05, -3.6877e-05,
        -7.5509e-06, -2.6190e-06, -2.5622e-02, -2.1968e-06,  2.8471e-05,
         6.5993e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2436e-05, -4.7555e-02, -6.1472e-06, -4.9286e-06, -1.9707e-05,
        -2.0396e-05,  5.4320e-06, -3.0491e-02,  1.8355e-06, -1.9725e-05,
        -1.2045e-05, -7.8822e-07,  2.4860e-05,  4.1299e-04,  1.1353e-05,
         1.7605e-05, -5.3305e-06, -2.7730e-02,  2.3815e-07, -1.0727e-05,
        -4.3447e-06, -1.4839e-07, -2.0787e-02, -9.3620e-06, -3.6132e-05,
         2.2720e-06, -7.2381e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9368e-05, -2.8509e-02,  4.2640e-06, -1.6787e-06, -7.6518e-03,
         7.6494e-06,  1.9251e-05,  1.6145e-05, -1.0345e-06,  5.2483e-06,
        -3.7531e-06,  7.7542e-06,  6.9108e-07, -6.0031e-06,  6.9545e-07,
        -1.7887e-02,  1.3813e-06,  9.8836e-06,  6.0907e-06, -1.0597e-02,
        -7.1337e-06,  2.6304e-06,  8.2491e-06,  9.0725e-06, -5.7539e-03,
        -2.1845e-06,  1.0867e-05,  1.0990e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4601e-05, -2.1448e-02,  1.4751e-05, -2.4261e-05, -3.6999e-02,
         5.0688e-06,  1.5129e-05,  1.4612e-04, -2.0260e-05, -2.3840e-05,
        -2.3257e-05,  1.6372e-05,  2.2254e-05,  6.9973e-06, -1.4672e-05,
        -1.8257e-02,  1.1738e-05,  6.6793e-06,  1.2992e-05, -1.6976e-02,
         2.3580e-05,  6.3557e-05, -1.0337e-05, -1.3006e-02,  6.1389e-06,
         1.4541e-05,  7.4711e-06,  1.4065e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6671e-04, -3.3225e-02,  7.8393e-08,  8.8051e-06, -2.4784e-02,
        -1.7719e-05,  2.0743e-05,  4.3261e-04,  4.6819e-05, -2.8592e-05,
        -1.7644e-07,  1.0568e-05,  6.2786e-06,  6.5156e-06,  2.5173e-05,
        -1.0965e-02,  8.8642e-06,  1.2518e-05,  7.2782e-06, -1.2786e-02,
         1.3546e-05,  2.6280e-04, -1.9775e-05,  1.8898e-03, -5.0719e-06,
        -2.1182e-05,  1.0327e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9933e-05, -4.5260e-02, -9.0652e-06, -2.4288e-02, -3.0161e-05,
        -2.5344e-05,  7.4343e-05,  5.2213e-05, -6.8577e-05,  2.0145e-06,
         1.9179e-02,  4.6013e-05,  4.7713e-05,  4.5492e-05, -4.1721e-06,
        -3.0558e-05, -5.1213e-02, -3.6626e-06,  3.1870e-04,  8.6312e-05,
        -5.8480e-02, -2.9380e-05,  5.8992e-05, -1.3288e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4128e-04,  7.2394e-02,  2.6760e-05, -2.6821e-03, -1.3907e-06,
         6.3833e-06, -3.4525e-05,  3.7298e-05, -3.7915e-06,  6.0895e-05,
         3.4988e-02, -4.3782e-05,  6.9493e-07, -3.0571e-05, -2.3347e-05,
        -3.0840e-05,  7.5846e-03,  1.3298e-05, -2.0704e-03,  1.3008e-04,
         4.5559e-05,  2.6504e-02,  3.8757e-05, -2.1921e-05, -1.1411e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2302e-06, -4.3423e-02,  2.6811e-05, -1.7685e-02, -1.9221e-05,
         1.7052e-06, -6.5066e-06,  3.5158e-04,  8.5192e-06, -4.1147e-05,
        -1.8314e-02, -1.6791e-05,  3.2070e-05,  4.2462e-06,  1.4142e-05,
         1.2067e-06, -1.6566e-02,  1.7002e-05,  4.7407e-05, -1.1360e-05,
        -1.2587e-05,  1.1144e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4545e-04,  4.1155e-04,  1.2868e-05, -9.5080e-06, -5.0279e-06,
         1.2474e-02,  1.0338e-05, -8.9062e-06, -1.3300e-02, -8.4627e-06,
         5.2341e-06, -8.4015e-06, -8.1851e-06, -8.4047e-06, -1.5502e-05,
         2.8988e-05, -2.0237e-05, -5.0259e-06,  2.4403e-05,  1.3241e-05,
        -1.4266e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0861e-04,  7.7939e-05,  1.1773e-05,  5.6305e-06,  1.2417e-06,
         6.5055e-03, -1.5564e-05, -1.4126e-05,  2.8052e-03,  3.8971e-07,
         3.3865e-06, -5.8471e-06,  7.7167e-06,  7.1341e-06,  9.7447e-06,
        -2.5960e-05, -5.5999e-06,  6.0265e-06,  6.3174e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2848e-04,  7.1086e-05,  1.4160e-06,  2.0098e-05,  2.0232e-05,
         1.3646e-02,  2.9883e-06, -7.1037e-06,  4.0612e-03, -1.7488e-05,
        -1.8059e-05,  1.2963e-05,  1.2031e-05, -1.1424e-06,  2.5092e-05,
        -3.1547e-05,  1.5104e-05,  2.5469e-06,  3.1434e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7127e-04,  2.6380e-05, -1.3776e-05,  4.1370e-05, -5.8263e-05,
        -5.6692e-05,  5.3382e-05, -1.6310e-05, -4.2683e-05,  3.1044e-05,
        -4.6250e-05,  6.9747e-02,  4.7584e-05, -1.4258e-02,  6.0580e-05,
         1.2862e-04, -1.2920e-04,  5.1017e-05, -2.1646e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2650: [tensor([-8.4477e-05, -4.4279e-02, -1.2001e-05,  5.0089e-06,  2.2872e-05,
        -2.0360e-05,  3.8160e-06, -5.2235e-06,  1.0812e-05, -3.8400e-05,
         1.2309e-05, -3.0534e-05, -4.3611e-02, -5.6132e-06,  1.4964e-03,
         3.6787e-05, -3.5780e-06, -1.3076e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2511e-05, -7.6841e-02, -4.6178e-05, -2.3807e-05, -3.6595e-06,
         2.4606e-05, -3.1239e-05, -3.1722e-05, -1.0466e-05, -1.5308e-05,
        -8.6525e-06,  7.9463e-06, -3.0710e-02,  1.3383e-05,  3.9632e-04,
        -1.5526e-05, -7.8324e-06,  4.1684e-04, -4.4879e-06, -4.7546e-05,
        -2.6100e-05,  2.1241e-05, -7.0575e-06,  2.4796e-05,  1.1348e-05,
         1.1797e-05,  2.6602e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7519e-05, -3.5630e-06, -1.3785e-05, -4.5260e-02, -6.7955e-06,
         9.5062e-05, -1.2148e-05, -4.1652e-05, -2.6088e-05, -1.0775e-05,
         6.1661e-04, -7.2541e-06,  9.2024e-06, -9.1494e-06, -2.5236e-05,
         9.3577e-06,  1.1285e-02,  1.2926e-05, -6.9812e-06, -6.8025e-06,
         2.9589e-07,  1.4424e-04, -1.0065e-05,  4.0189e-06, -1.1714e-02,
        -1.2331e-05,  5.0992e-06,  8.6759e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3821e-04,  1.5842e-05,  5.7279e-06, -5.6023e-02,  4.3839e-06,
        -1.8484e-06, -1.3821e-05, -4.6904e-05, -2.7432e-05, -2.7182e-05,
         2.5494e-05, -2.5952e-05, -2.3511e-05, -2.4589e-05, -1.1169e-05,
        -1.6204e-05, -1.7796e-02, -4.7954e-05, -2.2140e-06, -1.5208e-05,
         3.4705e-05,  7.8625e-04,  1.0266e-05, -3.9710e-03, -2.3893e-05,
         5.9921e-07,  2.0047e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9523e-05,  1.8676e-05,  1.0381e-06, -1.7754e-02, -1.3487e-05,
         7.0184e-05, -1.0431e-07,  3.0843e-06, -1.0833e-05,  5.4469e-06,
         2.2852e-05, -1.4214e-05, -2.0204e-05, -3.1057e-05,  2.8065e-05,
         9.7153e-06, -2.1917e-02, -3.0016e-05, -1.5177e-06,  8.7684e-06,
         2.3092e-05, -1.8421e-02, -9.8908e-03,  6.9232e-06,  2.2653e-03,
        -5.8608e-06,  3.1767e-06, -4.6645e-03,  1.5784e-06, -7.6466e-06,
         3.1245e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7209e-04,  4.7369e-07, -3.5935e-05,  3.8794e-05,  7.7587e-05,
         1.2156e-02, -1.0526e-05, -5.2382e-06, -3.1605e-05,  3.9246e-05,
        -3.2000e-05,  3.0123e-05,  4.9236e-02, -9.8149e-06,  3.0818e-05,
        -2.6895e-05, -3.9075e-04,  8.3393e-05,  1.0117e-06, -1.7657e-05,
        -4.0570e-06,  1.0745e-05,  2.2684e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1214e-06,  4.3734e-05,  1.5701e-05, -1.6048e-05,  8.4107e-06,
         3.7378e-02, -4.9220e-05, -1.2488e-05,  2.7409e-06,  7.0612e-06,
        -3.2612e-02, -3.6210e-05, -7.9328e-09,  8.5713e-06,  3.0303e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3905e-05, -1.3815e-05,  8.3091e-06, -2.7988e-05,  2.9741e-05,
         1.8685e-02, -1.9117e-05, -4.0714e-06,  6.6896e-06,  6.6284e-06,
        -1.6026e-02,  1.4753e-06, -1.6450e-02, -3.1016e-06,  2.3531e-06,
        -4.8598e-03,  2.3367e-06, -1.9734e-06,  1.0897e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3045e-04, -5.6681e-02,  1.0012e-05, -1.7567e-05,  7.8122e-06,
         8.2113e-05,  2.0649e-06,  1.0776e-04, -5.3862e-06,  5.0793e-05,
         8.6774e-04,  2.4080e-05, -4.1520e-06,  1.3902e-05, -1.1469e-05,
         2.7869e-05, -1.6403e-02, -8.9971e-06,  7.5631e-06,  2.9900e-05,
        -4.9517e-06, -2.3888e-02, -8.0377e-06,  5.8446e-04,  2.3501e-05,
        -4.2312e-06,  4.8925e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1340e-04, -4.3850e-02, -3.8156e-05,  2.6416e-05,  7.7723e-06,
        -1.8506e-05,  1.6342e-06,  2.3389e-04, -1.5906e-05, -2.2032e-05,
        -2.2417e-02, -2.2493e-05, -2.1220e-05, -3.5719e-05, -2.5934e-05,
        -1.9801e-06, -2.7706e-02, -1.6721e-06, -4.3874e-07, -1.6629e-05,
        -3.2046e-05, -3.2070e-05, -9.6852e-06, -1.1744e-05, -1.5832e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1663e-05, -3.2567e-02, -9.7524e-06, -2.7106e-06,  5.1607e-06,
         1.3828e-05,  1.0506e-06,  1.3327e-04,  1.3109e-05,  1.5532e-05,
        -1.7236e-02,  1.8236e-05,  7.9224e-06, -6.2927e-06, -5.3551e-06,
        -6.8732e-06, -1.4773e-02,  1.7251e-06,  2.3223e-05, -1.0992e-02,
         4.2677e-06,  2.8544e-05,  4.3343e-06,  6.7404e-05,  1.3008e-05,
         6.0340e-06,  3.8737e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4944e-04,  1.5233e-05,  5.9678e-06, -5.0211e-02,  2.2156e-06,
         1.7475e-05, -5.8967e-06,  4.4614e-06, -3.0248e-05, -1.5272e-05,
        -1.1446e-05,  2.4955e-06, -1.9746e-02,  4.1886e-05,  2.0703e-05,
         2.0626e-06, -1.5815e-05,  1.6633e-07,  5.3201e-06,  3.3620e-05,
        -1.1726e-05, -5.8921e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2700: [tensor([ 1.2041e-04, -2.4145e-05,  1.7710e-05, -4.0508e-05,  1.1365e-04,
        -5.5247e-05, -4.9811e-05, -4.7627e-06, -2.0727e-05,  4.2026e-05,
        -3.5364e-02, -3.5817e-06, -1.6364e-05, -3.6395e-05, -4.8819e-06,
         1.3217e-05, -1.4901e-05,  2.4651e-05, -7.4866e-02, -1.3211e-05,
         1.0309e-04, -3.6287e-05,  6.5834e-06, -6.9232e-02,  9.4751e-06,
         2.8553e-05,  1.8530e-05,  2.1557e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5210e-04,  6.5674e-03, -4.5835e-06, -2.5064e-06, -1.8869e-02,
        -1.8284e-05,  1.0030e-04, -2.9479e-07, -1.0844e-02,  3.2162e-06,
        -1.6881e-05, -8.9857e-06,  6.7056e-06, -1.5565e-02, -9.3383e-08,
         2.5722e-03, -2.7118e-05, -4.8244e-06,  2.0759e-05,  1.0305e-04,
        -6.1483e-03,  1.9401e-05, -3.9292e-05, -7.7696e-06, -6.6420e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.1442e-06, -2.8500e-02,  2.2693e-05,  8.8178e-06, -1.9360e-02,
         5.6234e-06,  6.8205e-05, -8.3515e-06, -8.5326e-03,  6.4908e-06,
        -3.5520e-06, -6.5120e-06, -3.8481e-07, -6.0678e-03,  1.4266e-05,
        -1.1016e-05, -1.5207e-02, -7.3043e-07,  9.4658e-08,  1.7963e-06,
        -3.5639e-06, -5.2090e-06, -1.1799e-05, -6.4068e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6211e-05, -8.1573e-03, -7.6949e-07,  3.8581e-06, -1.0232e-02,
        -8.2290e-06,  3.2211e-05, -2.3482e-07, -3.8533e-03,  8.6418e-06,
        -5.8931e-06,  1.0458e-05,  6.4613e-06, -9.4890e-03, -4.5562e-06,
         6.2417e-06, -6.0156e-06, -8.1517e-03,  1.3133e-05, -1.5971e-03,
        -8.3670e-03,  1.4950e-06, -2.8695e-05,  9.4500e-06,  5.2287e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1355e-04,  2.1877e-05,  1.7265e-05, -9.4504e-05,  4.9533e-05,
        -5.7148e-05,  3.2079e-06,  5.2779e-05,  7.5674e-02,  1.3095e-04,
        -6.3921e-05, -6.1129e-05,  3.9971e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3096e-04, -9.8391e-06,  1.5803e-05,  2.1067e-06, -1.1018e-05,
         1.4316e-06,  2.0326e-06, -4.7198e-06, -7.5500e-03, -1.8744e-02,
        -1.7101e-05,  5.6861e-03,  7.0338e-06,  1.2622e-05, -1.3837e-02,
        -1.9847e-06,  1.1651e-05,  1.5334e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2075e-06,  2.0371e-05,  1.3154e-05,  2.9641e-06,  4.9257e-05,
        -5.1468e-06,  1.4856e-05, -3.1593e-05,  4.3907e-06, -2.8802e-02,
         6.7450e-06, -2.0282e-02,  2.5112e-05, -3.6343e-05, -2.5187e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.2201e-05,  5.7892e-03, -1.5247e-05, -1.5258e-05, -7.7600e-06,
        -1.0038e-02,  6.9719e-06,  3.1841e-06,  1.5073e-05,  1.8881e-06,
        -1.0613e-05,  1.4887e-06,  1.0896e-05, -6.5801e-06,  7.3163e-03,
         1.3511e-06,  1.0394e-05,  2.2467e-05, -5.4884e-06, -2.8207e-04,
        -4.3032e-06, -6.4629e-06, -7.2122e-05,  8.2291e-06,  4.4946e-06,
        -4.1257e-07, -1.8859e-05,  1.0935e-05, -4.4266e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0757e-04,  1.1806e-02, -2.7725e-05,  6.1941e-06, -1.7252e-05,
         2.6788e-02, -2.5695e-05,  2.8787e-05, -3.6403e-05,  4.2348e-06,
        -8.5248e-06, -1.1629e-06,  1.7172e-05,  2.7289e-05, -5.3173e-02,
        -3.8687e-07, -5.2872e-06,  3.8656e-03, -8.1275e-06, -4.0686e-02,
        -6.3285e-05,  2.0109e-05,  2.0117e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8412e-05,  1.3264e-01, -6.9706e-05,  1.9945e-05, -5.3897e-05,
        -4.1373e-03, -4.4887e-05, -1.0042e-04, -2.2202e-05,  5.1698e-05,
         2.8521e-05, -8.2347e-05, -6.3548e-05, -3.0410e-05,  5.1870e-02,
         4.2975e-06, -6.8920e-05, -4.0932e-05, -7.5655e-05,  3.7239e-05,
         2.6331e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5781e-04, -2.7105e-02,  5.0018e-06, -1.2052e-05,  9.3860e-03,
        -2.1929e-05, -1.5776e-05,  5.0155e-06, -6.2786e-03, -4.9667e-06,
         7.4861e-07,  5.5011e-06,  5.5202e-05, -4.8789e-06, -1.7815e-02,
         2.3906e-05, -7.7402e-06, -1.1679e-02,  1.0947e-05, -2.4540e-06,
         8.5449e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4113e-04, -1.1792e-02,  1.7376e-05, -1.0487e-05,  3.8698e-03,
        -8.8280e-06,  1.3430e-05,  2.4770e-05, -6.7489e-03, -2.4346e-06,
         8.2560e-06,  1.0188e-05, -9.7954e-03,  8.0731e-06, -1.2207e-02,
        -6.5680e-06,  1.6663e-05, -1.1165e-02,  5.1013e-06,  9.7706e-07,
         9.4899e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2750: [tensor([ 7.8566e-05, -3.0114e-02,  1.8388e-05,  2.9366e-05,  1.2153e-05,
         2.0530e-05, -4.1670e-02, -1.9145e-05, -7.6307e-07, -5.2581e-06,
        -9.5770e-06,  9.9192e-06, -9.5396e-06, -7.6823e-07,  4.7407e-05,
         9.5536e-06,  1.6457e-05,  1.7469e-05,  9.8655e-07, -2.7265e-05,
        -3.7721e-06,  9.0867e-06, -1.8670e-05,  4.3400e-06, -2.5350e-02,
         1.0500e-07,  1.0790e-05, -3.6940e-06,  5.1697e-06, -4.8154e-03,
        -5.0416e-06,  1.4006e-05, -1.5643e-05, -7.3942e-06,  8.3810e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.9605e-05, -9.0444e-02, -2.9474e-06,  5.9062e-05,  2.3721e-05,
        -4.5264e-06, -3.4309e-02,  3.3676e-06,  1.2810e-05,  4.5930e-05,
         1.5952e-05, -4.4785e-06,  1.1485e-05,  1.0850e-05,  1.7832e-05,
         1.9793e-05, -7.2145e-06, -3.7579e-06,  2.6335e-05, -1.1642e-06,
        -2.8926e-05,  2.5343e-05,  1.0486e-05, -1.8222e-06,  1.6874e-05,
         2.6483e-05, -5.8324e-08,  4.2538e-06, -3.3990e-02, -3.2950e-07,
         5.2359e-04,  4.0565e-06,  2.3883e-06, -2.6893e-02,  3.1861e-05,
        -4.4661e-06, -9.4298e-06, -8.8141e-06,  3.3820e-06, -3.7814e-06,
        -8.3565e-03,  8.1985e-06, -5.9237e-06, -2.0938e-05,  1.5209e-05,
        -1.8574e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6486e-05, -6.9679e-02, -6.9305e-06, -4.7851e-06,  1.1287e-05,
        -1.7709e-05, -4.8513e-02,  4.6958e-06,  2.7372e-05,  1.9943e-05,
         1.3576e-05, -5.6395e-06, -5.4524e-06,  5.4151e-06, -3.4279e-06,
         1.2868e-05,  1.5918e-06,  1.2967e-05, -1.1672e-05,  2.1995e-05,
        -7.1514e-06,  1.6054e-05, -2.1391e-05, -2.1099e-05, -4.8089e-02,
        -1.5719e-05, -2.1512e-05,  1.8499e-05, -6.6788e-03,  2.2402e-06,
         1.5300e-05,  3.0946e-05, -2.1557e-05,  2.7271e-05,  6.4681e-06,
        -2.2550e-05,  2.2165e-05, -1.0403e-05,  2.2265e-05, -2.1344e-05,
        -2.8287e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4660e-05, -4.0107e-02,  8.1190e-06,  1.8965e-04, -2.4602e-05,
        -1.8850e-05,  8.4064e-03, -1.3437e-05,  3.1652e-06,  3.8617e-05,
         1.5086e-05, -4.7172e-06, -6.7826e-06, -1.5576e-06,  1.2176e-05,
         7.5612e-06,  2.6519e-05, -2.5982e-06,  6.1528e-07, -3.7603e-06,
        -3.1039e-05, -2.5881e-06, -3.9998e-06, -1.3431e-05, -3.6707e-02,
        -2.8562e-06, -2.5041e-05, -9.7911e-06, -3.7656e-02,  6.3180e-06,
         1.5948e-05, -2.3065e-02,  1.7520e-05, -1.6357e-02,  2.0876e-06,
        -2.0909e-05,  4.0670e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2315e-05, -4.7320e-02,  1.2929e-05,  1.4186e-04,  3.9503e-05,
        -3.8665e-05, -2.1482e-02,  4.1340e-07,  8.7977e-06, -3.0873e-06,
        -7.8271e-06, -2.1183e-06, -2.8002e-06,  1.9168e-05,  1.7954e-05,
        -3.7266e-06,  3.0153e-05,  1.4653e-05, -1.4339e-05, -2.2240e-05,
        -1.2626e-05,  7.6520e-06,  1.9810e-05, -2.6570e-06, -4.0180e-02,
         8.2224e-06, -9.6334e-06,  2.2305e-06, -2.4012e-02,  1.1439e-05,
         1.4533e-05, -5.0030e-06, -1.8385e-05,  1.7358e-06, -1.6370e-02,
         2.1939e-05,  9.3474e-06,  1.0580e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4197e-05, -7.2522e-02,  1.0244e-05,  8.7895e-05,  5.3461e-06,
         1.6669e-05,  3.8590e-05, -3.1852e-05,  5.8681e-06, -1.3720e-02,
        -1.0184e-05, -2.2209e-05,  9.4211e-06,  2.3550e-06,  4.6601e-03,
         1.9011e-05,  2.8617e-05,  1.0655e-05, -5.1200e-06, -1.7915e-05,
         5.3783e-04,  1.6812e-05,  1.1242e-05,  6.6327e-06,  3.1203e-05,
        -1.6791e-02, -5.7215e-06, -5.4636e-07, -2.2441e-05, -2.4080e-02,
        -1.1725e-06, -7.4136e-06,  1.3214e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9179e-04, -7.8035e-02,  3.9269e-06,  4.1776e-04,  9.3871e-05,
         2.3826e-05, -2.2114e-05, -3.0121e-05, -5.7705e-06, -4.4700e-02,
        -1.4168e-05,  3.0321e-05,  1.8671e-05, -2.9177e-05, -4.0197e-02,
        -1.7882e-05, -3.1118e-05,  7.8852e-06,  3.6867e-06, -1.5821e-06,
        -4.8688e-02, -1.2793e-05, -1.4360e-05,  1.6861e-05, -3.3488e-05,
        -3.0770e-06, -1.1167e-05,  1.4351e-05, -4.7601e-05, -7.1048e-06,
         3.7102e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5106e-04, -4.8358e-02, -7.4484e-06,  6.2866e-05,  2.7561e-05,
        -3.0689e-05, -1.9166e-05, -1.3296e-05, -1.5767e-05, -3.1768e-02,
         1.0007e-05,  1.5870e-05, -1.3486e-05,  6.7658e-06,  3.9905e-03,
        -1.3700e-06, -9.5479e-06, -8.6934e-06,  7.1963e-06, -6.5479e-07,
        -3.0183e-06,  1.6695e-05, -5.3352e-06, -3.0479e-02,  2.4719e-05,
        -2.0166e-05, -1.6616e-02,  7.6880e-06, -5.7765e-06,  9.7787e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1031e-05, -7.4614e-06, -3.8651e-06,  8.3582e-06,  1.0770e-02,
        -1.6528e-05, -2.4180e-05, -1.2176e-03, -2.6143e-06, -1.4849e-05,
        -6.1255e-06,  1.0401e-06, -3.1827e-05,  1.0389e-05,  1.5403e-06,
        -1.5861e-06, -1.8887e-06,  4.3432e-03, -1.0320e-05, -1.0118e-05,
        -4.4417e-06, -5.1461e-05, -1.1793e-06, -2.2981e-06,  1.5342e-02,
        -2.0300e-05, -1.4102e-05, -2.4662e-06,  1.8140e-05,  3.2124e-02,
        -9.1229e-06, -6.3098e-06, -4.2256e-06,  7.0683e-06,  9.2462e-07,
        -8.6210e-06, -3.9238e-04,  1.3747e-06,  1.1047e-02, -5.3938e-06,
        -8.6175e-06, -5.6909e-06,  2.3507e-02, -2.1214e-05, -2.7469e-06,
        -1.2735e-05,  6.3073e-06, -1.3526e-05,  8.2524e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1091e-05,  6.7734e-06,  2.5785e-06,  2.4221e-06,  5.2829e-03,
        -1.1022e-05, -1.7055e-05,  1.0225e-02,  2.7943e-05, -5.6653e-06,
         5.7006e-07,  3.3883e-05, -1.4328e-05,  2.0928e-05, -5.5700e-05,
        -6.9104e-06,  4.4524e-05,  1.0085e-02, -4.0619e-05, -5.0069e-05,
        -8.9778e-06, -4.3685e-05, -6.8330e-06,  1.4205e-05,  4.8229e-02,
        -3.6642e-06, -4.2456e-05, -7.2224e-06,  1.8212e-05,  5.7825e-02,
         1.7364e-05, -2.6021e-05,  3.2854e-05, -1.5136e-05,  2.6865e-05,
        -2.4042e-05, -1.2154e-05,  7.5072e-02,  3.5059e-05,  3.9655e-06,
         2.2766e-06,  1.2322e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4318e-04, -7.6403e-06, -1.9712e-05, -2.8187e-05,  1.8598e-03,
        -3.2910e-06,  8.0215e-06,  5.5597e-03, -5.0674e-06, -3.3502e-05,
         1.3532e-05,  1.9077e-06, -2.7795e-05,  2.1115e-05, -2.2824e-05,
        -5.3266e-06,  6.1341e-06,  5.1431e-03,  8.5605e-06,  4.6662e-06,
        -1.4392e-05,  1.4955e-06, -3.1817e-06,  4.9761e-06,  6.2088e-03,
         4.3655e-06,  1.2111e-06, -2.5119e-05,  5.4378e-06, -5.5613e-03,
         5.0986e-06,  1.1139e-05,  8.0561e-06, -4.4885e-02, -5.0666e-06,
        -7.7681e-06, -7.2218e-02,  1.2517e-05,  1.7626e-05,  5.2755e-03,
        -1.4755e-05,  9.5993e-06, -5.3659e-06, -7.4784e-06,  1.2048e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9794e-04, -3.6552e-06,  7.0838e-06, -1.4052e-06,  2.9032e-02,
         2.9652e-06, -4.9483e-06,  2.4290e-03,  7.4916e-06,  2.7212e-05,
         1.1121e-05,  7.1217e-06, -2.6192e-05,  1.4578e-05,  1.1975e-05,
         8.1937e-06,  8.0851e-06,  5.3415e-03,  3.1690e-06,  2.5150e-06,
         7.8889e-06, -1.5998e-06,  1.1658e-05,  6.6162e-07, -7.6730e-03,
        -1.4889e-05,  9.8151e-06,  2.5102e-05,  7.4988e-06, -2.8134e-03,
         8.4465e-06,  3.5915e-06,  8.7109e-06, -1.3456e-05, -4.3299e-06,
         3.8770e-02,  3.4199e-05,  2.5808e-06,  1.7567e-05,  5.7911e-06,
         5.4546e-06, -8.4735e-05, -2.7600e-08, -9.7843e-06, -7.1320e-04,
        -3.7060e-06, -3.1351e-06,  8.1970e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2800: [tensor([-5.3785e-05, -1.7087e-02,  1.6831e-05,  4.7092e-05, -5.9660e-06,
        -5.9670e-07, -1.6587e-02, -3.7087e-06, -1.2385e-05,  3.2161e-05,
        -1.4987e-02, -5.0737e-07,  6.1003e-06, -1.4024e-06, -6.1852e-06,
        -9.7889e-03,  2.8835e-05,  7.9971e-06, -1.0877e-05, -6.5994e-06,
         1.8702e-03, -6.5135e-06,  5.8328e-05, -2.9769e-06,  9.9003e-06,
         9.1108e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2437e-05, -1.3871e-02,  2.5863e-05,  2.2299e-05,  6.9139e-06,
         1.1059e-05,  7.4226e-05, -7.9653e-03,  8.6457e-07,  1.2888e-06,
        -6.3481e-03, -9.2300e-03,  3.7577e-07,  8.3220e-04, -2.4649e-06,
        -4.0854e-03,  7.9946e-06,  3.0122e-06, -9.5957e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7688e-04, -5.7583e-02, -2.2643e-05, -2.7052e-06,  2.6623e-05,
         6.7832e-06, -4.8966e-05, -3.0628e-02,  3.2559e-05,  2.5220e-05,
         2.7728e-05,  1.9949e-03, -3.3037e-02, -4.1937e-05, -2.7636e-05,
         8.7048e-05, -3.6093e-03,  2.3199e-05, -5.0618e-06,  6.4260e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0572e-04,  2.1014e-02,  3.3892e-05, -6.4045e-05,  2.0394e-05,
        -7.9425e-06, -2.4007e-04, -7.4176e-02, -1.2861e-05,  6.0464e-05,
        -2.9668e-02, -3.1246e-05,  3.2441e-04, -4.5452e-05,  4.2274e-05,
         2.8133e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0307e-04, -4.2220e-02,  9.8814e-06,  1.0190e-06,  5.8267e-06,
         1.2440e-02,  3.4015e-06,  1.7959e-05,  2.2695e-05, -7.4978e-06,
         9.1749e-05,  5.0651e-06,  6.9136e-03, -1.7374e-02, -4.5621e-06,
        -9.2630e-06,  2.2591e-05,  1.8267e-05,  1.3443e-05,  5.0454e-06,
        -1.2063e-02, -2.1327e-05,  7.1380e-04, -3.6920e-06, -1.8886e-02,
        -1.9081e-05, -4.0526e-06,  6.6607e-06,  4.4064e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6421e-06,  8.2392e-03, -7.2375e-06, -5.5474e-06, -3.6772e-05,
         9.1699e-03,  4.7748e-06,  1.7796e-05,  1.3387e-06,  7.2105e-06,
        -2.0424e-05,  3.5647e-07, -7.5907e-05,  1.1667e-02, -1.6284e-05,
         1.3867e-05, -1.6548e-06, -5.3089e-06, -4.7377e-06,  2.1747e-05,
         6.1149e-03, -1.0195e-05,  3.6818e-04, -5.7816e-06, -1.0478e-05,
         9.9304e-04,  9.7762e-04, -3.9310e-06,  1.6601e-06,  1.7750e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3554e-05, -4.4264e-02,  1.5038e-05, -1.3379e-05, -7.1830e-07,
         5.9816e-03, -2.2728e-05, -1.5320e-05, -1.3490e-05, -2.7376e-05,
         3.3818e-05,  1.2250e-05,  2.0721e-03, -2.6424e-02, -8.9555e-07,
        -2.8094e-06, -1.0394e-05, -2.4092e-05, -3.1036e-06, -1.1405e-06,
        -2.3279e-03, -7.4483e-06, -4.3987e-06,  2.0745e-05, -1.5022e-02,
        -5.0556e-06, -1.5217e-06, -4.2008e-07,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5467e-05,  2.1232e-02, -9.9631e-06,  4.6155e-07, -1.7226e-05,
        -2.9360e-06,  4.9498e-05, -2.8975e-05,  1.7353e-05,  4.9511e-06,
        -9.3903e-07, -7.4396e-06, -2.3766e-05, -5.5858e-06,  5.8013e-07,
        -1.5677e-02, -7.5101e-06,  5.8620e-03,  8.3261e-06, -5.0004e-05,
        -2.5776e-02, -1.1749e-07,  6.1341e-06,  8.9949e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0558e-04,  7.2641e-02, -3.4299e-05, -3.7920e-05, -4.7507e-05,
         2.5535e-05, -8.9341e-05,  6.8786e-06, -3.0300e-05, -1.1766e-05,
        -1.3909e-05, -4.6216e-05, -3.6336e-06, -3.1250e-05,  3.7144e-06,
         9.5184e-03,  6.3112e-07, -2.5778e-05, -2.5821e-04, -4.8288e-05,
         2.9491e-02, -1.4024e-05,  4.6965e-05,  4.2462e-06, -2.1130e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3674e-04, -1.7098e-02,  9.6171e-06,  8.2570e-06,  6.9862e-06,
         2.3307e-06,  1.8876e-05,  2.0055e-06, -2.2049e-05,  1.3740e-05,
         5.5974e-06,  6.0747e-06,  2.1570e-05,  1.2463e-05, -1.2451e-06,
        -1.0792e-02, -7.9588e-06, -2.8448e-06,  5.7745e-04,  1.2987e-06,
        -3.1512e-02, -1.4158e-05, -9.5081e-06,  1.2230e-05,  9.7727e-06,
         1.4549e-05, -5.8523e-06,  5.4102e-06, -9.1216e-06,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6765e-05, -3.0212e-02,  2.1811e-05,  1.9332e-06,  7.2110e-06,
        -1.1199e-06,  3.0220e-05, -2.8081e-02,  1.1266e-05,  2.5733e-05,
        -1.3182e-05, -4.2883e-02,  6.7672e-06, -3.8505e-05,  2.0100e-05,
         2.3947e-05, -1.6727e-02, -1.4712e-05,  8.4994e-06,  1.5906e-03,
         3.6231e-05, -6.9229e-03,  9.7453e-06, -8.5527e-06, -7.1561e-07,
         1.2353e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6088e-06, -7.9356e-02, -1.7103e-05, -6.8125e-05,  1.9704e-05,
        -7.1677e-07,  1.4748e-05, -3.1048e-02,  8.9119e-06,  1.3733e-06,
        -1.9066e-05, -3.2067e-02, -1.3755e-05, -4.4304e-05,  9.7379e-06,
         1.3087e-05, -3.7180e-02, -3.0343e-05, -1.3079e-05,  4.2774e-06,
         2.6409e-05,  4.9431e-07, -1.4697e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2850: [tensor([ 6.9818e-05, -4.8491e-06,  1.4137e-05,  4.6568e-04,  3.2767e-05,
         1.2457e-05, -1.6872e-05, -9.3962e-06, -1.7096e-02, -1.1205e-05,
         5.9926e-03,  2.6149e-06,  1.0895e-03,  7.5975e-06, -2.0578e-06,
        -3.9250e-06,  2.7771e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4681e-05, -2.4918e-06, -2.5144e-05,  1.4752e-03,  1.3810e-05,
        -1.6210e-05, -1.0420e-05,  3.5581e-06, -2.1641e-02,  1.1607e-05,
         2.2725e-03, -9.1619e-07, -9.0468e-06, -2.2064e-02,  1.5759e-06,
         2.2299e-05,  8.7013e-06,  4.4540e-04,  1.6144e-05, -2.6613e-02,
         4.6221e-05,  3.6682e-06,  8.3956e-07,  1.4280e-05,  1.6566e-06,
        -1.4419e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1919e-05, -3.9681e-05,  1.1150e-05,  2.9551e-04, -4.8799e-05,
        -3.9709e-05,  3.0290e-05, -3.2801e-05,  3.2719e-05,  1.8422e-04,
        -7.6159e-02, -7.6359e-06,  3.9698e-05,  2.9064e-05, -1.3421e-03,
        -4.5489e-05, -3.4385e-05,  5.1381e-05, -5.3613e-05,  2.5740e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4361e-04, -3.6212e-05, -2.2859e-05,  1.7026e-04, -1.5768e-06,
         3.7853e-05,  5.4034e-05,  2.8488e-05, -1.4298e-02,  1.2732e-05,
         2.2196e-03, -6.1779e-02, -1.3708e-05, -4.1809e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3253e-05,  3.4993e-05,  4.6107e-05,  8.9313e-04,  1.6365e-05,
         2.2408e-05,  2.7617e-05,  1.7868e-06, -3.5909e-02,  3.6815e-05,
         2.3135e-05,  4.3158e-05,  1.7017e-05,  1.1250e-06,  2.4214e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2451e-04, -1.6916e-05, -1.8493e-05,  8.3921e-04, -2.5666e-06,
         1.0099e-05, -8.9003e-06,  2.4837e-06, -8.7615e-03, -1.2226e-05,
         1.2437e-04, -7.0334e-06,  2.8641e-05, -1.7477e-02,  1.1530e-05,
         5.2309e-06, -8.9549e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8332e-05,  5.2567e-05, -7.6032e-06,  2.2226e-03,  6.3850e-05,
         3.3629e-05,  9.1742e-06,  8.2640e-05,  3.2372e-02, -4.4430e-05,
         5.5153e-04,  1.2743e-05, -1.3343e-07,  4.0106e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9274e-04, -8.0537e-06, -2.2223e-05,  4.8470e-04,  2.0854e-05,
         8.1728e-06,  4.3860e-06, -2.4291e-06, -2.5489e-02,  1.9913e-05,
         8.7308e-03,  3.1628e-05,  2.2414e-05, -1.3852e-05,  3.1570e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4227e-05, -2.1566e-02, -2.4562e-06,  1.5586e-05,  1.2392e-04,
        -2.7920e-06, -1.7181e-02,  5.3061e-06,  9.4230e-06,  3.0996e-06,
         1.2495e-04,  8.6443e-06, -4.5272e-07, -1.0019e-06, -3.4275e-06,
        -5.9667e-03, -7.9304e-06, -6.4595e-03, -2.4870e-06, -1.2935e-02,
        -2.1507e-05, -1.3271e-05, -1.4716e-05,  4.3547e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3576e-05, -3.7273e-02, -2.0245e-05,  1.5744e-05,  1.7980e-04,
         2.0296e-06, -3.0216e-02,  6.1942e-06,  1.4566e-05, -5.3231e-08,
         1.3486e-04,  1.8370e-06,  6.5622e-06, -8.2057e-06, -1.6202e-05,
        -5.3338e-03,  4.5035e-06, -2.4616e-05, -2.3271e-02, -2.7519e-06,
         1.8616e-05,  8.4259e-05, -2.4465e-05,  2.3118e-05,  8.1427e-05,
        -5.6150e-06,  1.4181e-03, -7.9206e-06, -5.6056e-06, -3.4041e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4346e-04, -1.2608e-02,  1.2616e-06, -2.8044e-06,  1.6524e-04,
        -2.7797e-07,  6.3089e-03,  2.4909e-05,  6.6267e-06,  8.5314e-07,
         1.4649e-04, -7.6697e-07,  1.8108e-05, -9.1815e-06, -2.5944e-06,
        -9.4659e-03,  5.9656e-06, -1.1371e-05, -8.8782e-06,  1.0626e-04,
         3.8351e-06, -7.4882e-06,  2.1294e-05,  1.8030e-05,  6.9502e-04,
         6.1644e-07, -1.6827e-05,  3.6200e-04,  3.9641e-06, -3.8879e-02,
         2.9861e-05,  2.1058e-05, -3.3556e-07,  1.8329e-06, -6.7006e-06,
         8.4275e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2536e-04, -6.8922e-02, -2.1769e-05, -1.8020e-05, -3.6595e-02,
         1.5079e-05, -3.2101e-05,  4.7175e-06,  1.2864e-05, -4.5406e-02,
        -6.6901e-06,  4.3634e-05,  1.2346e-03,  2.8121e-05, -5.0025e-05,
         3.6311e-05, -4.6875e-05,  5.3707e-06, -1.9762e-05, -1.2442e-05,
         2.1714e-04, -5.0015e-05,  1.2166e-05,  1.9422e-05, -2.9548e-05,
        -1.5518e-05, -3.8138e-02, -8.7865e-06,  1.2845e-05, -5.1308e-06,
         2.4252e-05, -1.7825e-05, -1.1227e-02, -2.8131e-05,  5.5169e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2900: [tensor([ 1.3587e-04, -3.3942e-02, -7.4768e-07, -1.0741e-02, -3.1015e-02,
        -3.7488e-05, -4.4440e-06, -9.0309e-06, -8.9943e-06, -9.7034e-06,
        -2.4722e-05, -4.1545e-05,  2.2221e-06, -1.4774e-05,  5.9826e-05,
        -1.0388e-02,  9.1070e-06,  1.4342e-06, -3.0397e-06, -9.7964e-06,
        -9.7286e-03, -5.7448e-07,  1.2075e-05,  9.0297e-04,  1.5207e-03,
        -2.4914e-05,  2.2223e-06,  4.8060e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5662e-05, -2.9212e-02,  2.7522e-05, -8.8722e-06, -2.1708e-07,
         2.7541e-03, -2.0131e-06,  3.7428e-05, -1.6136e-02,  2.2177e-05,
         1.2481e-05,  1.1448e-05,  1.7369e-06,  2.5891e-05,  6.9323e-04,
        -1.3198e-05, -5.5740e-05, -4.0228e-06,  9.7293e-04, -4.5608e-06,
         1.2221e-05,  1.0907e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5791e-05, -5.8356e-02,  5.9127e-06,  1.4935e-05, -3.0198e-05,
         3.3123e-03, -7.8551e-06,  5.6155e-06, -7.6590e-03, -1.0607e-06,
         2.2836e-05, -1.0724e-05,  3.1767e-06, -9.8740e-06,  6.9692e-03,
        -1.9480e-05,  2.7069e-05,  3.7972e-03,  5.4640e-08,  1.9499e-03,
        -4.7563e-05, -8.0488e-03, -1.8092e-05,  1.4171e-05, -2.9584e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0542e-05, -2.2572e-02, -7.8025e-06,  1.5457e-05,  8.9695e-06,
         1.7670e-02,  5.2487e-06,  2.4394e-05, -1.9750e-02,  6.5723e-08,
         3.2144e-05,  1.6288e-06,  5.2962e-06,  5.1754e-06,  3.9535e-03,
        -1.0418e-05,  5.8652e-05,  3.9081e-06,  6.2769e-04, -4.2708e-03,
        -8.1929e-06, -2.5281e-07,  1.0886e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6086e-05, -7.9976e-03, -1.6341e-05,  3.8217e-06, -4.3900e-02,
         9.8103e-05,  2.2546e-04, -3.8496e-02,  3.1117e-05,  2.6913e-05,
        -7.3936e-02,  6.7184e-05, -3.8575e-06, -1.2260e-05, -5.3362e-02,
         2.0817e-05,  1.9987e-04, -8.4862e-07,  9.4044e-05, -2.5021e-02,
         4.8176e-05,  3.4665e-05,  2.4487e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0351e-05, -3.7823e-02, -2.7233e-05,  6.7876e-07, -4.2989e-02,
        -1.3111e-05,  6.1173e-04, -2.9839e-02,  4.4846e-06,  8.7630e-06,
         4.9792e-04, -1.2980e-05, -1.3805e-06,  5.3727e-06, -2.1684e-02,
         1.8194e-05,  2.8844e-06, -3.4321e-02, -3.9872e-06,  1.5944e-05,
         4.7075e-05,  6.5618e-05,  4.5099e-05, -3.6347e-05,  2.1130e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1308e-06, -3.1723e-06, -4.4191e-06, -1.1286e-04,  1.1969e-04,
         1.9951e-04,  3.5270e-05, -1.6115e-05, -5.3390e-05,  1.5935e-05,
         1.1959e-05,  3.4047e-05, -5.1091e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1383e-04,  1.4147e-05,  5.8679e-07,  4.2766e-05, -7.9075e-07,
        -7.3662e-05,  4.0559e-05,  1.4661e-06,  4.3387e-06,  1.7023e-06,
        -1.1914e-05, -9.5824e-06,  2.0874e-02,  2.8701e-05,  1.7895e-06,
         2.0302e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0031e-06,  7.4647e-06, -1.8084e-05, -3.4671e-05,  7.3210e-05,
         1.5637e-04,  1.6044e-05,  1.8917e-06, -2.3369e-05,  1.7905e-05,
         1.4508e-04,  1.3595e-04,  9.5767e-07,  2.0551e-05, -1.6644e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2542e-04, -9.5005e-02,  2.4710e-08,  7.3404e-06,  9.4485e-05,
        -1.5482e-05, -3.7967e-05, -3.9453e-05,  1.6483e-03, -4.7700e-03,
         7.5173e-06, -6.5604e-06,  6.0721e-05, -8.2751e-06,  1.4086e-05,
        -3.1412e-05,  1.2522e-05, -2.6088e-02,  3.1950e-06,  5.9705e-05,
        -3.0431e-02,  4.6895e-06,  3.2111e-05,  4.4153e-06, -3.9213e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1853e-05,  1.6112e-01,  9.8246e-05,  2.2778e-05, -2.1881e-04,
         1.8986e-05,  3.0654e-05,  3.3342e-05, -2.5044e-03, -2.1485e-03,
        -9.2999e-06, -9.0583e-06,  3.8434e-05,  2.8349e-05,  2.8139e-06,
        -1.6176e-05, -1.0367e-05, -2.4777e-03,  5.3145e-05,  3.4015e-05,
         3.8329e-05,  2.8534e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7884e-05,  1.1724e-01,  4.7864e-05, -1.3230e-06, -2.9017e-04,
         4.9119e-07, -6.7664e-05, -5.1136e-05, -3.2186e-03, -5.9944e-04,
         4.6381e-05,  7.2543e-06,  3.1108e-05,  3.6167e-05, -6.0063e-06,
         6.2429e-06,  4.4015e-02,  4.0171e-05, -2.4953e-03,  3.1065e-05,
        -4.6273e-05,  2.6305e-05, -3.7363e-05, -4.6395e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2950: [tensor([ 4.2332e-05,  7.5869e-06,  7.4894e-05,  3.3273e-04, -3.2238e-05,
        -4.6432e-06,  3.9888e-05,  7.6078e-06, -4.0361e-06,  6.4805e-06,
        -1.9752e-05,  6.6129e-05, -2.1003e-05,  8.9279e-06,  1.7037e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5397e-05, -2.7140e-05, -7.6086e-06,  7.2229e-05, -4.4700e-05,
         3.2833e-05, -7.9214e-05, -2.2368e-05, -1.6835e-05, -6.6201e-02,
        -3.7916e-05,  2.8790e-03, -1.7205e-06, -4.2868e-02,  1.2551e-06,
         2.6530e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4114e-05,  7.4669e-06,  1.6191e-05,  4.2231e-05,  2.6868e-05,
         1.0859e-06, -9.9913e-05,  2.2346e-05, -3.9017e-06,  1.7061e-02,
        -1.8913e-06, -1.1127e-03, -1.0787e-05,  3.3069e-05,  2.5191e-05,
        -3.8031e-04,  5.9898e-06, -2.8026e-03, -5.4744e-06,  3.3846e-05,
        -1.1557e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8549e-05, -2.2671e-05,  1.0156e-05,  4.5985e-05,  3.9123e-05,
        -3.4725e-06,  3.8046e-05,  1.1238e-05, -2.9914e-05, -3.7030e-02,
         1.6399e-05,  9.6192e-06, -3.0665e-02,  1.0915e-05, -2.8097e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4111e-04, -4.6098e-02, -2.0487e-05,  1.3285e-04, -3.2062e-02,
         1.8315e-06, -2.3970e-05,  3.6288e-05, -7.0475e-07,  1.6167e-05,
         1.6363e-06,  3.2553e-05, -1.4802e-05,  2.6280e-05,  2.0335e-05,
        -1.3239e-02,  8.5892e-06, -1.9837e-06, -8.6692e-06,  8.4196e-06,
         1.4427e-05, -1.4926e-02, -1.8529e-05,  1.1193e-05,  5.5309e-05,
        -1.2581e-05,  3.5476e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0390e-04,  1.9264e-02,  4.3550e-06,  6.0559e-05, -2.8795e-02,
         1.6386e-05, -4.6297e-06,  2.5761e-05,  7.6232e-07, -4.3318e-06,
        -7.7172e-06,  9.1058e-07,  2.4905e-07, -9.4315e-06,  7.0289e-06,
        -1.4616e-02, -1.9763e-05,  5.1695e-06,  1.4246e-05, -1.1901e-02,
        -3.1307e-05,  3.8900e-06, -1.0308e-05,  5.3634e-03, -1.5777e-05,
         2.1096e-05,  1.5463e-06, -7.5713e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5756e-04, -4.9077e-02, -1.4263e-05,  7.9167e-05, -6.9626e-03,
        -2.0854e-05,  7.9424e-06,  8.5728e-06,  1.1747e-05,  1.4194e-05,
        -9.8287e-06,  1.3054e-05,  7.7703e-07,  8.5142e-06,  5.0891e-06,
        -1.6464e-02, -8.3310e-06,  1.4296e-05, -8.6097e-07, -1.0278e-02,
         8.9581e-06, -2.5172e-02, -1.5402e-05,  1.9462e-04,  1.1647e-07,
         1.6280e-05, -8.0012e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2750e-04, -4.8328e-06,  1.9056e-07, -2.0030e-02, -5.9785e-06,
        -6.8059e-07, -2.4057e-06, -2.3294e-05,  1.6562e-05, -1.5983e-05,
         3.6525e-06,  1.2730e-06, -8.6823e-06,  6.0178e-06, -9.0630e-06,
        -5.2189e-06, -2.3974e-05, -1.6556e-02, -1.3509e-05,  2.5380e-05,
         1.2183e-05, -3.9058e-06,  1.8575e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6237e-05, -2.6551e-06,  2.1537e-05, -2.8134e-02, -6.8769e-06,
        -2.9352e-05,  2.0892e-05, -5.6963e-05, -3.4211e-05, -1.5112e-05,
        -3.5356e-05, -3.0890e-05,  7.0284e-07,  6.8199e-06,  1.6915e-05,
        -3.4023e-06, -1.6530e-05, -4.4745e-02, -4.1978e-05, -4.7731e-06,
        -5.8011e-06, -3.3114e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2728e-04, -8.8605e-06, -1.0757e-05, -2.9851e-02, -2.5375e-05,
        -2.5246e-05,  8.6880e-06, -3.3578e-05, -7.3119e-06,  1.6444e-05,
         1.1932e-05,  1.4660e-05, -6.2126e-05, -3.6645e-06, -2.6241e-05,
        -5.6225e-06,  1.0718e-05, -5.3246e-02, -4.3599e-05,  1.4425e-05,
         2.7707e-05,  5.5602e-06, -1.0008e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2182e-05,  7.3475e-06,  1.2924e-06, -2.0218e-05, -3.5599e-02,
        -2.4692e-05, -1.9072e-05, -2.4677e-02, -2.6789e-05, -1.0801e-05,
         2.3518e-05, -1.8304e-02, -1.7622e-05,  2.4612e-05, -5.3494e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9719e-04, -1.2751e-05,  6.1187e-06,  7.5655e-05, -4.5927e-02,
        -3.4009e-05, -2.3845e-05, -3.0263e-02,  1.1154e-05, -2.4304e-05,
         3.4700e-05, -3.8808e-02,  2.4702e-05, -6.5375e-06, -5.7454e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #3000: [tensor([ 8.4240e-05,  1.8759e-02,  1.0114e-05,  2.1993e-05,  1.9093e-04,
        -1.6413e-05, -9.1772e-05,  7.8831e-05,  5.5329e-05, -3.0808e-01,
         3.3107e-05,  5.7600e-05,  2.0866e-05, -4.8180e-05, -4.1222e-05,
        -3.2283e-05,  7.9462e-05,  4.6165e-05,  1.6040e-04,  2.0825e-04,
        -1.3273e-04, -8.5590e-05,  1.3897e-04, -3.8078e-05,  1.0141e-04],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2131e-04,  4.2927e-02, -1.0151e-06,  3.4462e-05,  2.1407e-06,
         6.4681e-05, -5.4641e-06,  4.8059e-05, -3.3751e-05, -3.9558e-05,
        -3.6521e-06,  1.2606e-06,  1.5465e-02,  2.6880e-05,  2.7165e-05,
         1.7960e-04,  7.4620e-03, -3.3723e-06, -1.2740e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7301e-05, -6.2091e-02,  3.0678e-06,  1.7287e-05, -2.0922e-05,
        -4.2011e-05, -5.9919e-05,  3.8347e-07,  5.6357e-04, -2.6034e-06,
         3.5305e-05, -7.5374e-08, -3.3315e-02, -5.2309e-05,  1.8743e-03,
        -1.0295e-05, -5.3004e-05,  6.9114e-06, -3.2130e-02,  8.4965e-06,
         1.8437e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3715e-05,  3.2967e-02,  4.2158e-06,  1.0968e-05, -7.1057e-06,
        -4.6949e-07, -3.0610e-05,  8.3516e-06, -7.5719e-04, -2.7430e-05,
         1.7898e-05,  3.8803e-06,  1.3371e-02, -1.5806e-06, -2.5557e-05,
         8.9522e-03,  1.3801e-07,  1.5468e-05,  3.1775e-05,  1.5220e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.8438e-05, -5.7067e-02,  2.0902e-05,  2.2766e-03,  2.2996e-05,
        -1.3158e-02, -6.7657e-06, -5.1697e-06, -3.6837e-02,  4.2946e-07,
         3.0820e-05, -8.8753e-06,  1.7677e-05,  3.5629e-05, -1.1050e-02,
         5.3285e-06, -1.8766e-02,  4.4387e-05, -3.6664e-02,  2.5423e-05,
         1.3587e-05,  8.0529e-06,  9.8066e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5201e-05,  1.6173e-03,  2.9349e-05, -1.3556e-05,  1.8150e-05,
        -2.4311e-02,  7.7374e-06,  1.7643e-05, -1.9003e-02,  1.7446e-05,
         1.5719e-05,  1.1622e-06, -7.3977e-06, -6.5428e-06, -1.7137e-02,
         9.4538e-06, -1.2984e-02, -1.6991e-05,  2.1275e-05, -1.8118e-06,
        -1.2553e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5867e-05, -5.9061e-02, -8.2921e-06,  2.0244e-03,  3.6380e-07,
         7.3978e-03,  2.6723e-06,  4.1371e-06, -1.5594e-02,  1.9656e-05,
        -6.2602e-06, -2.2364e-06, -1.3650e-05, -6.1428e-06, -1.9803e-05,
         1.8703e-05, -5.1548e-03, -4.7753e-05,  1.1044e-03,  6.1498e-06,
        -4.5518e-06,  7.1992e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2099e-05,  9.8819e-02, -5.9459e-07, -8.8099e-05, -2.7777e-05,
        -3.3671e-05,  8.0353e-05,  6.3822e-02, -3.6718e-06,  4.3914e-05,
         2.2085e-05, -4.1822e-05,  2.0959e-05, -1.5278e-05, -7.7528e-03,
        -3.0174e-05,  2.8765e-06,  5.7496e-06,  5.5880e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2641e-04, -3.6674e-01, -1.2506e-05,  5.1788e-04,  1.3923e-05,
         2.3926e-05, -8.7881e-05, -1.3887e-01,  1.3986e-04, -1.5451e-04,
        -1.2339e-04,  1.0333e-04,  1.0743e-05,  5.5669e-05, -1.2005e-02,
        -1.0289e-04,  2.2774e-05, -5.3805e-05,  4.3894e-04,  2.1384e-04,
         1.2040e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1693e-04, -8.3369e-02, -5.3018e-05, -1.9501e-06, -1.1198e-05,
         2.7792e-05, -3.0759e-05, -4.0307e-02,  2.2195e-05, -2.8025e-05,
        -3.7275e-05,  2.2528e-05, -1.8875e-05, -2.6409e-05,  1.0669e-02,
        -8.4688e-06, -1.2935e-04, -4.5755e-05, -6.8340e-06, -3.9214e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0333e-05, -1.1107e-01, -7.1716e-06,  2.5072e-04, -6.2883e-05,
         1.0591e-04, -1.9700e-05, -5.3944e-02, -2.4878e-05, -3.7358e-05,
        -2.6145e-05,  2.4300e-05, -1.8963e-05,  4.7499e-05,  5.3619e-03,
         6.6256e-06,  2.2760e-04,  3.4135e-05,  2.5582e-05,  6.1923e-05,
        -3.0201e-06, -7.0364e-06, -8.8506e-07,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1595e-05, -5.2794e-02,  1.8628e-05,  2.9993e-04,  2.8511e-05,
         1.0581e-04, -1.7117e-05, -2.3001e-02,  8.9576e-06,  2.3737e-06,
         4.4801e-06, -7.6543e-07, -1.3642e-06, -3.0744e-06, -2.6053e-02,
        -5.0876e-06,  2.1969e-04, -1.6418e-05, -3.1639e-05,  1.8592e-04,
        -1.6844e-02, -2.3187e-05, -2.2585e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3050: [tensor([-6.0096e-05,  1.1434e-05,  2.1962e-05,  1.1794e-05, -2.5327e-05,
        -5.4201e-06, -5.6890e-06,  5.0650e-07,  4.0200e-06, -8.2922e-06,
         3.9997e-05,  2.5436e-02,  2.3687e-05, -1.7497e-06, -3.1566e-06,
         2.4836e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9425e-05, -1.2460e-06,  2.7588e-06, -9.0670e-06,  2.2694e-05,
         6.4672e-06,  3.6485e-05, -6.6937e-06,  1.8423e-06, -2.7157e-05,
         1.1441e-02, -4.1634e-05, -8.8263e-06, -1.9838e-05,  2.4296e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4223e-05, -2.4632e-02,  1.2130e-05,  2.7691e-05,  2.1149e-05,
         1.0094e-05,  6.4528e-03,  3.0161e-05, -3.4816e-06,  1.7189e-05,
        -2.9462e-02,  9.5197e-06,  5.5301e-06, -1.8759e-07,  1.4113e-05,
        -1.6034e-02,  3.7875e-06,  9.2070e-06,  4.6374e-05,  3.0945e-05,
         1.4565e-05, -1.1148e-05, -1.2095e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7660e-05, -2.8357e-02,  1.3975e-05,  5.0377e-07,  1.6475e-05,
        -5.5155e-06, -1.0025e-02,  7.9187e-07, -1.7920e-05, -8.1072e-06,
         8.4432e-03, -1.6163e-05, -4.3915e-07, -2.1505e-06, -1.2299e-05,
        -8.3110e-03,  1.5538e-05, -6.3101e-06,  1.8798e-03,  1.1932e-05,
         1.0631e-05,  1.2331e-05, -1.0760e-05,  1.0163e-05, -7.8740e-06,
        -7.6864e-07,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6172e-05, -1.8463e-02, -2.3860e-06, -1.9187e-05, -2.3184e-06,
         7.6665e-06,  9.4689e-03, -1.7840e-05, -5.1471e-06,  5.0016e-06,
        -2.3534e-02, -1.3428e-05,  1.2917e-06, -1.3107e-05, -1.0157e-05,
        -7.1651e-03, -4.5164e-06, -8.4231e-06,  4.8652e-06, -1.4256e-05,
        -1.2056e-05,  3.0549e-04, -3.1773e-05, -1.6389e-02, -1.3736e-05,
        -1.3291e-06, -3.6469e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4155e-05, -8.5845e-05, -2.7628e-05, -4.2045e-05,  2.4586e-02,
         1.4930e-04, -8.0074e-05, -1.4706e-04,  2.4056e-05,  1.1459e-04,
        -2.2142e-06, -1.5058e-01,  3.9555e-06,  6.3700e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5660e-05,  5.0350e-05,  7.6382e-06,  1.5499e-04, -5.7130e-02,
         6.3702e-05,  2.6997e-05,  2.3198e-05, -2.5362e-05,  1.6848e-05,
        -5.2416e-05,  5.0314e-05, -5.8970e-02, -1.6444e-05, -1.9660e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5969e-04,  3.1014e-05, -4.7227e-05,  1.1905e-05, -1.0902e-01,
         3.3118e-05,  3.3873e-05, -1.8466e-05, -1.7172e-06, -9.0928e-05,
         5.0595e-05,  2.1500e-05,  4.8378e-05, -3.7034e-02,  5.1107e-05,
        -3.0520e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5093e-05, -5.7889e-02, -1.3814e-05,  2.0728e-03,  1.1316e-06,
        -7.2780e-03,  2.7433e-05,  1.7709e-05,  1.3930e-03,  8.8879e-06,
         1.0784e-05,  1.6437e-05,  1.1580e-05,  5.2853e-06,  2.3668e-05,
        -1.6493e-02,  1.1992e-05,  4.2839e-05,  3.0010e-05,  2.7164e-05,
         2.9800e-06, -1.3180e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0880e-05,  9.9723e-03, -2.2299e-06, -1.0473e-03,  6.0505e-06,
         1.4184e-02, -6.5571e-06,  2.0742e-05,  1.5382e-05, -5.9293e-07,
         8.9825e-06, -5.9613e-07, -1.2368e-05,  1.4957e-05,  1.1071e-02,
         8.8243e-06, -2.8253e-04,  1.0039e-05,  9.9621e-03, -6.7230e-05,
         1.6236e-05,  2.0166e-05, -4.0358e-06, -9.3551e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3821e-04, -2.5756e-02,  2.4215e-05,  1.2623e-03, -5.8185e-06,
         6.1626e-03, -2.7476e-05,  3.4079e-05,  1.2718e-03, -3.0217e-05,
         7.7623e-06,  8.8566e-06,  2.9791e-06, -2.1396e-05,  2.5727e-05,
        -3.9261e-02, -2.9597e-05, -9.0033e-06,  1.3003e-02, -1.7692e-05,
        -3.4448e-05, -1.0466e-05,  1.7814e-05, -1.3656e-06,  5.5819e-06,
         1.1970e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3212e-04, -7.5602e-02,  6.2538e-06, -2.2786e-05, -3.0018e-02,
        -3.1061e-05, -9.2299e-05, -8.6658e-06,  5.0102e-05, -5.0141e-06,
        -5.1453e-06, -3.8928e-05, -5.1443e-02, -4.0366e-05, -2.7170e-05,
         1.4912e-05, -1.5525e-02, -1.1211e-05,  5.2923e-05, -1.3621e-05,
        -3.5859e-05, -1.0451e-05, -1.7540e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3100: [tensor([-7.5944e-05, -3.8146e-02,  1.1811e-05,  9.9214e-06,  1.0048e-05,
         1.6570e-05, -4.5725e-02, -1.5398e-05,  1.1866e-05, -1.7912e-05,
         1.6285e-03,  7.3188e-04, -8.3611e-06,  3.0418e-06, -1.2231e-05,
         9.1166e-06,  1.1965e-02,  6.0416e-06,  1.8495e-05, -5.6236e-07,
        -1.9789e-02,  1.5123e-05, -1.8047e-05, -7.1793e-03,  7.3771e-06,
         9.1931e-07, -3.0808e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1556e-05, -5.0856e-02,  1.4299e-05,  7.5289e-07,  6.9532e-05,
        -2.0988e-06, -2.2775e-05,  2.0535e-05, -8.1480e-03,  4.2029e-06,
         2.8885e-05,  2.0578e-05, -3.8539e-02,  1.0765e-05,  1.3166e-05,
         3.7010e-06, -1.2540e-02,  3.0698e-05,  1.4721e-04, -2.4552e-05,
        -1.5083e-02,  1.5779e-05,  1.2940e-05, -4.6959e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.3227e-05, -5.4548e-02, -1.3446e-05, -3.1837e-05,  3.7637e-05,
         3.7245e-05, -2.2143e-05, -1.4480e-05, -1.0817e-02,  8.6104e-06,
         1.0604e-05, -2.8897e-05, -1.7013e-02,  1.4046e-05, -1.3746e-05,
        -1.6304e-06, -1.4674e-02, -4.3888e-05,  4.6852e-05,  2.1371e-05,
        -2.2401e-02, -1.4279e-05, -3.3578e-05, -1.7688e-02,  4.9932e-06,
         1.5860e-05, -1.5146e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7248e-05, -2.9463e-02, -6.5530e-06, -6.8863e-07,  9.5186e-05,
        -4.9890e-06, -1.3535e-05,  1.2955e-05, -9.6786e-03, -6.4049e-06,
         1.3831e-05,  5.3354e-06, -7.2846e-03,  1.1566e-05, -4.0204e-07,
        -4.9774e-06, -1.3405e-02, -7.1213e-06,  5.0704e-05,  5.8117e-06,
        -2.0086e-02, -1.1862e-06, -1.3878e-05, -8.5953e-04,  1.1896e-06,
        -7.2960e-06, -7.3563e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6078e-04,  1.4190e-04, -4.0522e-05, -3.7293e-05,  9.2346e-05,
        -1.6317e-01,  1.7196e-04,  7.1028e-05, -3.2109e-05, -8.5606e-02,
         8.4280e-06,  3.5184e-03, -6.9663e-05,  3.6693e-05, -7.5150e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5403e-05, -1.1837e-04, -3.3634e-05, -1.1238e-04,  2.1351e-04,
        -1.4439e-01, -3.2172e-05,  1.9029e-07, -1.5017e-05, -1.4711e-01,
        -3.7802e-05,  2.4540e-03, -3.8571e-05, -5.3836e-05,  6.8663e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9258e-05,  6.4198e-05, -3.1496e-05,  4.2419e-05,  6.4846e-05,
        -7.6712e-02,  4.7476e-06, -5.3111e-05,  2.4842e-06, -8.0283e-02,
         1.1264e-05,  6.1180e-03,  9.2154e-05,  3.9970e-06, -5.6494e-02,
         5.1948e-05, -2.7935e-05, -1.7482e-06,  1.3963e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2521e-05, -4.9400e-02, -1.7123e-05,  8.8734e-06,  1.0557e-04,
        -8.7427e-06,  2.3787e-03,  1.4300e-05,  7.6753e-05,  1.9705e-05,
        -2.2898e-02,  4.0980e-05,  9.0709e-06,  7.7188e-06, -2.2731e-02,
        -9.7872e-06,  7.9704e-05, -1.4056e-05, -2.6870e-05,  2.3360e-05,
        -1.2121e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0202e-05, -4.9844e-02,  2.8938e-05,  1.2331e-05,  8.4549e-05,
         1.7363e-06,  3.0115e-04, -2.7110e-05, -2.7493e-05,  7.9777e-06,
         1.5206e-02, -3.0105e-05,  1.7256e-05,  2.4215e-05, -5.2855e-05,
        -2.6544e-03, -1.4055e-02,  3.4626e-06,  7.8735e-04, -1.7683e-02,
        -1.6144e-06,  1.4315e-05,  8.1697e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4431e-06, -1.1697e-01,  3.1887e-05,  2.0693e-05,  1.4389e-04,
        -4.3098e-06,  7.0621e-04,  2.7470e-05,  1.0516e-04,  1.2643e-04,
        -5.5872e-02, -4.4730e-05, -2.4814e-05, -2.0652e-07, -2.0473e-02,
        -7.5046e-06,  5.3688e-04, -2.0941e-05, -5.2381e-05, -1.2305e-05,
         4.8375e-06, -2.1834e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9345e-05, -1.1466e-01,  6.5372e-06,  8.8489e-06, -2.3110e-05,
         4.2733e-05,  1.6817e-05,  4.5751e-05, -4.2504e-06,  1.9852e-03,
        -1.6690e-05,  3.9859e-05,  1.0988e-06, -4.2371e-02, -2.4755e-05,
        -3.3145e-05,  2.4864e-04, -2.5686e-06,  2.2935e-05,  6.8668e-04,
        -4.0206e-05, -3.7831e-05,  2.0350e-05,  2.7310e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3279e-05, -2.4036e-02, -9.4094e-06,  5.6705e-06, -1.0038e-05,
        -7.8911e-06, -1.2868e-05, -1.9488e-05, -1.9023e-05,  4.9203e-04,
        -5.4013e-06,  6.8283e-06, -7.2818e-06, -1.5291e-02, -3.5893e-06,
        -3.5493e-03, -1.8587e-06, -1.5782e-02, -8.4078e-06,  3.2959e-04,
        -1.8802e-05, -1.3182e-02, -1.8018e-05,  3.1544e-05,  1.3816e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3150: [tensor([ 1.6366e-04, -2.7261e-02,  3.9279e-06,  5.2046e-05, -2.7999e-02,
         1.8439e-05,  2.1751e-03, -3.2579e-02,  5.4051e-06,  2.7119e-05,
        -2.5804e-05,  1.3149e-05, -2.6723e-02,  4.5907e-05,  2.0786e-03,
        -1.1103e-02,  6.8578e-06, -6.2721e-06,  1.5278e-05,  1.7004e-05,
         2.0764e-03,  1.7494e-05,  6.9585e-06, -1.1390e-06, -1.2005e-05,
        -7.2452e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8669e-05, -2.6705e-02,  3.3713e-07,  9.4994e-06,  5.7049e-03,
         1.2286e-05, -8.2932e-03, -1.4199e-02,  1.7503e-06,  5.3783e-06,
        -1.0853e-05, -4.1467e-06, -8.8985e-03, -1.4425e-05,  6.3545e-04,
        -1.7782e-02,  1.2432e-05, -3.6717e-06, -1.0678e-05,  6.1707e-06,
         1.1256e-04,  7.1527e-06, -1.3143e-05, -6.3024e-06, -1.1717e-05,
        -1.5550e-02,  1.2449e-05,  1.4334e-05,  3.9763e-06,  3.6886e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5069e-05,  1.3911e-02, -5.5859e-06, -2.1587e-05, -7.6231e-03,
        -6.8706e-06,  1.5803e-04, -1.1035e-02, -2.5543e-06, -8.8862e-06,
        -1.3123e-07, -3.8529e-06,  6.0601e-03,  2.1136e-06,  2.9435e-04,
        -8.4476e-03, -1.3927e-05, -5.7152e-06,  2.4533e-06, -7.7280e-03,
        -1.2101e-05, -1.4213e-05,  2.4719e-03, -2.2078e-05, -7.7192e-03,
        -7.0732e-06, -3.0545e-06, -4.1445e-03, -3.0532e-07, -4.1509e-06,
         1.8254e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5971e-04, -1.1269e-05, -3.5027e-05,  8.5676e-05,  4.6730e-05,
        -3.5098e-06,  2.5064e-05, -3.4497e-02,  2.3304e-05,  5.6644e-05,
        -6.1075e-02,  3.7153e-05,  1.8914e-05,  3.2021e-05, -3.9393e-02,
        -1.1058e-05,  3.5744e-03, -2.9651e-02,  2.9321e-06, -1.0417e-05,
        -2.2117e-05,  2.0062e-05,  2.7000e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1460e-05, -8.4518e-06,  5.5783e-06, -5.9104e-05, -1.1985e-05,
         1.6934e-05,  1.0686e-05,  1.1644e-02,  6.8217e-06,  3.0187e-05,
         2.0163e-02, -1.8837e-05, -1.5579e-05, -4.0566e-06,  1.0183e-02,
         2.8970e-05, -2.0699e-05, -1.2586e-03,  1.0077e-02,  1.3181e-05,
        -2.1979e-05,  9.7225e-03,  1.5378e-05,  2.3776e-07,  1.7593e-06,
         1.1194e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3978e-04,  2.8603e-06,  2.1951e-05,  7.9899e-05,  3.4895e-05,
        -8.7872e-06,  3.5322e-05, -1.8039e-02, -3.0693e-05, -2.3521e-05,
        -4.3337e-02, -3.0002e-06, -1.1286e-05,  1.9291e-05, -1.0040e-02,
        -2.1546e-05, -3.5390e-06,  2.2773e-06, -1.3541e-05, -1.2589e-05,
        -1.4995e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3520e-05, -8.1506e-03,  3.4641e-05, -9.6437e-04,  2.7374e-02,
         1.3199e-05,  1.6916e-05, -2.5779e-06, -2.3886e-04,  2.8522e-02,
        -6.8500e-06,  3.1784e-06,  2.0608e-05,  4.5601e-06,  2.0908e-02,
         2.5582e-05, -4.6427e-05, -1.0070e-05, -1.0008e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3191e-04, -1.2214e-01, -3.7447e-05,  3.3838e-03, -1.2407e-02,
         4.0258e-06,  5.0329e-05,  1.0239e-04, -3.3398e-03,  3.2748e-03,
         2.5601e-05, -1.4804e-05,  2.9932e-07,  1.5540e-05, -1.0120e-02,
        -5.0473e-05,  7.0469e-05, -5.7696e-02,  3.3626e-05, -2.0954e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2247e-04,  4.4956e-02,  4.4570e-06,  2.0752e-02,  5.3291e-02,
        -3.6044e-05,  2.0721e-05, -4.6131e-05, -6.5358e-04,  3.5798e-02,
        -4.0083e-05, -2.2597e-05,  4.5863e-07, -5.6957e-06,  1.2805e-02,
        -3.6256e-06, -9.8236e-05,  4.9371e-05,  7.8312e-05,  2.8385e-05,
         3.5814e-05,  2.0020e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8740e-05,  1.7848e-06,  3.0425e-05, -1.2210e-06, -1.2928e-05,
         5.2312e-03,  7.7687e-06, -8.5750e-06, -2.7811e-05, -1.8384e-05,
        -2.5463e-02,  3.2515e-05, -2.4907e-02,  3.0111e-05, -2.8420e-02,
        -2.8593e-05, -3.1323e-09,  6.0999e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7187e-06, -1.3352e-05, -5.8505e-06,  2.8094e-06, -8.8150e-06,
        -4.2299e-04, -1.8034e-05,  2.8133e-05,  1.2617e-07, -1.1765e-05,
        -1.1568e-05,  3.3058e-06,  2.6845e-02,  1.3286e-05,  2.8044e-08,
        -1.1972e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3576e-04,  5.1359e-06,  1.7723e-06,  9.5797e-06,  7.9012e-06,
         2.4283e-03, -4.3824e-07, -1.1808e-05,  3.1510e-06,  2.1663e-05,
        -2.7357e-02, -1.6571e-05,  2.7015e-05,  1.6175e-05, -1.2278e-05,
        -1.1873e-02, -7.3717e-06, -1.2693e-02, -2.0320e-05,  6.6688e-06,
         1.6889e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3200: [tensor([ 2.6975e-05, -5.9227e-02,  1.0232e-05,  3.6321e-03,  1.5419e-05,
         9.9877e-06, -1.1192e-05,  2.8483e-05, -1.6626e-02,  1.0157e-05,
        -2.1013e-05, -6.2303e-06, -3.6541e-02, -3.2736e-05,  7.0596e-04,
        -4.1094e-05, -2.4048e-05, -1.3379e-05,  6.0262e-05, -1.3654e-05,
        -1.4226e-05,  1.4408e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4136e-05, -5.3797e-02, -1.3283e-05,  1.0577e-03,  1.4065e-05,
         1.4366e-05, -3.2468e-05,  2.9756e-05, -1.4616e-02, -3.5125e-05,
        -1.1113e-05,  2.8637e-06, -3.0647e-02,  4.4340e-06,  2.4798e-04,
        -2.6485e-05,  1.3833e-05, -1.7730e-05,  1.8013e-05,  1.2328e-03,
         3.0815e-06,  1.4809e-05, -1.8033e-06, -2.2724e-05,  3.9442e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5078e-05, -4.8137e-02, -8.7670e-06,  2.4070e-03,  2.0236e-06,
         2.3059e-05, -1.8017e-05,  4.2095e-06, -1.9018e-02,  3.2788e-05,
        -2.0587e-05, -2.3904e-05, -1.1494e-02,  1.6319e-05,  3.7661e-04,
        -2.5653e-05, -1.9052e-02, -4.3448e-05, -2.9220e-02,  1.8485e-05,
        -1.7330e-05, -3.7084e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5191e-05, -5.7673e-02, -6.6472e-05, -1.5289e-05,  1.0888e-03,
         5.1293e-06, -2.1636e-02, -2.4253e-05,  1.7162e-03, -1.8829e-05,
         2.4650e-05,  1.8550e-05,  4.5855e-05, -2.7248e-02, -6.6067e-06,
        -9.0869e-03,  5.3868e-06, -1.7454e-05, -1.9447e-02,  3.7217e-06,
         1.3830e-05,  4.4391e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3672e-05, -5.1483e-02, -3.0709e-05,  2.5638e-07,  3.9441e-04,
        -2.3883e-05, -1.8256e-02, -1.2231e-05, -1.8226e-02,  1.0758e-05,
         8.2811e-06, -5.9081e-06,  1.1580e-05,  1.1214e-05, -1.7785e-06,
        -5.1433e-03, -2.4802e-05,  4.7494e-04, -3.0517e-02,  3.8293e-05,
         1.3144e-03,  6.1374e-06,  2.1083e-05,  1.3591e-06, -4.1404e-06,
        -9.3116e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0004e-05, -5.2038e-02, -1.1586e-05, -2.6689e-05,  3.9259e-04,
         6.1996e-06, -1.8030e-02,  2.7895e-05,  2.8928e-03,  1.0378e-05,
         1.9366e-05,  8.5046e-06,  2.1729e-05, -1.4098e-02, -1.2339e-05,
         1.9703e-05, -1.7491e-05,  1.2998e-05,  1.0523e-06,  7.1042e-04,
        -1.6534e-02, -2.5192e-05, -3.2645e-06,  9.5431e-07,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5767e-05,  4.1984e-02,  2.0422e-06, -1.9497e-04,  1.9308e-05,
        -4.4588e-03, -5.9437e-06, -3.2545e-04, -8.0967e-06,  1.6083e-05,
        -2.4217e-05,  3.7401e-06, -2.7597e-06,  1.1317e-02, -1.7413e-05,
         4.2475e-03,  1.3522e-05, -9.5750e-06, -1.3732e-04, -3.1016e-05,
        -2.2760e-05, -1.8349e-06,  1.6323e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6375e-05,  1.5891e-02,  1.7257e-05, -1.3492e-04,  6.7822e-06,
         4.3455e-03,  2.1549e-06, -6.6228e-04, -9.2354e-06,  1.2133e-05,
         5.0915e-06,  3.4632e-07, -2.6215e-06,  1.1648e-02,  1.6173e-05,
         9.8804e-04,  2.3403e-05, -2.3355e-06,  1.2187e-03,  8.0065e-06,
         4.6521e-06,  4.0987e-05,  3.0257e-06,  1.9754e-06,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5455e-05, -8.0682e-02,  3.4825e-05,  4.7647e-04,  4.3661e-06,
        -7.9299e-02, -9.1016e-06,  3.2511e-03,  1.1262e-04,  1.4831e-05,
         1.6824e-05, -3.3251e-05,  1.8080e-05, -4.0130e-02,  3.6881e-05,
        -8.9995e-03,  2.5263e-05,  3.8978e-03,  2.5611e-06,  4.5113e-06,
         5.4474e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0876e-05, -2.4895e-06,  9.7486e-07, -1.0019e-04,  1.3482e-05,
        -9.1614e-06, -2.2411e-05, -4.1134e-04, -5.0458e-06,  3.0137e-06,
         9.3962e-06,  1.7568e-02, -7.8082e-06, -7.7428e-06, -9.3755e-06,
         5.7230e-07, -1.5361e-05,  2.3088e-06,  2.1792e-02, -1.1581e-05,
        -3.9672e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5598e-05, -4.9150e-07,  2.5883e-06,  7.3165e-05, -8.3828e-06,
        -9.1389e-07, -1.2890e-05,  5.0839e-04, -7.0251e-06, -2.8268e-05,
         3.6145e-06, -2.6258e-02, -1.1715e-05, -1.8807e-05,  6.2674e-06,
        -1.3101e-02, -1.9100e-05, -2.2399e-02, -4.5386e-05,  6.6835e-06,
        -3.1136e-02, -2.5505e-05,  1.0056e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9826e-05,  1.8302e-05,  2.0349e-05,  2.2633e-05,  2.5870e-05,
        -2.2041e-05,  9.8784e-06,  1.7970e-04, -1.2231e-06, -2.0185e-06,
         1.7260e-05,  6.8010e-03,  1.4319e-05,  1.4989e-05, -2.4846e-06,
        -1.8720e-02,  2.4191e-05, -1.8373e-02,  3.0218e-05,  6.3636e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #3250: [tensor([ 7.4923e-05, -2.2197e-05,  1.5524e-06,  3.7607e-04, -3.9574e-06,
        -1.3644e-05, -6.9975e-06, -9.5712e-06, -3.6225e-03, -1.2574e-06,
         7.3893e-06, -3.3170e-02,  7.4542e-06, -1.0093e-05, -2.1356e-07,
        -7.6891e-03,  1.2626e-05,  2.4550e-04, -9.0657e-06, -9.5904e-03,
         2.2463e-05,  1.1510e-05, -1.7734e-06,  1.3983e-05, -1.7016e-06,
        -9.2606e-06,  1.2806e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3985e-04, -1.5857e-05,  1.6112e-05,  1.2246e-04,  2.2319e-05,
        -1.1212e-05,  3.4500e-05,  2.8172e-05, -2.3780e-02,  1.7421e-06,
         2.0667e-05,  1.2707e-02,  3.0151e-05, -1.0836e-05,  1.3426e-05,
         1.6735e-03, -2.2090e-02,  2.7267e-05, -8.6282e-06, -3.3872e-02,
         2.6835e-06,  2.4666e-06,  3.8423e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7173e-05,  3.0499e-05,  2.7826e-07, -5.3378e-05, -1.2930e-05,
         3.1276e-06,  3.9635e-06,  2.3604e-05,  8.1344e-03,  9.9809e-06,
         2.0893e-06,  3.5852e-02,  3.8511e-06,  1.9638e-06, -9.5405e-06,
        -2.4812e-06,  1.4102e-02, -1.5638e-06, -1.9966e-05,  7.9547e-06,
        -1.6751e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.1530e-05,  1.0169e-05,  1.0636e-04, -8.0110e-06,  7.8001e-06,
        -1.1122e-05, -1.4138e-05,  8.3161e-07, -4.8464e-02, -1.2789e-05,
        -6.7412e-06, -1.3904e-05,  7.5213e-06,  4.1181e-04,  4.0573e-06,
         8.3188e-05, -4.3371e-06, -1.5041e-05,  1.0189e-05, -5.3221e-06,
         4.7757e-06,  2.9654e-06, -1.0993e-05, -1.0324e-05, -1.6814e-05,
         3.5794e-06, -1.4409e-05,  4.9781e-04,  2.1165e-07, -2.4062e-05,
        -1.1256e-05,  3.2754e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2853e-05, -4.2462e-06, -2.8661e-05, -1.3132e-05,  5.1605e-06,
        -1.5049e-06, -5.8777e-06, -1.9517e-06,  2.3177e-02, -4.7152e-06,
        -9.4383e-06, -1.8458e-06, -2.6666e-06, -1.0992e-03,  9.9430e-07,
        -1.2336e-05, -1.5867e-06,  9.0972e-06,  2.9189e-06,  3.0473e-07,
         3.2227e-06, -7.0699e-06,  3.6216e-06, -3.3137e-06, -1.2289e-06,
        -1.1893e-06, -1.6265e-06,  1.3046e-06, -5.2224e-06, -4.1614e-06,
        -2.8037e-06,  2.0559e-02,  6.8447e-07, -6.2299e-06, -2.3259e-06,
        -1.5449e-05, -1.1006e-06,  3.8233e-06, -7.1726e-06, -1.0076e-03,
        -1.1460e-05,  9.1661e-06, -6.5861e-05, -4.1872e-06, -3.0059e-05,
        -5.2899e-06,  5.7225e-06,  4.3692e-06, -4.8722e-09,  6.2452e-06,
        -3.2775e-06,  5.9151e-06, -7.5433e-09], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5843e-04, -5.8118e-06,  2.8673e-05,  1.4115e-05, -1.6044e-05,
         3.3836e-06,  1.3450e-05,  3.4358e-05, -1.6550e-02, -1.1299e-05,
        -3.8319e-06,  2.3811e-05,  3.3753e-05,  2.8283e-04,  2.6201e-06,
         3.3283e-04,  1.5895e-06, -4.7123e-06,  2.8061e-05,  1.7221e-05,
         5.5208e-06,  1.1814e-05,  4.5816e-06, -4.1802e-06,  1.7812e-05,
         1.2829e-05, -1.4661e-05,  1.4020e-05,  2.5588e-05, -4.3096e-02,
         4.0606e-06,  2.8724e-05,  2.8575e-05,  7.2282e-06,  8.4027e-06,
         1.0777e-05,  9.7159e-06,  6.1727e-05,  1.9752e-05,  3.9122e-06,
        -1.0672e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7355e-06, -3.0376e-02,  1.0780e-05,  6.2187e-04,  2.0545e-06,
        -1.0287e-05,  1.9115e-06,  4.8625e-06,  3.2791e-06, -2.0611e-02,
         1.2005e-05,  5.0555e-03,  9.7260e-06, -9.4086e-03, -3.3390e-06,
        -1.0422e-02,  1.0924e-05, -1.1969e-06,  1.0363e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4189e-05, -4.5437e-02, -1.0594e-06,  2.2750e-03,  1.0542e-05,
         1.3844e-05,  1.0969e-06, -4.5437e-06,  1.5555e-05, -1.6140e-02,
         1.0490e-05,  3.4448e-03, -8.5081e-06, -1.1349e-02, -9.7207e-06,
        -5.2971e-03,  2.3434e-06,  1.0011e-05, -2.6776e-02,  7.8866e-06,
        -6.2594e-06,  2.6524e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5629e-06, -4.1942e-02, -1.6737e-05,  2.3210e-03, -4.4093e-05,
        -2.4915e-05, -1.6380e-05,  2.7435e-05, -4.6696e-05, -6.0892e-02,
         3.6323e-06,  3.6988e-03,  2.2303e-05, -3.9942e-02,  6.4994e-05,
        -2.7618e-05, -1.6290e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7581e-05, -1.2301e-05, -2.4516e-05,  2.9526e-04, -8.4787e-02,
        -5.4858e-06,  3.9785e-06, -9.4235e-06, -2.6427e-05, -6.5535e-02,
        -4.5612e-05,  2.4476e-05,  2.6650e-05, -3.0262e-05, -1.6551e-05,
         1.0878e-03, -2.1503e-02,  5.4330e-06, -3.5307e-02,  2.2196e-05,
        -9.6623e-06, -3.3202e-05,  3.4132e-03, -2.3439e-05, -5.4473e-05,
        -8.7828e-05,  3.9935e-06, -2.3419e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0509e-05,  3.3225e-05,  9.1557e-06,  1.2804e-04, -8.6849e-02,
        -3.5214e-05, -3.2440e-05, -2.4227e-05, -3.7351e-05, -5.5087e-02,
        -2.8780e-05,  2.3374e-05, -4.9490e-05,  1.5858e-05,  2.0555e-05,
        -2.1894e-02, -1.1998e-05, -2.9303e-05,  3.4270e-06, -3.1572e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.8045e-05,  1.3236e-05, -1.9336e-05, -1.0964e-04, -1.6237e-02,
         2.0359e-05,  1.6152e-05,  1.6276e-05,  6.4230e-06,  4.3412e-02,
         1.1380e-05,  2.8979e-06,  1.3724e-05, -2.5041e-05,  4.6679e-05,
         7.6607e-03,  3.1856e-05,  1.1192e-05,  1.6752e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
