Iter #50: [tensor([ 1.1172e-03, -4.3835e-03,  4.5990e-05,  2.8602e-05,  1.5516e-05,
        -9.2915e-05,  7.0793e-05, -9.5719e-05,  1.2901e-04, -4.7120e-05,
        -9.2401e-05,  7.3079e-05,  1.2898e-06,  9.7226e-05, -1.4258e-05,
        -5.1890e-06, -1.5908e-04, -1.3503e-04, -2.2117e-05,  2.8641e-05,
        -1.0266e-03, -1.5665e-04,  2.5775e-05, -3.2473e-05,  7.7852e-05,
        -2.1480e-05,  3.2950e-05,  2.5396e-05,  5.9826e-05, -9.9472e-05,
        -1.6144e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1913e-03,  2.1202e-03,  1.3495e-04,  1.3392e-04, -3.1265e-04,
         1.5641e-03,  9.5201e-05, -9.9881e-05,  1.1267e-04, -1.3555e-04,
        -1.8755e-04,  2.0774e-05,  7.5555e-06,  1.0791e-03,  2.1793e-04,
        -2.1843e-04, -1.6415e-04, -2.8527e-04,  9.2951e-05,  1.1273e-04,
        -1.3243e-04,  1.0822e-03,  1.5879e-04, -1.0067e-04, -3.2882e-04,
        -3.4951e-05, -4.0146e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2907e-03, -2.4355e-03, -2.9727e-04, -2.3837e-04,  2.0850e-04,
        -1.6542e-03, -2.0065e-04,  2.9555e-04, -1.8378e-04,  2.5061e-04,
         2.6255e-04, -2.6484e-06, -1.4626e-04, -1.3387e-03,  9.7615e-05,
         2.0532e-04, -7.4193e-05,  2.5925e-05, -1.4917e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4568e-03,  3.4885e-03,  2.0723e-04,  2.9074e-04, -4.4782e-04,
         2.1501e-03,  1.7247e-04, -2.0547e-04, -1.2962e-04, -2.0967e-04,
        -7.6803e-05,  2.0851e-04, -8.0947e-05,  1.5527e-03,  2.8922e-05,
        -8.1908e-05, -1.4245e-04,  1.9273e-04,  4.4129e-04, -2.7951e-04,
        -2.5685e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0323e-03, -1.4011e-03, -8.1785e-05, -2.6522e-06,  6.4688e-06,
         1.6411e-04, -9.2657e-04, -2.1814e-04, -2.1768e-04, -5.4405e-06,
         2.3067e-04,  7.3588e-05, -4.0593e-05,  1.0963e-04, -2.2310e-04,
        -9.5588e-04, -1.1952e-04,  5.3609e-05, -2.3963e-05,  6.9347e-05,
         9.2511e-05, -4.8958e-05,  3.6160e-05, -2.6763e-05, -4.5094e-05,
        -8.3780e-04, -1.4598e-04, -4.5667e-05,  1.6964e-04,  3.6647e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5453e-03, -1.6630e-03, -1.9545e-04, -5.6738e-05, -1.5364e-05,
         8.9972e-05, -1.0577e-03, -1.8614e-04, -1.0959e-04, -2.3480e-05,
         2.8222e-04,  7.5464e-06, -2.5821e-04,  1.7443e-05, -3.0121e-04,
        -1.0165e-03,  9.7444e-05,  5.3346e-05, -6.5167e-05,  1.0662e-04,
         4.7287e-05,  1.1038e-04,  1.0981e-04, -1.7483e-04,  3.8061e-05,
        -4.6363e-05, -9.3391e-04, -3.7589e-05, -8.8472e-05,  1.3817e-04,
        -1.7525e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.2674e-04, -1.5898e-03, -1.7399e-04, -1.4678e-04,  2.5645e-07,
         7.0795e-05, -1.0745e-03, -2.4116e-04, -1.9428e-05, -1.9695e-04,
         2.0113e-04,  7.7722e-05, -8.1195e-05,  1.1681e-04, -6.7805e-06,
        -1.2262e-03, -4.9322e-05,  7.2168e-05, -1.2893e-04,  2.3543e-04,
        -1.5262e-04, -4.1498e-05,  2.7838e-05, -1.2340e-04,  2.0866e-04,
        -7.6068e-06, -1.3155e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3728e-04, -6.6249e-03,  1.0527e-04,  3.6254e-04,  3.1848e-04,
         2.9790e-04, -9.8835e-05,  7.0594e-04, -2.1179e-04,  4.9161e-05,
         1.1150e-04,  1.5562e-04, -1.3641e-03,  4.5417e-04,  3.1262e-05,
         3.3318e-04, -1.2811e-03,  1.3360e-04,  1.3165e-04,  1.5147e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0838e-03, -3.9442e-03, -3.5535e-05,  1.1642e-04,  1.7684e-05,
         1.8507e-04, -1.6837e-04,  3.1995e-04, -7.5501e-05, -1.5493e-04,
        -3.8846e-05, -1.1143e-05, -8.9431e-04,  1.4387e-04,  1.7667e-04,
         1.1757e-04,  4.6567e-05,  5.3983e-05, -8.7650e-04,  8.8665e-05,
         7.1966e-05,  4.7094e-05,  3.5522e-05,  2.4942e-05,  1.0108e-04,
         5.0832e-05,  1.5634e-04,  3.7150e-05,  1.9641e-04, -5.1442e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3364e-03, -4.3607e-03, -9.8738e-05,  1.5471e-04,  1.3169e-06,
         3.4893e-04, -3.3713e-04,  3.8683e-04, -6.8421e-05, -3.1125e-04,
         9.9870e-05,  1.8758e-05, -1.0735e-03, -1.1594e-05, -7.7096e-07,
         3.1773e-04, -1.1012e-04, -1.9659e-04,  2.5807e-05, -1.5897e-04,
        -1.2423e-04,  4.1799e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1362e-03,  1.8247e-03,  1.6876e-04,  8.3952e-05, -2.2827e-04,
         1.3629e-03,  2.0186e-04, -3.0740e-04,  1.0109e-04,  3.5119e-05,
        -1.5866e-04,  1.0306e-03,  4.9941e-05, -8.3602e-05, -9.2859e-05,
        -2.3797e-04,  2.1538e-04,  8.8902e-05,  2.1433e-05,  1.4212e-04,
         1.9833e-05,  4.8054e-05,  1.2212e-04,  2.1869e-04,  1.1200e-03,
        -1.2914e-04, -1.8796e-06, -6.6868e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7795e-03, -1.2649e-03, -2.8790e-05,  2.3334e-05,  1.7273e-04,
        -1.0711e-03, -6.4372e-05,  3.3245e-04, -6.6429e-05,  5.6256e-06,
         1.7903e-04, -8.4153e-04,  1.0309e-04,  4.0488e-06, -7.9642e-05,
         1.0454e-04, -3.0782e-05, -1.6561e-05,  1.5112e-04, -7.3645e-04,
        -5.3578e-05,  2.3370e-04, -6.5726e-05, -1.1079e-04, -1.4477e-05,
        -9.0374e-04,  2.1853e-05,  6.7072e-05,  2.7665e-05, -8.5144e-04,
        -2.0619e-05, -2.0616e-04,  1.2417e-04,  3.8434e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.5337e-03, -3.2565e-02, -9.5284e-05,  1.1120e-04,  3.1510e-04,
        -5.6695e-06,  2.4661e-04,  1.4970e-05,  3.6329e-05,  3.7142e-05,
         1.3119e-04,  2.9787e-04, -7.5316e-06, -6.1621e-05,  4.3711e-06,
         2.2196e-04,  3.6288e-05,  1.6309e-04, -3.5230e-07, -1.1263e-05,
        -9.3457e-05, -3.5861e-05,  1.8314e-04, -3.9396e-06,  1.7668e-04,
        -4.3914e-05,  2.7205e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2444e-04, -2.8010e-02,  2.5126e-05, -2.1608e-05,  3.3010e-04,
        -6.9053e-05,  2.0841e-04,  5.1244e-05,  3.8689e-05,  1.7499e-06,
         2.4204e-04,  1.7173e-04,  3.1167e-05, -7.8438e-05,  1.2915e-04,
        -3.7519e-05,  6.7185e-05, -3.9903e-05,  1.5727e-04,  3.0546e-06,
         1.0351e-04,  4.2383e-04,  6.6729e-05,  2.9193e-05,  3.1873e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3083e-03, -3.5784e-02, -1.0398e-05,  4.6997e-05,  3.3915e-04,
        -4.5442e-05,  5.7499e-05, -8.5893e-05, -2.8948e-07, -2.5313e-05,
         3.2769e-04, -7.6725e-07,  3.6099e-05,  3.1217e-06,  3.5454e-05,
         6.4194e-05,  4.0841e-05,  9.0950e-06, -9.4851e-06,  1.1927e-04,
         4.2026e-05,  5.6884e-05,  3.4849e-05,  1.0549e-04,  8.5318e-05,
         1.8725e-05,  1.4294e-05, -9.3145e-05,  1.4903e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6422e-03, -5.1376e-03,  3.7923e-05,  8.2843e-05,  2.9240e-05,
        -8.2010e-05,  4.8546e-05, -2.0402e-05, -5.1933e-05, -1.5039e-05,
        -1.0497e-05,  3.6528e-05,  6.3381e-05,  6.7951e-05, -1.2209e-04,
         2.1512e-05, -4.8823e-05, -2.5668e-05,  5.1203e-05, -1.6721e-05,
        -8.5722e-05, -1.7862e-05,  2.6865e-05, -5.3033e-05,  3.1709e-05,
        -2.4564e-05, -6.6459e-05, -1.1014e-04,  2.0611e-06, -1.3683e-04,
        -1.6875e-05, -1.9932e-04,  5.2938e-06,  1.4946e-04, -3.9762e-05,
         2.1173e-04, -8.4225e-05, -7.0057e-05, -9.9091e-05, -1.2393e-05,
        -4.5309e-05,  7.7790e-05,  1.5159e-05, -6.9354e-05, -3.2227e-05,
         1.0790e-05,  4.9069e-05, -1.2400e-05, -1.0549e-04,  2.3387e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1155e-04, -6.1518e-03, -8.6362e-05, -1.5240e-05, -6.2795e-05,
        -4.9425e-05,  9.3132e-05,  3.5980e-06,  9.2620e-06, -1.0740e-04,
        -3.9405e-05,  1.7467e-05, -1.7212e-05,  1.7990e-05, -6.1080e-05,
        -4.6891e-05,  4.7297e-05,  8.9842e-06,  3.2937e-05, -1.2398e-05,
        -4.0245e-05, -9.3319e-05,  2.3491e-05, -8.3956e-05, -4.5503e-05,
         3.3447e-05, -9.9574e-06, -1.2338e-04, -6.1389e-05, -1.9974e-04,
        -8.8644e-05, -3.7327e-05, -3.6466e-06,  8.7976e-05, -1.0372e-04,
         1.1588e-04, -1.0005e-04, -1.1310e-04, -7.6435e-05, -1.7441e-05,
        -2.3475e-05, -2.7125e-05, -7.5293e-05, -4.8589e-05, -1.3775e-05,
        -4.4497e-05,  6.6234e-06, -2.6266e-05, -1.0533e-05,  3.1332e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3534e-05, -6.4677e-03,  7.0056e-05,  6.8646e-05, -7.7156e-05,
        -4.2381e-05,  4.7841e-05,  4.8757e-05, -5.8748e-05, -6.9272e-05,
         2.6988e-06,  3.4196e-05,  4.1770e-06,  3.2584e-05, -5.4666e-05,
         5.4593e-05, -3.4493e-05,  4.0496e-05,  3.4333e-05,  1.9809e-05,
        -1.7251e-04, -6.8702e-05,  1.3773e-04,  7.8346e-06,  7.1040e-05,
        -3.0140e-05, -4.5873e-05, -2.8780e-05,  1.0411e-04, -2.4673e-04,
        -3.6353e-05, -1.9608e-04,  2.5941e-05,  1.0107e-04, -1.7730e-04,
         1.2593e-04, -9.9613e-05, -1.9633e-04, -1.5595e-04, -1.0740e-05,
         2.7878e-05,  2.5976e-05,  6.1380e-05, -6.9100e-05,  9.5990e-05,
        -6.8390e-06,  3.4147e-05,  6.2362e-06, -9.7572e-05,  4.0078e-05,
        -7.2364e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1235e-03, -2.9509e-02, -5.3321e-05, -7.1527e-05,  7.0570e-06,
        -2.2701e-04,  4.8604e-05,  9.6963e-05,  1.2255e-05,  3.9289e-05,
        -1.6969e-04, -1.2047e-04,  2.7863e-04, -2.9661e-05, -1.1441e-05,
         4.4005e-05, -1.7733e-04, -1.1386e-04, -9.3673e-05,  1.5184e-05,
        -6.2311e-05,  7.7220e-05,  1.6259e-04, -1.9892e-05,  6.3002e-05,
         6.5244e-05,  1.4204e-05, -7.2276e-06, -1.7194e-05, -1.9369e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5022e-04, -1.7114e-02,  1.1363e-05, -5.9928e-05, -1.1805e-05,
        -1.8673e-04, -6.6859e-05,  4.2712e-05,  6.5403e-05,  1.4767e-04,
        -1.9722e-04, -1.9252e-05,  1.2107e-04, -4.8602e-06, -9.4517e-05,
         8.1437e-05,  4.1103e-05, -6.3025e-05, -2.6557e-05, -1.0092e-04,
        -9.5399e-06,  8.1048e-05, -3.8456e-05, -9.4313e-05,  1.6281e-05,
         1.7591e-05,  5.8595e-05, -1.1476e-05, -1.9564e-05,  1.0307e-05,
         4.0186e-04, -9.7869e-05,  6.8308e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8364e-04, -2.1889e-02, -5.1795e-05,  2.4626e-05,  7.5185e-05,
        -2.0855e-04, -6.8546e-05, -1.1551e-05, -2.0916e-05,  4.1120e-05,
         5.2227e-05, -3.2058e-05,  4.1712e-05, -5.0694e-05, -4.8080e-05,
        -5.0730e-05, -4.2661e-05, -2.3744e-05,  2.6087e-05, -1.1047e-04,
         6.5655e-05,  9.0282e-06, -5.8309e-06, -5.5423e-06,  7.4103e-05,
         2.9419e-05,  4.1398e-05, -6.0693e-05,  2.9866e-05,  2.9237e-05,
         6.5547e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3904e-03, -3.0757e-02, -3.0675e-04, -8.4946e-05,  7.4814e-05,
         2.3605e-05,  2.4750e-04,  4.2349e-04, -4.6929e-05, -2.2886e-04,
        -8.4784e-05,  1.2411e-05,  5.9456e-04,  1.1467e-04,  3.4008e-04,
        -1.0410e-04, -2.5795e-04, -4.5311e-05,  1.2224e-04,  9.6000e-05,
         7.3813e-05,  4.7819e-05,  6.8280e-05, -1.0787e-04, -2.7611e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2187e-04, -4.1635e-02, -2.5444e-04, -1.1344e-04,  2.9808e-05,
        -1.7694e-04,  2.6966e-04,  3.7629e-04, -8.6328e-05, -1.6558e-04,
         9.5989e-05,  1.1525e-04,  3.5535e-04, -7.7476e-05,  4.3782e-04,
        -3.0414e-04, -7.6309e-05,  1.2290e-05, -2.0292e-04, -2.0706e-04,
        -6.8682e-05,  2.0217e-04, -1.0644e-04, -1.0795e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4752e-03, -2.4827e-02, -1.9580e-04,  4.6852e-05,  1.1037e-04,
        -7.6328e-05,  1.4181e-04,  2.6498e-04, -1.4281e-04, -1.2073e-04,
         5.8234e-06,  6.2047e-05,  2.9437e-04,  1.0474e-04,  1.4407e-04,
        -3.8685e-05,  1.1322e-05,  1.0295e-04,  1.2261e-05, -5.4835e-05,
         3.5987e-05,  2.0730e-05,  4.6898e-05, -4.4068e-05,  1.8998e-04,
         8.0533e-06,  1.0215e-04,  2.6361e-05, -7.3188e-05, -3.6359e-05,
        -2.8476e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.0246e-03,  2.0764e-02,  5.5317e-05,  5.3562e-05, -9.3802e-05,
        -1.2292e-04,  1.0103e-04, -2.3885e-05,  1.8227e-05,  4.6567e-05,
        -7.4970e-05,  2.6659e-05, -2.3321e-05, -5.2401e-05,  5.5248e-05,
        -4.3178e-05, -6.0240e-06, -5.8326e-05, -1.1055e-04,  3.0876e-05,
        -2.1406e-05,  2.4496e-05, -4.2342e-05, -1.2513e-05, -2.2328e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6227e-04,  3.2942e-02, -1.3249e-04, -2.5192e-05,  9.0708e-05,
        -2.7711e-04, -3.9013e-05,  4.6614e-06, -2.7437e-05, -5.9890e-05,
        -5.2554e-05, -8.2692e-05, -1.3881e-05, -2.6979e-05, -6.4184e-05,
        -3.2387e-05, -1.2021e-04, -1.4581e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3517e-05,  3.2066e-02,  6.2667e-05, -5.7454e-05, -2.4642e-05,
        -2.7256e-04, -9.1189e-05, -8.7297e-05,  2.4067e-05, -2.2794e-05,
        -1.2490e-04,  2.5212e-05, -2.8508e-05,  4.8840e-05, -1.0472e-04,
        -6.6800e-05, -2.2219e-05, -1.3853e-04, -4.0587e-06, -1.2515e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7244e-05,  3.2366e-02, -1.1164e-04, -4.5257e-05,  3.2573e-05,
        -6.5469e-05, -4.2290e-05, -2.6017e-05, -3.8215e-06,  4.8100e-05,
        -1.2290e-06, -2.5855e-05, -2.5396e-05,  4.3013e-05, -8.7386e-06,
         8.8820e-05,  8.3796e-05,  5.1616e-07, -3.8594e-05,  1.0394e-05,
         4.0057e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3295e-04,  2.3423e-02, -4.7883e-05,  1.4043e-04,  2.8451e-05,
        -5.0063e-05,  7.6219e-05,  7.1346e-05, -4.4956e-06,  4.6280e-05,
         4.7243e-05,  9.0578e-05,  1.2001e-05,  1.1805e-04,  6.2959e-05,
         8.4847e-06,  7.3159e-05, -3.5189e-05,  2.3056e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8646e-03,  2.2060e-02,  1.2662e-04, -1.8163e-05,  1.1893e-05,
        -5.3953e-05, -1.2268e-05, -6.1187e-05,  1.4587e-04,  1.1280e-04,
        -2.0172e-05,  1.0084e-04,  1.0503e-04,  2.6385e-05,  1.1038e-04,
        -6.7454e-05, -1.5505e-05,  7.0501e-05,  5.2557e-05,  4.7600e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5486e-04,  1.8820e-02,  1.3113e-04,  6.3921e-06, -5.3275e-05,
        -4.7773e-06,  8.4703e-05, -8.0804e-05,  3.6092e-05,  2.4985e-05,
         8.7413e-05,  2.4600e-05,  1.4873e-04,  7.0542e-05,  5.0832e-05,
        -7.1631e-05, -5.1457e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1129e-03,  2.5254e-02, -9.3588e-05,  1.6428e-05, -3.7110e-05,
        -1.3341e-04, -1.7826e-04,  4.4303e-05,  1.9174e-05,  5.3428e-05,
         2.1307e-06,  2.6626e-05, -5.8340e-06,  8.1436e-06,  3.1152e-07,
        -3.9105e-05, -2.2555e-06,  2.6906e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5357e-04,  2.7004e-02,  3.2120e-06,  1.6124e-04, -5.3613e-05,
         5.4959e-05, -1.9291e-05,  7.3075e-05,  1.3539e-04, -1.0523e-04,
         9.8759e-05, -5.7760e-05, -8.6011e-05, -2.6135e-05, -3.4754e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8679e-03,  3.0600e-02,  6.1035e-05,  4.7628e-06, -7.9501e-05,
        -7.4405e-05,  4.8054e-05,  7.0210e-05, -6.3786e-05,  3.1345e-05,
         4.2134e-05, -4.6595e-05,  4.1800e-05, -1.0254e-04,  7.0595e-05,
        -7.0554e-05, -8.1280e-06, -1.7011e-05,  2.1706e-05, -5.2103e-05,
        -1.8550e-05, -8.3477e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9099e-04, -4.9030e-02,  8.2564e-05,  4.3408e-05, -1.7839e-04,
         8.8541e-05, -6.0221e-05, -5.9666e-05,  1.1678e-04,  1.4346e-04,
         4.6461e-05, -7.3430e-05, -7.7372e-05,  3.9057e-05, -4.5329e-05,
        -9.7753e-06, -1.2642e-04,  5.0100e-05, -5.5403e-06, -2.1160e-05,
         6.2552e-05,  1.2970e-05,  3.4708e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0702e-04,  3.7854e-02, -1.7941e-04, -1.0883e-04, -6.1668e-05,
        -1.6009e-04, -1.3954e-04, -8.6802e-05, -1.0206e-04, -1.3170e-04,
        -6.2313e-05,  1.3162e-06, -1.1003e-04,  2.9370e-05, -4.4599e-05,
         6.0734e-05,  3.9125e-05, -6.5862e-05, -6.9603e-05, -1.7328e-04,
        -7.7338e-05,  8.5703e-05,  1.9785e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 4.1766e-04, -9.8564e-02,  1.5931e-04,  5.8786e-05, -2.8719e-05,
         7.0562e-06,  1.7068e-05, -6.2249e-05, -1.7805e-05, -8.1273e-05,
        -7.5272e-05, -1.0218e-04, -3.1448e-05, -2.6257e-05,  1.1765e-04,
         8.4474e-05, -7.9137e-05,  5.7727e-05,  3.5827e-05,  2.9551e-05,
        -8.4208e-05,  1.0507e-04,  4.9589e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7562e-04, -1.4989e-01,  1.9960e-04,  2.6782e-04, -8.7671e-05,
         1.4095e-04,  1.8896e-05,  2.8092e-05,  1.9542e-04,  1.3323e-04,
         1.4894e-04, -5.6158e-05,  2.4818e-04,  6.4866e-05,  1.2618e-04,
         2.9907e-06,  1.4431e-05, -2.3301e-04,  9.7870e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4985e-04, -1.3494e-01,  1.4009e-04, -3.0712e-05,  7.8253e-05,
         8.1152e-05, -4.2114e-05, -1.3619e-04, -1.7205e-05,  5.7497e-05,
        -3.9845e-05, -1.2503e-04,  2.2858e-06, -1.4132e-05,  1.6457e-04,
        -2.0033e-04, -5.9285e-05,  8.0441e-05,  1.8704e-05, -4.1944e-05,
         3.1347e-05,  1.8954e-06, -5.5321e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1047e-04, -1.1584e-01,  6.0672e-05,  3.6835e-05,  5.8336e-06,
        -6.6497e-05, -4.7277e-05,  4.9857e-05,  3.7268e-05, -6.5206e-06,
         2.0570e-04,  2.8680e-05, -7.4183e-05,  9.1552e-05,  6.7102e-06,
         1.2145e-04, -3.0833e-05,  1.0345e-04,  3.9166e-05, -3.7472e-05,
        -3.1041e-05,  5.0486e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0979e-04, -1.1709e-01,  2.1207e-04, -1.4242e-05,  6.5407e-05,
         8.3221e-05, -5.3100e-05, -1.5629e-05,  1.2600e-04,  1.1854e-04,
         9.4254e-05,  4.4023e-05, -5.2667e-05,  1.2215e-04,  1.1726e-04,
         3.3759e-05,  2.2756e-04,  1.5199e-04,  1.7330e-04,  7.1487e-05,
         2.4016e-04,  1.4083e-04, -2.2043e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9501e-04, -1.0550e-01,  1.3417e-04,  1.3597e-04,  4.9941e-06,
         9.2344e-05, -5.9461e-05,  4.4560e-06,  1.8045e-04,  1.3352e-05,
         1.6826e-04, -1.8476e-05, -1.1108e-04,  6.3266e-05,  9.8910e-05,
         5.9649e-05,  3.8692e-05,  8.8266e-05,  1.3162e-04, -1.0658e-05,
         4.7186e-05,  9.2039e-05,  2.2066e-05, -4.3854e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1352e-04, -1.4821e-01,  2.8723e-04, -3.9290e-05,  1.8374e-04,
         1.8742e-04,  3.4943e-04, -8.6409e-06,  2.5543e-04,  1.1759e-04,
         7.5173e-05,  3.5668e-05,  9.6792e-05,  1.2358e-04,  2.3895e-04,
         9.3511e-05,  4.7231e-05,  1.7319e-05,  1.7743e-04,  9.6736e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0069e-04, -1.3753e-01,  1.4018e-04,  1.3311e-05, -6.1428e-05,
         2.3047e-04,  1.7011e-04, -7.0143e-06,  1.4360e-04, -7.2610e-05,
        -2.6969e-05,  5.5502e-05,  7.0323e-05, -2.1910e-05,  1.4710e-05,
        -2.0993e-04, -1.5048e-04, -9.4424e-05,  7.4932e-05,  1.6289e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.6373e-05,  1.1711e-01,  4.2112e-05, -4.8724e-05,  1.2820e-05,
         5.9228e-05, -8.4029e-05, -4.9221e-06,  9.6605e-05,  2.3854e-04,
         5.9933e-05,  1.1946e-04,  1.3351e-05,  1.7910e-04, -3.6780e-05,
         1.3897e-04,  2.0130e-04,  8.5533e-05,  3.8261e-05, -1.1834e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2315e-04, -1.2257e-01,  4.5036e-05,  9.5417e-06,  8.1427e-05,
         1.4374e-04, -4.0334e-05, -4.6736e-05, -3.7017e-05, -3.3534e-05,
         2.8591e-05, -1.8055e-05, -5.7027e-05,  7.1837e-05, -2.1514e-05,
        -6.9575e-05,  2.6596e-05, -1.9147e-05, -1.2284e-04, -3.3496e-05,
        -1.0334e-04,  4.2561e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.0525e-05,  1.4277e-01,  7.8995e-05, -1.6367e-04, -1.0842e-04,
        -2.0618e-05, -3.3134e-05, -4.5250e-05,  6.3333e-06, -1.6731e-04,
         6.0849e-06, -1.6368e-04, -1.9855e-04, -1.0465e-04, -6.1653e-05,
        -1.7734e-04,  2.3738e-05, -1.4202e-06, -2.1180e-05, -2.4592e-07,
        -1.3773e-04, -1.3023e-04,  2.2026e-05, -3.2163e-05, -7.0146e-05,
        -1.5401e-04, -3.9982e-07,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2725e-04, -1.2964e-01,  2.8942e-05,  8.5815e-05,  6.7072e-05,
        -4.3054e-07,  1.3006e-04,  6.0008e-05, -1.7404e-05,  1.3120e-04,
         1.0394e-04,  5.5968e-05,  2.3926e-04,  5.9237e-05,  1.1000e-06,
         5.2719e-05, -9.4651e-06,  1.2809e-04, -9.5352e-05,  3.8288e-06,
         9.4466e-05,  5.1214e-05,  1.7453e-05,  4.0157e-05,  4.5794e-05,
         1.6554e-04,  5.8017e-05, -1.1045e-04], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 3.4110e-04, -9.5186e-02,  3.8447e-05,  3.2790e-05,  8.3212e-05,
         1.0015e-05,  2.4676e-06,  2.0609e-05, -4.3628e-06, -6.8501e-05,
         3.4666e-05, -3.7772e-05, -2.2337e-05,  7.8380e-05,  6.4927e-05,
        -1.8005e-05, -2.8729e-05, -3.3138e-07, -5.0667e-05,  6.5369e-06,
        -2.9391e-05,  4.8042e-06,  2.7701e-05,  2.4706e-05,  1.5023e-05,
        -5.1523e-06,  6.7843e-05,  2.7548e-05,  1.6896e-05,  5.8465e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3870e-04, -1.4418e-01,  1.9482e-04, -3.4052e-05, -1.6381e-04,
        -5.8087e-05,  1.6821e-05,  2.0987e-05,  6.0791e-05,  1.2582e-04,
         1.0115e-04, -2.8729e-05,  7.6750e-05,  3.4215e-05, -1.9063e-04,
         9.3469e-05,  5.1297e-05,  4.8559e-05,  5.9728e-05,  7.6762e-05,
        -8.4174e-05, -8.1451e-05, -4.6515e-05, -1.0577e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4941e-04, -1.4262e-01,  1.6520e-04, -1.6910e-05,  1.6984e-04,
         5.6324e-05,  1.0423e-05,  1.9026e-04,  6.3897e-05,  4.3927e-05,
         1.6175e-04,  5.4868e-05,  4.0614e-05,  5.7132e-05, -1.3414e-04,
         3.4913e-05,  7.0713e-05, -4.2423e-05,  1.2566e-04,  5.3097e-05,
         1.9158e-05,  2.5593e-05, -1.3804e-05,  7.6559e-06,  9.4352e-05,
         8.6290e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6595e-04, -1.9647e-01, -5.8338e-05,  1.1241e-04, -4.3285e-05,
         1.1333e-04, -6.3434e-05, -1.6304e-04, -8.7287e-05,  9.1705e-05,
         4.5646e-05, -1.9598e-04,  4.9237e-05,  1.0098e-05, -7.5126e-05,
         4.3951e-05,  9.8972e-07, -1.1521e-05,  2.2044e-04,  1.3014e-04,
        -6.9869e-05,  1.0350e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7513e-04, -1.3516e-01, -3.5728e-05,  2.3618e-06, -7.3929e-05,
         3.0219e-04, -7.5408e-05,  1.8374e-04,  2.2205e-05,  9.5460e-05,
         3.7906e-05, -2.8885e-05,  1.7827e-05, -1.2810e-05, -1.2343e-04,
        -1.2329e-04, -2.0709e-04, -2.2474e-06,  5.9622e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0759e-04, -1.5574e-01,  2.2400e-04,  7.6806e-05,  4.6892e-05,
         3.5703e-04,  1.0322e-04, -5.5327e-05,  1.0181e-04,  1.2687e-04,
         1.6786e-04, -4.1233e-05, -2.0795e-04,  9.9999e-05, -2.2436e-05,
         7.3790e-05,  7.2023e-05, -1.2520e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3925e-04,  2.6490e-02,  9.8686e-05,  2.3825e-05,  8.1120e-05,
         3.5350e-05,  1.5718e-07, -2.6373e-05,  6.8877e-05,  6.0707e-05,
         7.4695e-06,  8.4107e-05,  3.3570e-05, -4.2881e-05,  1.0134e-04,
         5.2011e-05,  1.6270e-04,  1.9463e-05,  7.3900e-05,  2.9057e-05,
         1.5846e-05,  8.1891e-06,  3.4671e-05,  8.7758e-07,  4.6203e-05,
         4.4757e-05,  1.4737e-05, -7.2097e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0742e-04, -4.4562e-02,  7.9241e-05,  6.8713e-05, -2.1066e-05,
         5.4922e-05,  4.0589e-05,  7.7188e-05,  5.0577e-05,  6.6811e-06,
        -1.1743e-05,  1.3434e-05, -5.7407e-05,  4.7435e-05,  4.1590e-05,
        -3.2391e-05, -6.8064e-05,  3.9756e-05, -8.5304e-05,  5.9014e-05,
        -5.4338e-05,  5.7970e-05, -4.7539e-05, -3.2550e-06,  2.1888e-05,
        -3.8251e-05, -1.8965e-06,  2.8609e-05,  8.6857e-05, -8.9995e-06,
         3.2563e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8420e-04,  5.7489e-02, -9.0066e-06, -1.1160e-04,  1.1831e-05,
        -1.0852e-04, -1.2206e-05, -1.1197e-04, -1.3466e-04, -2.4327e-05,
        -9.0021e-05, -4.7094e-05, -3.6658e-05, -9.9174e-06, -1.2105e-04,
         1.9350e-05,  4.4135e-05, -1.0865e-04, -5.6774e-07, -3.4966e-05,
         3.4041e-06, -6.8328e-05,  4.3281e-07, -4.1835e-06,  1.5030e-05,
         7.6273e-05,  2.8004e-05,  1.9472e-05,  5.5635e-06,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6092e-04, -7.5858e-02, -1.4336e-04,  2.1755e-04,  1.2474e-04,
         1.0329e-04,  2.2964e-05, -1.1853e-05,  1.2009e-06,  1.6109e-04,
        -8.4931e-05,  3.0427e-05, -7.7942e-05,  1.0245e-04, -1.8645e-04,
         2.0743e-04,  1.9597e-06,  8.8000e-05,  7.7851e-05, -1.8988e-05,
         2.4587e-04,  6.4589e-05, -1.4502e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1407e-04, -7.0358e-02, -2.7901e-04,  2.4876e-04,  1.5101e-04,
         2.4160e-05,  1.0558e-05,  7.6581e-05,  1.9392e-05,  1.7147e-04,
         8.8665e-05, -5.5588e-06, -1.3886e-04,  6.3295e-06, -1.9206e-04,
         1.8581e-04,  7.3175e-05,  1.2671e-04,  2.5655e-06,  3.9951e-05,
         1.4613e-04,  9.9993e-06,  1.6043e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1047e-04,  6.1568e-02, -4.1624e-06, -1.9877e-04, -1.0707e-04,
        -5.0419e-05, -9.3886e-05, -1.1171e-04,  6.8229e-06, -1.1265e-04,
         5.2027e-06, -8.0427e-05, -2.9184e-05, -1.4757e-04, -1.5204e-05,
        -1.6362e-04,  6.4712e-05, -1.8030e-04, -3.4105e-05, -2.7321e-05,
        -9.6136e-05,  4.5231e-05,  1.6937e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-5.6281e-04,  4.5024e-02,  7.1863e-05, -7.3696e-05,  2.6045e-04,
         1.2032e-04,  6.2754e-05, -7.5227e-06,  3.0831e-05, -7.9595e-05,
        -1.2889e-04, -3.0494e-05,  1.2610e-05,  1.4712e-04, -4.8539e-05,
         1.2747e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2169e-05,  1.6498e-01,  7.2101e-06, -3.4328e-05,  1.8249e-05,
         4.7118e-06, -3.5135e-05, -1.1732e-04, -1.0198e-05,  1.7091e-04,
         9.3562e-05,  4.2249e-05,  8.0382e-06, -4.6293e-06,  6.4687e-05,
        -4.0827e-05,  1.9654e-05,  1.1714e-06,  4.2995e-05, -8.9346e-06,
         5.8380e-05,  2.4332e-05,  1.6753e-06,  2.5023e-05,  6.1433e-06,
         9.8748e-05,  3.6807e-05,  3.7290e-05,  2.9408e-05,  7.0052e-05,
        -8.4025e-05,  5.7644e-05,  9.1842e-05, -1.0159e-04, -4.7292e-05,
        -5.0918e-05,  1.3069e-05,  8.4667e-05, -1.4117e-04, -4.1751e-05,
         5.0756e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6534e-05, -1.1942e-01,  3.9928e-05, -2.2172e-05,  6.4848e-06,
         1.9060e-05,  6.7382e-05, -1.8141e-05,  4.6438e-05,  8.0089e-06,
         5.7033e-06,  2.4721e-05,  6.3904e-05,  3.4873e-05,  9.7874e-05,
         2.2777e-05,  4.3941e-05,  6.5106e-05,  6.1572e-05,  2.7198e-05,
        -3.6299e-05, -1.7344e-05,  9.3834e-05,  5.6978e-06,  9.8746e-05,
        -8.7792e-05,  4.4271e-05,  4.2222e-05,  3.7335e-05,  5.8448e-05,
         6.5265e-06, -1.0051e-05,  2.5640e-05, -2.5931e-06,  1.1347e-04,
         8.3015e-05, -1.4502e-05,  2.9568e-05, -8.0556e-06,  6.6878e-05,
         1.4248e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9036e-06, -1.6017e-01,  1.1728e-05, -1.6338e-08,  2.3045e-05,
         3.4936e-05, -4.3555e-06, -1.3181e-05,  5.4235e-05, -4.1278e-05,
         1.4332e-05, -5.2000e-06,  1.2610e-05,  1.8375e-05, -8.3016e-05,
         7.1312e-05, -4.9230e-05,  1.7990e-05, -2.1998e-06, -4.1611e-05,
        -2.5677e-05,  6.9392e-05,  1.7370e-05, -2.8153e-05,  5.6767e-05,
         4.5975e-05, -7.3218e-06,  3.5686e-05, -2.7306e-05,  2.6690e-05,
         7.8596e-06,  1.0673e-05,  2.3102e-05, -4.8492e-05, -4.7782e-06,
        -3.0223e-06,  1.4389e-05, -2.7247e-05, -1.8896e-06,  4.1457e-05,
        -8.7017e-06, -2.4585e-06,  3.6148e-05,  4.0095e-05,  3.4456e-05,
        -9.2236e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8387e-03,  1.9274e-01,  4.1564e-05,  6.2277e-05, -2.5450e-04,
         1.7458e-04, -1.1852e-04, -6.8475e-05, -2.8271e-05, -1.4053e-04,
        -9.8896e-05, -4.2931e-05, -8.4617e-05,  6.6926e-06, -1.6090e-04,
        -2.7656e-04,  2.5716e-05, -5.9415e-05,  1.1590e-05, -5.6898e-05,
         2.9866e-05, -1.2491e-04, -1.1735e-04, -4.7304e-05, -1.0959e-04,
         8.3482e-05,  1.5640e-06,  8.6871e-06, -1.1464e-05, -6.5417e-05,
        -1.1414e-04,  6.0925e-05, -1.1992e-05,  1.7738e-06, -6.9216e-05,
         2.4824e-07, -1.5907e-04,  4.0801e-05,  2.7420e-05, -9.2936e-05,
        -1.5571e-04, -9.3650e-05, -1.3353e-05,  3.3001e-05,  7.4426e-07,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2375e-04, -1.4374e-01, -8.9218e-05,  2.2977e-05,  3.6899e-06,
        -1.8028e-05,  5.6778e-05,  6.3892e-05,  3.8974e-05,  2.6338e-05,
         6.2980e-05,  3.0786e-05,  3.0568e-05,  4.8211e-05,  4.8445e-05,
         3.7872e-05, -2.2384e-05,  1.9464e-05, -1.1433e-05, -3.8258e-06,
         3.3029e-05, -4.1738e-05,  1.1716e-04, -2.7381e-05,  2.7396e-05,
        -5.6752e-05, -6.4593e-06,  8.5821e-06,  1.8074e-05, -6.2270e-05,
        -1.9370e-05, -3.4174e-05, -5.8346e-05,  4.7510e-06,  2.1062e-05,
        -3.7389e-05,  1.6386e-05, -9.4272e-06, -8.0172e-06, -3.4881e-05,
         2.9020e-05,  4.3684e-05, -8.7148e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4781e-04, -1.4582e-01,  2.3635e-05, -9.8984e-05, -2.3061e-05,
        -4.4328e-05, -7.9542e-05, -3.3244e-05, -2.1819e-05, -5.7750e-05,
        -2.5820e-05, -3.7012e-05, -3.9666e-05, -6.4036e-05, -4.5351e-05,
         1.0327e-04, -9.7410e-05, -9.9953e-05, -9.1274e-07, -5.3998e-05,
         7.2943e-05, -4.8130e-05,  3.1771e-05, -7.3061e-05, -2.1199e-05,
         1.0352e-05,  3.3567e-05, -5.2845e-05, -4.6644e-05,  5.4700e-05,
        -1.7759e-05, -3.9474e-05,  5.5841e-05, -2.1776e-05, -4.9998e-05,
        -3.5591e-05, -9.5006e-05, -5.6902e-05, -1.0937e-04, -2.8104e-05,
         5.7892e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0977e-03, -1.2764e-01,  3.7000e-05, -3.5434e-06,  4.6295e-06,
         7.2727e-05,  3.9632e-05,  5.3416e-06,  3.8638e-06,  1.2063e-05,
         9.5181e-06, -5.8066e-05, -1.7616e-06,  2.2381e-05, -1.4726e-05,
         8.5722e-05,  5.0105e-05,  9.2682e-05,  6.3064e-05, -2.6479e-05,
        -8.3797e-06,  1.7852e-05,  6.7127e-05,  1.3172e-05,  3.7515e-05,
        -5.4309e-05,  1.7524e-05,  6.1345e-05,  2.8239e-05,  2.2695e-05,
         1.4057e-05, -5.3119e-05,  8.2536e-05,  4.3271e-05,  7.6776e-06,
        -9.3948e-06,  4.8336e-06, -1.8120e-05, -1.1545e-05,  1.9104e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5066e-04, -9.0834e-02, -2.5288e-05, -4.4318e-05,  8.0411e-06,
        -6.4610e-05,  9.2879e-05, -5.6655e-05, -7.0252e-06,  2.5178e-05,
        -1.5844e-05,  1.4161e-05,  4.3791e-05, -1.8026e-05,  5.0888e-08,
         2.4429e-05, -1.7711e-05,  1.0582e-04, -8.3512e-07, -3.1902e-06,
        -2.1022e-05, -1.6904e-05,  1.0328e-04, -3.4566e-06,  6.3787e-06,
        -5.6025e-05,  1.9166e-05, -3.0161e-05, -2.2510e-06, -3.6633e-05,
        -1.8482e-06, -3.3426e-05,  5.6623e-05, -2.8345e-05,  4.8407e-06,
        -2.3776e-05,  1.6992e-05, -6.1652e-05, -4.7936e-05, -2.9155e-05,
        -3.4044e-05,  3.5377e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5478e-04, -1.7715e-01,  3.8283e-05, -6.1677e-05,  6.7206e-05,
        -3.2130e-05,  5.5185e-05, -4.7276e-06, -3.3930e-05, -7.2659e-05,
        -3.1577e-05,  4.8729e-05,  8.0967e-05,  2.0282e-05,  2.5955e-05,
         7.4807e-05, -2.0330e-05,  1.0427e-05,  4.8514e-05,  2.7150e-05,
         3.3987e-05, -2.8469e-05,  6.1041e-05,  5.1559e-05,  6.0623e-05,
        -1.9058e-05,  2.6065e-05,  3.8080e-05,  9.9638e-06,  6.4660e-05,
        -9.7005e-06, -8.5962e-05,  3.9022e-05, -5.7020e-05, -4.2215e-05,
        -7.8530e-06, -4.1720e-05, -7.6172e-05, -5.8387e-05, -3.6000e-05,
        -6.2433e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4289e-04, -1.4875e-01,  8.9207e-05,  2.4845e-06,  6.7655e-05,
        -7.6879e-05,  7.0437e-05,  5.5718e-05, -9.8204e-06,  5.4798e-05,
        -5.2753e-05, -2.9102e-05,  5.6553e-05, -1.3008e-05, -4.3865e-06,
         7.8140e-05, -1.2795e-05, -2.0819e-05,  7.3383e-05,  8.7162e-05,
        -2.6962e-05,  4.4018e-05,  5.9180e-05, -1.7949e-05,  7.6956e-05,
        -4.9353e-06,  1.6656e-05,  5.4204e-05,  3.9625e-05, -5.2793e-05,
         1.4138e-04,  1.5748e-06,  2.1023e-05,  2.0426e-05,  9.4410e-05,
         2.4990e-05,  7.0454e-06, -3.5866e-05, -1.4147e-05, -3.1849e-06,
        -1.7541e-05,  9.6614e-06,  6.7249e-05,  7.2822e-06,  3.2513e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0617e-04, -1.3504e-01,  1.0650e-05, -4.2749e-07,  2.7036e-05,
         6.7466e-05,  7.0665e-05,  7.5073e-06, -5.0147e-06,  3.7728e-05,
         4.1903e-05,  6.3138e-05, -2.6819e-05,  1.2690e-05, -5.5735e-05,
         2.8066e-05,  6.3903e-05,  7.3871e-06, -8.4888e-07,  3.6422e-05,
         8.4296e-06,  1.0288e-05,  2.5059e-05,  2.1928e-05, -1.9335e-05,
        -4.8575e-05,  6.2629e-05, -3.7284e-06, -5.9147e-07, -3.2621e-05,
         3.6269e-05, -7.5041e-05, -3.3204e-05,  4.0119e-05,  1.4226e-05,
         3.9068e-05, -1.7760e-05,  1.4897e-05,  3.0675e-05, -2.7812e-05,
         5.3114e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-6.3830e-04, -8.7994e-02, -3.6254e-05, -4.7935e-06,  1.6868e-05,
        -2.6032e-05,  6.0452e-06, -6.4205e-05, -9.3601e-05,  5.5090e-06,
        -2.1870e-05, -4.2710e-06,  2.6392e-05, -6.0487e-05, -2.6888e-05,
        -8.3655e-05, -2.8793e-05, -6.4088e-05, -2.6872e-05, -2.2692e-06,
        -3.6996e-05, -2.9377e-05, -4.8037e-05, -2.3224e-05, -3.1802e-05,
         2.6441e-05, -3.8610e-05, -4.3899e-05,  3.2431e-05,  2.1608e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.8488e-05, -1.2927e-01,  2.7499e-05,  6.1479e-06, -6.2560e-05,
        -1.2092e-05,  7.1486e-06, -6.1273e-05, -4.1390e-05, -5.8683e-05,
         2.7864e-06,  2.1965e-04, -4.3692e-05, -6.3512e-05, -7.4375e-05,
         4.4055e-05, -7.3423e-06, -1.9778e-05,  7.3897e-06, -2.4475e-06,
         1.5951e-05, -3.0341e-05, -4.8340e-05,  3.1997e-05,  2.8091e-05,
        -1.8593e-05, -6.5514e-05, -2.9847e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9018e-04,  1.9185e-01, -9.8458e-05, -8.1943e-05,  6.8117e-05,
        -4.6955e-05, -1.0848e-04, -1.2293e-04,  4.2634e-05, -1.2390e-04,
         1.6383e-05,  6.4867e-05, -8.5943e-06, -7.5999e-05,  6.2113e-05,
         9.2920e-05, -9.9117e-06,  3.5781e-05, -1.0435e-04, -4.6420e-05,
        -2.0327e-05, -2.1770e-04, -6.5928e-05, -9.2739e-05, -3.5189e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8772e-04,  1.9772e-01,  9.9112e-05, -1.2590e-04, -7.8711e-05,
         8.2222e-05, -8.4560e-05,  3.7054e-05, -2.0618e-04,  2.1997e-05,
         1.8036e-04, -1.2451e-04, -7.9231e-05, -2.6585e-05,  1.0491e-04,
         5.1097e-05,  8.2202e-05, -2.8019e-06,  7.8034e-05, -8.8699e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3707e-05,  1.2567e-01, -5.2596e-05, -1.0527e-04,  8.8020e-05,
        -6.4387e-05, -5.4578e-05, -1.2705e-04, -3.0899e-05,  5.5271e-06,
        -9.5028e-05, -5.0617e-05,  1.0656e-05, -5.8881e-05,  1.2544e-04,
         6.4786e-05,  1.0292e-04, -5.4544e-05,  7.9670e-06,  9.0862e-06,
        -1.4322e-04, -3.7138e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2484e-04,  3.9833e-02, -2.6887e-06,  3.2193e-05, -1.0746e-04,
        -5.8286e-05, -5.8790e-05, -9.6096e-05, -7.7397e-06,  3.5836e-05,
        -4.3532e-06,  4.1034e-06, -3.8426e-05, -5.4508e-05,  9.5400e-06,
        -1.4075e-05, -2.6999e-05,  3.5769e-05, -7.1816e-05,  3.3481e-05,
        -8.5799e-06, -2.1259e-05,  1.3765e-05,  5.0398e-05, -9.9241e-05,
        -6.0664e-05, -3.5047e-05,  4.8812e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2934e-04,  3.4026e-02,  2.4927e-05,  1.4237e-05,  2.3518e-05,
         1.1846e-05,  4.7388e-06, -2.1257e-05,  5.6605e-05,  2.9748e-05,
        -1.1357e-05, -1.7612e-05, -2.8908e-06, -2.7702e-06,  8.7971e-05,
         1.0123e-05, -6.1489e-06,  4.6831e-05,  1.8702e-05,  3.3130e-05,
         1.4655e-05,  4.0259e-05,  7.2353e-05,  3.4675e-05,  3.0811e-06,
         1.1910e-05,  2.6209e-05,  1.6011e-05,  3.9432e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0880e-04,  3.5569e-02,  3.7150e-05, -1.9524e-05,  1.2385e-05,
        -4.7545e-05, -2.3014e-05,  4.1353e-07,  3.6770e-06, -9.1371e-06,
         7.6067e-06, -2.0305e-05, -2.9475e-05, -1.7010e-05, -1.7147e-05,
        -9.0668e-06,  1.8648e-05, -3.3879e-05,  1.0616e-05, -1.6659e-05,
        -5.8214e-05, -1.9055e-05, -1.3857e-05,  1.5379e-05, -1.1127e-06,
         6.1651e-05, -2.3709e-05, -2.9163e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1639e-04, -1.8088e-01,  1.3989e-04, -8.1952e-05,  3.5914e-05,
         7.0147e-05,  8.4095e-05, -3.7951e-05, -2.7167e-05,  1.1349e-04,
        -7.4543e-05, -8.7648e-05,  6.0375e-05,  7.1772e-06, -2.8382e-05,
         1.4286e-04,  1.1348e-04,  1.2185e-05, -6.9277e-05, -3.8141e-05,
        -8.2774e-05, -1.0275e-04, -7.2565e-05, -6.7178e-05, -5.3850e-05,
        -1.7062e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7422e-04,  1.5362e-01, -1.8201e-04,  1.2353e-04,  1.4527e-05,
         1.9845e-04,  1.9569e-04,  7.5566e-05,  1.0299e-04, -9.3696e-05,
        -8.6445e-06, -6.5734e-06,  3.3846e-05,  1.6765e-04, -4.5404e-04,
         1.1058e-04, -9.5385e-05, -8.7596e-05,  1.4850e-05,  6.3839e-05,
        -9.6592e-05, -1.7542e-04, -2.2127e-05,  2.5776e-05, -1.5505e-04,
        -4.2974e-05,  1.1220e-05, -6.9259e-05,  2.3286e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4868e-04, -1.0005e-01,  9.4340e-06, -3.4049e-05, -2.2185e-05,
         4.3391e-05, -1.7227e-05, -2.9023e-05,  6.5285e-07,  1.9248e-05,
        -1.9438e-05,  6.9833e-06,  3.5240e-05, -5.0459e-05, -1.0114e-05,
        -9.8866e-06,  1.3867e-06, -1.2298e-05,  3.1405e-05, -5.7412e-05,
         2.5847e-05,  6.1545e-06,  1.7037e-05,  2.3724e-05, -5.1154e-05,
         5.6720e-06,  3.0943e-05, -1.6464e-06,  4.4415e-05,  4.8849e-05,
         2.9205e-05, -5.0185e-05, -1.0156e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1960e-04,  1.0520e-01, -4.9851e-05, -6.3722e-05,  1.5454e-04,
        -2.0371e-04, -1.0289e-04, -8.9873e-05, -1.7475e-04, -3.7864e-05,
        -5.0488e-05,  1.8002e-05,  5.0018e-05, -6.2150e-05, -1.3729e-05,
        -2.7987e-05, -5.0132e-05, -5.4787e-05, -2.9762e-05, -8.7098e-05,
         9.9336e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 6.3855e-04, -1.6045e-01,  1.3162e-05,  4.5023e-05,  8.4940e-05,
        -5.6498e-05,  8.5617e-05, -2.8119e-05,  3.6678e-05,  1.1136e-05,
        -1.2376e-05, -4.7440e-05,  1.0909e-05,  1.5451e-05, -3.9204e-05,
        -7.9848e-05,  2.2103e-05,  9.3328e-05, -1.8612e-05,  1.6421e-05,
         7.2159e-05,  4.6731e-05, -9.2436e-05,  4.8555e-05, -9.7273e-05,
         1.8529e-05,  4.9759e-05,  7.4062e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1878e-04, -1.6975e-01,  6.2971e-05,  5.6949e-05,  6.6723e-05,
        -3.6726e-05, -5.5122e-05,  4.0018e-05,  5.6898e-05, -6.4133e-05,
         4.8208e-05, -6.3426e-06,  9.0554e-05,  1.1316e-04,  6.3026e-05,
        -4.9508e-05,  2.6240e-06,  3.9036e-05,  3.7004e-06, -2.9937e-05,
         8.9808e-05,  1.2586e-05,  4.7912e-05,  2.7247e-06,  8.7502e-05,
        -1.9945e-05,  6.9109e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7317e-04, -1.4776e-01,  3.3831e-05, -6.0534e-05, -3.2419e-05,
        -9.7343e-06, -5.5438e-05,  7.9758e-05, -5.4047e-05,  3.5169e-05,
         4.8854e-05, -2.4913e-06,  1.3208e-04, -3.8817e-06, -6.6753e-05,
        -4.8972e-05,  7.2037e-05, -2.9773e-05,  7.6448e-06,  1.2119e-04,
        -5.3544e-05,  1.2512e-04,  5.7247e-06,  1.5482e-04,  3.5166e-05,
        -8.3735e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3302e-04, -1.3725e-01, -7.5104e-06,  6.4237e-05,  5.3074e-05,
        -3.0666e-05,  3.5115e-05,  1.1110e-06,  4.3008e-05,  6.4046e-05,
         3.7775e-05,  7.6288e-05,  6.5344e-05,  6.4138e-05,  8.6144e-06,
        -9.2602e-06,  8.9277e-05,  4.2180e-05,  5.0555e-05, -5.1379e-06,
         2.8996e-06,  5.5618e-05,  5.3545e-05,  1.8398e-05,  4.3252e-05,
         3.0909e-05, -3.3495e-05, -4.3511e-05,  1.2533e-05,  1.4118e-05,
        -5.0202e-05, -3.5848e-05, -3.4718e-05, -2.2882e-05, -4.1478e-05,
        -5.4102e-05, -4.6881e-05, -2.8480e-05, -4.0803e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9731e-04, -1.4702e-01, -1.0042e-04, -4.9469e-05,  2.3143e-05,
         2.2818e-05,  3.2054e-05,  5.7141e-05,  2.5995e-06,  3.5475e-06,
         1.0809e-05,  2.7682e-05,  6.6836e-05,  4.7505e-05, -3.0566e-05,
         4.6883e-05,  6.4388e-05,  2.2157e-05,  2.7777e-05,  6.5455e-05,
         7.1680e-06,  1.7966e-05,  4.5125e-05,  6.5888e-05,  6.5014e-05,
        -2.7438e-05,  1.4069e-04,  3.6748e-05, -6.1804e-06, -2.0375e-05,
         5.3767e-05, -4.1609e-05,  8.8110e-05,  2.3587e-05,  4.6772e-05,
         5.4357e-05, -4.3495e-05, -2.0677e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1293e-04, -1.7936e-01,  9.0036e-06,  1.4769e-05,  2.9053e-05,
         5.3415e-05,  5.1573e-05,  1.6857e-05, -6.1396e-05, -1.3255e-05,
         8.4431e-05, -6.6543e-07,  2.7484e-05,  4.3225e-05, -4.1167e-05,
        -2.1476e-05,  1.8231e-05,  3.2380e-05, -2.7873e-05,  4.1825e-05,
        -4.3987e-05,  8.5491e-05,  2.3823e-05, -4.3153e-07, -3.0383e-05,
        -1.5926e-05, -1.2485e-05, -2.4807e-05, -1.0843e-04,  5.4855e-05,
        -2.4587e-05, -2.0124e-05,  1.5218e-05, -4.1700e-05,  5.7421e-05,
         3.7393e-05, -2.2072e-05, -2.6360e-05,  5.0137e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9130e-04, -1.3491e-01,  2.7548e-05,  8.8580e-05, -2.9138e-05,
        -1.1480e-06,  3.3210e-05, -2.0299e-05,  7.7805e-05,  4.1388e-05,
         7.9295e-06,  1.0890e-05,  4.3866e-05, -4.4530e-05,  6.6673e-05,
         5.7511e-05,  7.3658e-05,  8.2468e-05,  1.4428e-06,  8.9259e-05,
        -2.3687e-05,  6.1102e-05,  6.3096e-06,  2.2047e-05, -4.9087e-05,
        -2.3646e-05, -5.4520e-06, -3.1176e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9636e-05, -1.7104e-01,  7.1523e-05, -4.4822e-05,  1.1832e-04,
        -1.0312e-06,  7.2435e-05,  6.3307e-05,  1.2559e-05, -4.5246e-05,
         1.3302e-05,  2.2649e-05,  4.5892e-05,  3.9340e-05,  1.7802e-05,
         1.5128e-04, -2.2235e-05,  1.2286e-04, -1.2316e-05, -2.7936e-06,
        -1.4263e-05,  4.0220e-05, -2.0239e-05, -4.3829e-05,  2.1342e-06,
         2.9034e-05, -2.2532e-06, -4.4198e-05, -3.9538e-05, -1.4862e-05,
        -3.2993e-06, -7.0328e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8192e-04, -1.5766e-01,  1.8439e-05, -1.6958e-05, -4.2716e-05,
        -5.6699e-05,  2.5331e-05,  1.8845e-05,  2.0492e-05, -3.2051e-05,
        -1.5011e-05,  3.0889e-06, -1.6960e-05, -2.7646e-05, -4.2442e-06,
         4.2606e-05,  5.5731e-05,  8.4924e-05,  4.1790e-05,  2.8683e-05,
        -2.4479e-05,  3.8461e-05,  2.4592e-05, -4.7132e-06,  9.4059e-05,
         3.9584e-05, -4.8338e-05,  3.0424e-05,  2.0507e-05, -9.0330e-05,
        -9.3759e-06, -2.2882e-05,  4.5873e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4790e-04, -2.5813e-01, -6.7606e-05,  1.0916e-04,  1.9195e-04,
        -9.1404e-05,  1.4813e-04, -4.0582e-05, -1.4144e-05,  9.5852e-05,
         1.5091e-04,  8.6730e-06,  6.2108e-05,  1.3816e-04,  1.0107e-04,
         1.3758e-04,  1.2706e-04, -3.5255e-05,  1.5132e-04, -9.8651e-05,
        -4.3595e-05,  6.7513e-05,  1.0121e-04, -2.7493e-05,  4.1201e-05,
         1.5321e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.2335e-04, -1.9491e-01,  7.6741e-05, -6.2922e-05,  4.0579e-07,
        -1.8668e-05,  9.0871e-05, -2.1805e-05, -1.4433e-04, -5.1135e-05,
        -3.6008e-05, -4.1805e-05,  9.7714e-05,  7.1643e-05,  3.0700e-05,
         4.4612e-05, -7.4135e-05,  4.3915e-05,  1.3081e-04, -5.6745e-05,
         8.8205e-05,  1.3531e-04,  6.0989e-07, -3.0750e-05,  2.7410e-06,
        -5.1055e-05, -1.1546e-04, -6.2873e-05,  7.7795e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8476e-04, -1.3589e-01, -1.7075e-05,  3.2438e-05,  7.6503e-06,
        -6.0879e-06,  1.4183e-05, -5.1107e-05,  7.3389e-07, -3.6979e-05,
         3.6239e-05,  3.0057e-05,  1.4356e-05,  4.2014e-05, -5.8859e-06,
        -4.3094e-05, -6.7141e-05,  9.3936e-05, -1.9997e-05,  7.9490e-06,
         6.0066e-05,  7.2362e-05, -2.7673e-05, -5.2097e-06, -1.7771e-05,
        -1.0271e-05, -2.2210e-05,  1.0561e-04,  6.0891e-05,  9.0504e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #450: [tensor([-7.4105e-04,  6.8642e-02,  5.6449e-06,  1.0505e-04,  8.2437e-05,
         1.1599e-05, -3.5375e-05,  6.0112e-05,  8.7634e-05,  9.3567e-05,
        -6.6071e-05,  6.2485e-05,  5.0385e-06,  9.5549e-05, -1.0460e-05,
         1.2113e-05,  1.1232e-05, -1.8410e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3447e-04, -1.4486e-01,  8.7045e-05,  1.0408e-04,  1.4812e-04,
         4.4535e-06, -2.2415e-04,  2.2138e-04,  1.0695e-04,  3.0728e-05,
        -1.1890e-04,  1.3751e-04,  4.5593e-05,  9.5472e-06, -4.9167e-05,
         4.5344e-04,  6.7090e-05,  4.8080e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2495e-06, -2.9791e-01,  8.4331e-06, -1.7990e-05, -1.5143e-04,
         1.1003e-04,  6.2694e-05, -1.1912e-04,  1.4265e-04,  2.4561e-04,
        -1.8550e-04,  1.9450e-05,  2.7445e-04,  3.3569e-05,  1.3286e-04,
        -4.1179e-05, -9.4545e-05, -4.5545e-05, -5.2249e-05, -4.4459e-05,
         1.6931e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1305e-05,  1.9869e-01, -2.7505e-05,  8.4149e-05,  6.7473e-05,
         1.8281e-05, -4.9589e-05,  1.2648e-04,  7.7913e-05,  2.9211e-05,
         3.0499e-05,  5.3372e-05,  8.8351e-05,  1.1740e-05,  1.1175e-05,
         6.0037e-05,  1.9309e-04,  7.1256e-05, -3.4098e-06,  5.7177e-05,
        -2.2348e-07,  1.1520e-04,  1.6327e-05,  1.0193e-04,  8.1960e-06,
         7.0549e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9703e-04, -1.2279e-01, -6.2525e-05, -6.9545e-05, -8.0450e-05,
        -1.0536e-06,  3.0157e-05, -4.3422e-05,  1.3523e-05, -6.7844e-05,
        -5.5727e-05, -9.7286e-05, -7.0614e-05, -5.3578e-05, -2.7153e-05,
         3.4900e-05, -5.9510e-05,  2.2867e-05,  3.9498e-05,  6.6956e-05,
        -1.5868e-05,  4.7602e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5617e-04, -1.7981e-01,  5.1171e-05,  6.8389e-05,  8.3358e-05,
        -7.6625e-05,  5.0891e-05,  1.4348e-04, -6.0174e-05,  7.4258e-06,
         1.9723e-05, -4.4758e-06, -2.0932e-05, -4.6364e-05,  5.1498e-06,
        -3.0303e-05, -1.1280e-04, -1.3085e-05, -1.3439e-04, -9.5669e-05,
        -8.6478e-05, -8.8528e-05, -3.0156e-06, -2.7923e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3409e-04, -1.6144e-01,  6.7700e-05,  4.6226e-05, -1.1537e-04,
        -7.1419e-05,  2.9375e-05,  1.5067e-05,  4.0288e-05,  1.3850e-04,
         3.5206e-05, -7.3518e-05, -5.6484e-05, -4.5347e-05,  7.9983e-05,
         4.5234e-05,  6.5009e-06, -2.1806e-05, -1.0166e-04, -6.6142e-06,
        -5.0897e-05, -1.6211e-06, -1.1358e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2018e-04, -2.3043e-01, -4.5062e-06,  6.7594e-05, -3.3249e-06,
        -1.2604e-04, -1.3379e-04, -1.6736e-05, -5.4211e-05, -4.4816e-05,
        -5.5464e-05, -1.2361e-05, -1.0910e-04,  7.9857e-06, -1.0584e-04,
         1.1449e-05, -3.3388e-05, -1.9907e-04,  5.4436e-06, -1.1159e-04,
        -7.5239e-05, -3.8210e-05, -1.3951e-04, -1.5326e-05,  9.4744e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3189e-03,  5.9761e-02, -2.9572e-05,  7.8679e-05,  6.3922e-05,
        -6.0296e-05, -1.1476e-04,  1.1133e-05,  7.2023e-05,  8.1009e-06,
        -8.9326e-06, -4.8244e-05,  9.9256e-06,  7.5257e-05, -5.0561e-05,
         6.7863e-05, -5.0896e-05, -9.0355e-06,  1.0936e-04, -9.4119e-06,
        -5.8907e-05, -3.3189e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8317e-04,  7.2199e-02, -7.6458e-05, -8.5692e-05,  1.8205e-05,
        -1.3221e-05, -1.2920e-04,  9.2220e-06, -7.3903e-06, -6.1707e-05,
        -8.7211e-05, -4.7013e-05,  3.2903e-05,  8.6086e-05,  9.7290e-06,
         7.4271e-05, -5.7458e-05,  1.2469e-05, -4.9578e-05, -1.0861e-05,
         1.4926e-04, -5.3457e-05, -3.9799e-05, -1.3195e-04, -3.2560e-05,
         7.0379e-05,  7.7083e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5076e-04,  8.2970e-02,  3.3991e-05, -1.8013e-05,  6.2899e-05,
         1.9253e-05,  5.9592e-06, -4.8174e-05,  1.6808e-05, -1.4452e-04,
        -2.3776e-05, -6.6480e-05, -6.9747e-05,  5.7641e-05,  6.6150e-05,
         9.0077e-05, -2.6771e-05, -7.2909e-05,  1.0570e-05, -3.1313e-05,
         2.8780e-05,  1.8573e-05, -1.9320e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6726e-04, -1.2349e-01, -7.7645e-05, -2.0755e-04,  1.0766e-05,
        -2.9536e-05, -1.3665e-04, -1.1756e-05,  7.7643e-06,  3.5711e-06,
         6.0284e-06,  1.5815e-04,  2.1272e-04,  1.1252e-04,  2.3340e-05,
        -2.9667e-04, -2.0233e-04, -3.6677e-05, -1.3919e-04, -8.3084e-05,
         7.6147e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 3.0790e-04,  4.6429e-02, -1.4125e-04, -5.2121e-05,  9.5174e-05,
        -3.9231e-05, -5.0855e-05,  4.8247e-05, -6.3873e-05, -1.4261e-04,
        -6.6349e-05,  2.7127e-05, -8.8028e-05,  5.9698e-05, -5.5294e-05,
        -4.8949e-05,  4.7372e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7856e-04,  3.7239e-02, -1.0072e-04,  2.8259e-06, -3.0792e-05,
        -4.4888e-05, -3.1596e-05, -6.3977e-05, -5.0364e-05, -1.0263e-04,
         3.4308e-05, -1.2967e-05, -4.0139e-05,  5.6053e-05, -8.7707e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1022e-04,  2.4389e-02, -5.7554e-05, -4.0529e-05,  2.6430e-05,
        -6.3810e-06, -3.8454e-05,  6.3075e-06,  4.9505e-05,  3.7630e-05,
        -2.8447e-05, -7.4010e-06,  7.1698e-05,  1.4840e-05,  2.2523e-05,
        -2.0725e-05,  5.6416e-05,  6.3903e-05,  6.8261e-06, -2.0686e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8377e-04,  3.7625e-02,  4.6226e-06, -1.2381e-04, -4.2221e-05,
        -3.6217e-05, -4.3122e-05, -3.7369e-05,  6.8854e-06,  5.6693e-05,
        -1.7645e-05, -4.0946e-05, -1.4520e-05,  1.0837e-04, -4.1189e-05,
         9.1849e-06,  3.9773e-05,  2.5066e-05,  9.7176e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1825e-04,  3.4728e-02,  7.3787e-05, -5.7300e-05,  8.5369e-06,
        -2.7836e-05, -8.1333e-06, -2.8985e-05, -1.4911e-05, -3.2049e-05,
         1.1835e-04,  2.8674e-05, -1.7153e-05,  3.9310e-05,  2.6754e-06,
        -1.3684e-05,  5.8817e-05,  3.7699e-05,  8.3244e-05,  4.0046e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4424e-04,  6.6038e-02, -5.4449e-05, -1.3414e-05, -1.3773e-04,
        -3.0875e-05, -6.1825e-05, -7.2798e-05,  7.6918e-05,  6.7293e-05,
        -9.2008e-06, -1.7161e-05, -9.9229e-05, -8.3298e-06,  2.5789e-05,
        -1.2344e-04, -3.2890e-05, -3.4144e-05, -8.8476e-05, -1.2542e-05,
         3.6754e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4758e-04,  7.3435e-02, -2.9059e-06, -1.1896e-04, -1.3371e-04,
         2.6189e-06, -6.8282e-05,  8.9344e-07, -6.5646e-05,  1.9098e-05,
         2.7976e-05, -4.1635e-05, -7.1806e-05, -1.1148e-04, -6.3765e-05,
        -2.2892e-06, -4.8368e-05, -2.5138e-05, -7.7546e-05,  2.8649e-05,
        -1.6476e-04,  2.5641e-05,  4.8758e-05,  1.4465e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8417e-04,  5.7818e-02, -1.5538e-04, -1.0469e-04, -9.9343e-05,
         1.4734e-05,  2.7180e-05,  2.7613e-05,  5.4599e-05,  2.8887e-05,
        -3.1684e-05, -9.6716e-05, -2.7473e-05, -7.2572e-05,  1.7412e-05,
        -3.6486e-05,  1.7273e-05,  2.8756e-05, -6.5749e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6126e-04,  3.9673e-02,  3.1921e-06, -7.1284e-05,  2.0415e-05,
         1.9187e-05, -9.9052e-05,  5.6933e-05,  3.9561e-06, -1.8857e-05,
         3.1325e-05, -4.0970e-05,  2.3085e-05, -3.3826e-05,  7.0470e-06,
        -4.0323e-05, -1.3889e-05, -1.8291e-05,  5.7185e-06, -5.6390e-05,
        -5.5247e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7596e-04,  2.8865e-02, -6.1389e-05, -1.9282e-05,  1.4851e-05,
        -6.5918e-05, -2.0818e-05, -5.0533e-07, -7.8662e-05,  4.0678e-05,
        -2.7313e-06, -1.2086e-05,  1.9900e-05, -3.5237e-05,  7.0349e-05,
         1.0249e-06,  4.3507e-05,  2.5999e-05,  4.8342e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0496e-04,  4.0519e-02, -8.8777e-06,  2.3006e-05, -2.6306e-05,
        -9.2323e-06,  3.5047e-05,  3.7140e-06, -6.7028e-05,  3.3255e-05,
         3.9935e-05, -5.5333e-05,  5.5194e-05, -4.6221e-05,  1.4315e-04,
         2.9660e-05, -2.1054e-05,  3.8867e-05,  3.5733e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6198e-04, -2.0343e-01,  2.0302e-05, -7.1497e-05,  1.4069e-04,
         5.5844e-05,  2.0907e-04, -3.5335e-05, -1.1449e-05, -8.4228e-05,
         5.0903e-05,  1.9425e-04,  6.4902e-05,  4.0649e-05, -7.7197e-05,
         2.0964e-05, -7.3886e-06, -8.7960e-05, -7.6668e-07,  1.3433e-04,
         7.2459e-05,  1.5947e-04,  5.5654e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #550: [tensor([-8.8149e-04,  2.3454e-01, -3.6904e-05, -6.7053e-05,  2.2442e-05,
         5.5173e-05, -9.9871e-05,  7.7579e-06,  8.4307e-05, -1.6080e-04,
        -1.4169e-04,  7.0020e-05, -8.3422e-05, -1.5667e-04,  8.2121e-06,
         1.8844e-05,  3.8393e-05, -4.5687e-05, -4.6057e-05, -9.5733e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2824e-05,  2.4913e-01, -9.1689e-05,  3.3047e-05,  1.6995e-04,
        -4.3581e-05, -4.1607e-05,  1.2154e-04, -1.7889e-04,  2.2425e-05,
         4.9582e-05, -4.1715e-05, -5.0414e-05, -1.0115e-04, -5.3980e-05,
         1.1645e-05,  1.3238e-04,  6.3043e-05,  2.5447e-05, -1.3973e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1163e-04, -1.4442e-01, -1.2101e-04,  5.1738e-05, -1.0178e-04,
        -7.7261e-05,  2.9945e-05, -3.1400e-05,  6.0454e-05, -4.3717e-05,
        -2.4077e-05, -2.8524e-05, -5.3502e-05, -1.2265e-04, -7.7733e-05,
         6.2475e-05,  3.6177e-05,  2.5105e-05, -1.1424e-04, -1.1842e-05,
         4.9677e-05, -2.1885e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0538e-03,  2.0112e-01, -4.0391e-05,  1.3409e-04,  9.2845e-05,
        -6.7482e-05, -2.3255e-04,  1.4797e-04, -9.7349e-05, -1.2862e-05,
         6.0895e-05, -7.7445e-05,  5.0318e-05, -3.3788e-05,  1.0906e-05,
        -1.2197e-04, -2.2179e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.4967e-04, -2.0968e-01,  6.2438e-05, -1.2639e-04, -1.3385e-04,
        -1.3786e-05,  1.3466e-04, -8.6149e-05, -1.9649e-04,  4.3530e-05,
         9.9660e-05,  1.7046e-05,  2.0648e-05,  8.9855e-05,  1.1565e-04,
         7.8024e-05, -5.4441e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9510e-05,  1.6618e-01, -1.6201e-04, -5.5784e-05,  9.4316e-05,
         2.5427e-05, -8.4792e-05,  1.8370e-04,  1.2694e-04,  1.0177e-05,
         1.6413e-04, -1.0879e-06,  6.1718e-05, -1.7988e-04,  1.0452e-05,
        -1.4753e-05, -6.7012e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4122e-04, -8.0907e-02,  1.0012e-05,  2.6568e-05, -8.2961e-05,
        -5.3867e-06,  4.3397e-05, -3.8504e-05, -1.0548e-05, -8.2963e-05,
         6.0080e-05, -7.4575e-06, -9.1395e-06, -3.7481e-05,  1.3802e-04,
         8.6658e-05,  1.4494e-04, -1.1253e-05, -1.2250e-05, -6.3780e-05,
         2.1095e-05, -2.6808e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0553e-04,  5.5190e-02,  4.8289e-06, -6.3812e-06, -1.2050e-05,
         6.8704e-05, -1.1219e-04, -6.9753e-05,  8.9333e-05, -5.7005e-05,
         1.2956e-04,  5.0892e-05, -1.2121e-05,  1.1898e-04,  3.0632e-05,
         8.5243e-06,  1.0087e-04, -1.5770e-05,  6.4928e-05, -3.7144e-05,
         3.3390e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6794e-04,  5.8831e-02, -1.3241e-04,  2.5917e-05,  8.6966e-05,
        -3.8871e-05, -7.6352e-05, -6.1263e-06,  6.6444e-05, -6.7639e-05,
        -7.3233e-05, -1.2330e-06, -2.5800e-05,  1.9772e-05,  4.5382e-05,
        -1.3757e-05, -4.3997e-05, -6.0487e-06, -2.9522e-05, -9.6261e-05,
        -2.5414e-05, -2.2429e-05,  8.0384e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9270e-06,  2.6346e-01, -9.2351e-05,  1.3850e-04,  5.1414e-05,
        -4.8638e-05,  2.7701e-05, -7.0834e-05, -1.2048e-04, -1.1511e-04,
        -8.0155e-05,  3.4355e-05,  2.7817e-04,  6.3380e-06, -9.2133e-06,
        -1.7534e-05,  2.6892e-04,  9.5170e-05, -5.1469e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4605e-04,  1.9160e-01, -2.6314e-06, -8.8115e-05,  5.1416e-05,
        -3.4061e-05, -9.8584e-05, -8.1152e-05,  1.7400e-04,  3.1293e-05,
         3.3157e-05,  5.6951e-05,  1.2741e-04,  1.1557e-05, -3.4906e-05,
         8.7112e-06, -1.0070e-04, -1.8172e-05, -6.1203e-05,  1.8422e-05,
         5.6928e-06, -1.3620e-04, -8.3790e-05, -1.9719e-05,  1.4477e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.9429e-04, -2.1351e-01, -4.0547e-05,  1.7803e-05,  1.1318e-04,
         2.8659e-05,  2.9345e-05,  8.0902e-05,  9.3490e-05,  1.9597e-04,
         9.7747e-05,  5.6779e-05, -8.6068e-05,  3.0187e-05, -5.3522e-05,
         5.6904e-05,  3.5194e-05,  5.1275e-05,  1.3369e-04,  1.7683e-05,
        -4.3020e-05,  2.4252e-06, -1.1705e-05, -3.4120e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #600: [tensor([ 3.9696e-04,  9.1115e-02,  8.4405e-05,  6.1873e-05, -6.0276e-05,
        -4.7175e-05, -9.5178e-05,  6.4643e-05, -5.4140e-05,  2.4655e-05,
         4.4505e-05, -6.5843e-05,  4.7837e-05,  7.9108e-06, -6.5307e-05,
         4.4645e-05, -3.4214e-05, -2.9500e-05,  1.3470e-05, -4.8422e-06,
        -7.7918e-06, -7.3259e-05,  6.4613e-05, -4.8898e-05, -1.5284e-04,
        -3.5816e-06,  5.9034e-05, -3.5601e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7247e-04,  3.8053e-02,  1.4318e-05,  8.0111e-06, -3.9724e-05,
        -4.5014e-05, -1.4157e-05, -2.3917e-05,  1.3004e-05,  1.0272e-05,
         6.1068e-07,  1.0413e-05,  5.9471e-05, -1.2354e-05,  3.5722e-05,
        -3.5214e-05, -2.8653e-05,  5.1125e-05, -2.0307e-05,  1.4961e-06,
         2.6343e-05,  7.5974e-05,  4.5800e-05, -5.5088e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7925e-04,  6.2097e-02,  2.0134e-05, -7.1454e-05, -1.0484e-05,
        -2.8913e-05, -5.4907e-05, -1.7812e-05, -1.9300e-05,  7.1318e-06,
         5.3781e-05, -4.5065e-06, -4.7200e-05, -1.9552e-05, -2.7846e-05,
         1.4518e-05, -1.4346e-05,  6.6373e-07, -2.7190e-05, -3.7109e-05,
        -3.6647e-05, -9.6391e-06, -1.2879e-05, -2.1847e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3515e-04,  8.4348e-02, -7.2975e-06, -1.1799e-05,  9.8574e-06,
         5.4235e-06, -7.0879e-05, -3.2240e-05, -7.0114e-06, -8.6037e-05,
        -8.2338e-05, -1.7849e-05, -3.1505e-05, -1.6732e-04,  6.1120e-06,
        -9.6268e-05,  3.7042e-05, -6.7788e-06,  2.3888e-05, -5.3603e-05,
         4.7927e-05, -5.4233e-05,  3.3259e-05,  2.0302e-05,  6.1296e-05,
         1.4027e-05,  2.2571e-05,  9.7035e-05,  6.1851e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9002e-04,  4.6991e-02,  4.9446e-05,  4.8362e-05, -2.7173e-05,
        -8.4681e-05, -1.1726e-05,  2.3682e-05,  9.9177e-05,  1.7799e-06,
        -7.9893e-06,  6.2975e-05,  5.7707e-06, -3.3057e-05,  4.1170e-05,
         1.5608e-05,  6.6210e-06,  9.1233e-08,  5.7255e-05,  2.7466e-05,
         5.2990e-05,  3.0970e-05,  1.3153e-05,  8.3498e-05,  2.0261e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0778e-04, -2.3851e-01, -9.0187e-05, -4.6246e-05,  6.0181e-05,
         1.2234e-04,  9.8721e-05, -6.3767e-05, -1.0202e-04, -6.0769e-05,
        -4.3114e-05,  4.5932e-05, -3.7672e-05, -7.8070e-05, -9.4817e-06,
        -3.9353e-05, -7.3517e-05,  2.3717e-05, -5.7887e-05,  1.1431e-05,
        -3.1959e-05,  1.5354e-04, -5.9346e-05,  8.5647e-06,  1.3160e-05,
        -3.0140e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0155e-04,  2.6763e-01,  1.4689e-05, -9.5176e-05,  1.1900e-04,
        -6.1906e-05, -1.8044e-04, -1.6195e-04,  4.7256e-05, -7.3049e-05,
        -2.7449e-05, -4.4380e-05, -2.2504e-04, -2.0643e-05, -3.4884e-05,
         1.0529e-04,  7.6778e-06,  1.5777e-05,  5.4416e-05, -7.1920e-05,
        -6.0024e-05, -5.4179e-05, -6.7888e-05,  3.5359e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3833e-04, -2.7575e-01, -9.8292e-05,  4.3922e-05, -1.2626e-04,
         4.5374e-05,  5.0537e-05,  6.4801e-05, -7.8885e-05, -4.8294e-05,
        -2.6021e-05, -1.6762e-04, -1.6763e-04, -9.1986e-06,  2.4221e-05,
        -4.8432e-06, -8.3412e-05, -2.1785e-06,  3.8236e-05,  2.4807e-06,
         2.5729e-05, -1.9881e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3076e-05, -1.8806e-01, -3.6243e-05, -2.1726e-04, -1.9189e-05,
         2.8799e-05, -3.7040e-05, -1.8073e-04,  3.6460e-06, -1.4939e-05,
         7.6370e-05, -6.7382e-05,  5.3947e-05, -1.0115e-04,  7.0488e-06,
        -1.0493e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1541e-04, -2.1130e-01,  3.9013e-05,  1.8953e-04,  6.7869e-05,
        -1.0702e-04,  4.9943e-05,  5.9370e-05, -2.2320e-05,  1.7529e-04,
        -1.9671e-05, -3.8752e-05, -6.6945e-05, -1.3747e-04,  2.2110e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5246e-04, -2.0214e-01,  6.8079e-05,  1.1557e-04, -4.5625e-05,
         1.3543e-05, -3.5816e-05,  7.3666e-05, -1.2418e-04,  2.6849e-04,
        -4.4804e-05, -2.3637e-05, -4.4059e-05, -2.6400e-05,  1.9094e-05,
         8.5408e-05, -1.8765e-04,  2.1733e-08,  2.8861e-06, -3.0040e-05,
         8.3831e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5986e-05,  3.3628e-01,  6.9035e-05,  1.0158e-04, -3.9820e-05,
        -1.8793e-05, -1.1461e-04, -2.7669e-05,  1.8836e-04,  1.5537e-05,
         1.2093e-04, -9.0256e-05,  2.9427e-05,  4.6075e-05,  1.8149e-04,
         1.2740e-04, -1.2275e-05,  4.3886e-05, -1.1096e-04,  1.0404e-04,
         1.9970e-05,  5.5331e-05,  1.2895e-04, -2.7972e-05,  1.2663e-04,
        -1.4763e-05, -1.9162e-04,  1.4307e-05,  5.4364e-05,  6.6625e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #650: [tensor([-3.9915e-04, -2.4009e-01,  1.0283e-04,  1.0012e-04, -1.1510e-04,
        -8.7993e-05,  1.1120e-04, -1.4373e-05, -2.1239e-04,  2.6231e-05,
         6.9713e-05,  2.4692e-05,  2.2405e-05,  1.5126e-04, -3.6685e-06,
        -1.4786e-04,  1.8844e-05, -3.0111e-05, -1.3264e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6576e-04, -2.4200e-01,  4.0457e-05,  2.3787e-05,  3.0520e-05,
         1.1014e-06, -8.8633e-05, -1.6538e-04, -2.3699e-05, -4.2235e-05,
        -4.1926e-06,  1.1553e-04,  2.0023e-05, -1.4740e-04,  6.4160e-05,
        -3.4322e-05,  1.2018e-05, -1.3236e-04,  9.7853e-05, -1.3785e-04,
        -2.5024e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6929e-04,  2.3266e-01,  2.7045e-05, -3.0184e-05, -2.4381e-05,
         3.1653e-05,  1.4581e-04,  1.9077e-04,  6.0667e-05,  1.4336e-04,
        -1.0552e-04, -6.7473e-05, -1.0050e-04, -9.2815e-05, -1.1113e-04,
         3.2786e-05,  3.4768e-06,  1.0927e-04, -9.7998e-05,  3.6722e-05,
        -6.0080e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3846e-04, -2.6340e-01,  1.4074e-04, -1.4436e-04,  1.3683e-04,
         1.9019e-05,  1.9246e-04,  8.3117e-05,  1.0979e-05, -1.1244e-04,
        -7.9598e-05,  1.0036e-04,  3.9380e-05,  2.4718e-05, -1.9725e-04,
         5.1796e-06, -1.8113e-04, -6.6004e-05, -2.6942e-05,  2.3630e-05,
        -8.6825e-05, -2.6969e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8682e-04,  6.3681e-02,  1.5005e-04,  6.1305e-05,  7.3218e-05,
        -5.8638e-05,  3.2745e-05,  2.7288e-05, -3.7528e-05,  9.4395e-05,
        -7.8769e-05,  5.1347e-05, -4.2851e-05,  3.1622e-05, -3.7370e-05,
        -3.6763e-05,  6.7911e-05,  2.7646e-04, -6.9123e-05,  1.7065e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8374e-05,  1.1508e-01, -3.9221e-05, -1.5693e-04, -4.8477e-05,
        -5.8752e-05,  6.6963e-05, -4.8808e-05, -4.0169e-05, -1.7475e-04,
        -1.2237e-04,  1.1282e-04,  9.1560e-05,  8.2290e-05, -4.0807e-05,
        -1.2907e-04, -4.6318e-05, -2.8227e-05,  1.4608e-05, -9.1883e-05,
         9.9278e-05, -5.3625e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2834e-04, -1.2007e-01, -1.0422e-04,  2.3152e-04,  1.6386e-04,
         1.5360e-04,  3.1118e-05,  1.8407e-04,  3.3335e-04,  1.1023e-04,
         7.3183e-05, -2.5986e-04,  1.8322e-04,  6.7305e-05,  1.3374e-05,
         3.7268e-04, -4.7793e-05,  3.6906e-05, -1.3548e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8337e-05,  6.0100e-02, -7.5110e-05,  1.9450e-05,  3.2893e-06,
         1.1801e-04,  1.3573e-04,  4.1159e-05, -3.4004e-05, -4.7774e-05,
         7.2815e-05, -3.2401e-05, -1.2822e-05,  4.2568e-05,  6.6566e-06,
         6.2860e-06,  2.9235e-06, -1.7829e-05, -2.4879e-05, -4.3350e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7106e-04,  8.3260e-02,  1.7007e-05, -2.7099e-05,  1.9429e-05,
        -1.5400e-04,  1.0383e-04, -7.8723e-05,  2.5888e-06,  3.2263e-05,
         1.4612e-05, -3.4128e-05,  2.4520e-05, -6.4742e-05, -7.9082e-05,
         9.1608e-05, -2.9102e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0556e-04,  6.1563e-02, -8.9599e-05,  9.6706e-06, -5.0577e-05,
        -3.8947e-05,  3.4996e-05, -1.6175e-05, -3.5699e-05, -9.8449e-05,
         3.1021e-05,  1.9198e-05,  2.5998e-05,  2.1281e-05, -1.4289e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9059e-04, -2.1778e-01, -8.3934e-05,  1.3849e-04, -1.2562e-04,
         5.8551e-05, -3.7180e-05, -8.7780e-05,  9.7847e-05,  3.3556e-05,
         2.7146e-05, -1.3274e-04,  1.7012e-04, -4.0715e-06, -7.3280e-05,
         1.1789e-04, -3.7526e-05,  6.1409e-05,  4.4173e-05, -3.8529e-05,
        -9.2358e-05, -2.7278e-05, -1.0648e-04,  3.1482e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7924e-04, -1.0701e-01, -9.7620e-05,  3.0971e-05, -3.6218e-05,
        -3.4381e-05, -3.3656e-05, -1.4827e-05, -9.7997e-06, -6.0625e-05,
         2.4532e-05, -2.2473e-05,  7.1388e-05, -1.1576e-04,  8.9719e-06,
        -5.3236e-05,  6.0992e-06,  8.2085e-05,  7.8027e-05, -8.4120e-05,
        -2.6533e-05, -1.0812e-04,  1.3144e-05, -1.3913e-04,  5.1394e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #700: [tensor([-3.8494e-05, -1.0589e-01, -2.3745e-05,  8.9004e-06,  2.8237e-05,
        -1.7358e-05, -1.4175e-05, -2.8858e-05,  3.6853e-05, -1.6527e-05,
        -2.1516e-07,  1.9527e-05, -6.3003e-05,  2.2094e-06, -2.2826e-05,
        -5.7424e-05,  2.5226e-05,  8.0703e-06, -1.0883e-05,  1.1331e-05,
         7.3540e-06,  1.4487e-05, -4.0573e-06, -4.0479e-05, -3.4320e-05,
        -1.8068e-05,  2.6504e-05, -2.7444e-05, -4.8270e-07,  3.3790e-07,
        -4.7378e-05, -1.5700e-05, -6.7042e-06, -8.5416e-06, -7.4809e-06,
        -1.9015e-05, -4.1027e-05, -1.6733e-06,  2.1244e-05,  3.3080e-05,
        -3.8903e-05,  1.4609e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.3283e-04, -9.4649e-02, -2.3543e-05, -2.2208e-05, -1.3747e-05,
        -2.8676e-05, -1.5410e-05, -1.0761e-05, -3.0530e-05, -1.6938e-05,
        -3.0159e-05,  7.1268e-06, -2.0100e-05, -2.3462e-05, -3.0042e-06,
        -2.8540e-05, -2.4198e-05,  3.9688e-06,  1.5355e-06,  3.7720e-07,
         1.2967e-06, -2.8100e-05, -2.9810e-05,  5.4846e-05, -1.2868e-05,
        -2.5483e-05, -2.5751e-05, -8.8719e-06,  1.0628e-05, -8.7483e-06,
        -7.1697e-05, -2.9833e-05, -4.6260e-06,  5.2948e-06,  1.0783e-05,
         4.4529e-06, -1.3360e-05, -8.6109e-06,  5.3162e-06,  1.1559e-05,
        -1.1853e-05,  1.1570e-06, -2.7180e-05,  1.5561e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1750e-04, -2.6128e-01, -1.7809e-05, -4.0776e-05, -4.5935e-05,
        -1.0923e-05,  4.1491e-05,  1.2108e-05, -2.7313e-05, -1.9244e-05,
         3.1536e-05, -3.4281e-05, -5.0923e-05, -9.5094e-05,  3.3639e-05,
         2.0355e-05, -1.0081e-04, -4.8472e-05, -9.1755e-05, -8.0994e-05,
         2.5952e-06, -7.2254e-05, -3.3371e-05,  3.3864e-06, -4.9658e-06,
         1.9002e-05,  4.4729e-07, -7.3755e-05, -2.4390e-05, -5.2888e-05,
        -2.7244e-05,  3.9013e-05,  1.8437e-05, -1.3099e-04, -8.8870e-05,
         3.5588e-05,  5.3597e-05,  4.1015e-05, -1.1454e-04,  2.0562e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1996e-04,  3.6707e-01,  1.2655e-04, -7.7043e-05, -4.1580e-05,
         1.2220e-04,  3.1615e-05,  1.1873e-04, -1.8967e-04, -2.2854e-04,
         2.0716e-05,  6.7660e-05, -1.4037e-04,  3.7318e-05,  4.5855e-05,
        -1.8247e-05,  1.1673e-04,  7.5885e-05, -9.6006e-05,  5.6620e-05,
         1.1412e-04, -4.3948e-05, -1.1284e-04, -3.0393e-05,  8.0094e-05,
        -1.0415e-04,  2.4863e-05,  1.5551e-04, -4.0568e-05, -8.9506e-05,
         2.2875e-05,  6.3807e-05,  8.2783e-05, -4.2799e-05, -1.1876e-04,
         1.2254e-04,  6.7715e-05,  3.3726e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9194e-04, -2.3647e-01,  3.5594e-05,  1.0480e-04, -1.5717e-06,
        -8.2454e-05, -5.1848e-05,  4.3275e-05,  4.0945e-05,  5.8690e-05,
        -6.6273e-05, -5.6346e-05, -1.0887e-04, -6.2698e-05,  2.2074e-06,
         5.3380e-05, -1.0105e-04,  2.5332e-05, -4.9922e-05, -5.8938e-05,
         6.8166e-05,  4.1668e-05, -8.0340e-05,  8.9677e-05, -4.0713e-05,
         4.5645e-05,  6.4138e-06,  4.3025e-06,  1.3955e-05, -2.3717e-05,
        -2.4462e-05, -1.0918e-04, -3.1765e-05,  1.3327e-05,  2.5882e-05,
         3.9207e-06, -9.3703e-05,  3.1622e-05,  8.4233e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4742e-05, -6.6376e-02, -7.9938e-05,  2.1478e-05, -2.5853e-05,
         4.4903e-05,  3.6540e-06,  2.0561e-06,  4.4082e-05,  3.6518e-05,
         1.5670e-05,  6.7657e-05, -2.1527e-05,  1.2902e-05, -4.1476e-05,
        -1.9954e-05, -8.3646e-05,  2.7621e-05, -1.2754e-05,  4.7730e-06,
         1.1153e-05,  1.5710e-06, -2.3503e-05,  2.9346e-05, -3.5580e-06,
         1.0453e-04, -9.2671e-05,  3.4688e-05, -4.8436e-05, -3.2494e-05,
        -4.8656e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6917e-04,  1.3229e-01,  4.3150e-05, -8.5027e-06, -5.1594e-05,
        -8.5865e-05, -4.8186e-05, -3.5394e-05, -5.3878e-05, -3.9211e-05,
        -6.2077e-05, -1.1333e-04, -4.4444e-05, -5.0675e-05, -3.6436e-05,
        -6.9285e-05,  1.1810e-04, -1.3888e-05, -6.1029e-06, -7.2244e-05,
         7.4319e-05, -4.7679e-05,  1.7218e-05,  1.2580e-04, -6.9919e-05,
        -2.2009e-04, -1.6198e-06,  5.3534e-05, -1.4743e-05, -1.2609e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3236e-05,  1.2911e-01, -2.3803e-05, -2.0718e-05,  7.4094e-05,
        -5.5495e-05,  7.5120e-06, -3.4181e-05,  6.0890e-05, -4.0167e-06,
         1.4694e-05, -7.4620e-05,  4.8947e-05, -1.7309e-05,  7.6385e-05,
        -4.4907e-05,  1.1450e-04, -1.5261e-04, -1.0840e-04,  3.7425e-05,
         1.4386e-05, -2.2977e-05,  2.9825e-05, -4.4455e-05, -6.6416e-05,
        -3.8317e-05,  2.8105e-05, -1.1642e-04, -2.6249e-05,  4.3436e-05,
         7.9351e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3697e-04, -1.6568e-01,  7.2899e-05, -4.7082e-05,  3.7841e-05,
        -5.1055e-05,  1.2647e-05, -2.7147e-05, -2.0751e-05,  4.0450e-05,
         1.0525e-04, -1.6739e-06,  4.1956e-05,  6.9289e-05,  9.2853e-05,
        -2.9647e-05, -4.2585e-06,  2.9953e-05, -4.9437e-05, -3.2201e-05,
         7.2465e-05, -7.6329e-05,  1.0535e-04, -2.9362e-05,  1.1472e-06,
         7.1651e-05,  8.6213e-05, -2.6444e-05, -6.9983e-06, -1.3607e-05,
        -7.6072e-05, -9.2951e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.9278e-04, -7.4997e-02,  5.5380e-06, -4.0881e-05,  1.4772e-05,
        -5.7656e-05, -9.3990e-05, -7.5597e-05,  9.9302e-06, -1.8502e-06,
         6.0263e-05, -2.9309e-05, -1.0957e-04, -1.3637e-05, -1.4371e-05,
        -5.0143e-05,  8.0952e-06,  6.9687e-07, -2.9475e-05, -3.7995e-05,
        -6.2544e-05, -5.5299e-05,  3.9550e-06, -3.7174e-05, -4.3521e-07,
         6.5579e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8112e-04, -9.4518e-02, -1.2680e-04, -4.2197e-05, -3.4603e-05,
        -5.6255e-05, -1.0087e-04, -7.6448e-05, -6.5729e-05, -1.2887e-04,
         2.9614e-05,  7.4495e-05,  4.1766e-05, -8.5817e-05, -6.2021e-05,
         1.8941e-05, -1.0217e-05, -3.3689e-06,  3.9368e-05, -1.9613e-05,
         1.2131e-05, -1.0428e-05,  2.3592e-05,  1.6748e-05, -1.0103e-04,
         1.7194e-05,  2.5886e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8669e-05,  1.3357e-01, -1.4204e-04,  7.5693e-05,  1.8084e-07,
         1.2878e-05,  2.0442e-06,  7.1903e-05, -3.2599e-05,  2.7987e-05,
        -5.5148e-05, -3.0095e-04,  6.0872e-05, -9.8772e-05, -1.1571e-05,
        -5.6547e-05, -4.6679e-05,  1.8144e-04,  1.8659e-06,  9.1620e-05,
         9.2593e-05, -1.7705e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #750: [tensor([ 1.1807e-04, -2.9941e-01, -3.1741e-05,  1.2030e-04,  1.0474e-04,
         4.3617e-05, -3.2049e-05,  1.8307e-04,  7.8087e-05,  5.2315e-05,
        -7.7571e-05,  3.3986e-05, -3.2961e-05,  3.7348e-05,  1.2221e-04,
        -1.0419e-04, -8.6649e-05,  6.5639e-06, -4.5746e-06, -6.0068e-05,
        -1.2175e-04, -4.1784e-05, -4.9653e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2756e-04, -1.6914e-01,  3.7150e-05, -1.0718e-05,  5.0601e-05,
         6.1004e-05, -5.8638e-05, -5.6897e-06,  1.3439e-04,  8.0114e-06,
         8.6608e-05,  1.1964e-05, -3.0824e-05,  2.9528e-05,  4.2244e-05,
         1.5679e-05, -9.1743e-05,  1.7727e-05, -1.1624e-04, -1.4685e-04,
        -4.7493e-05, -9.6666e-05, -4.6398e-05, -4.3635e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9430e-04, -1.6086e-01,  7.3715e-05,  3.3421e-05, -2.6409e-05,
         7.6134e-05,  3.8141e-05,  6.1192e-05,  8.1559e-06, -4.2819e-05,
         4.4686e-05, -2.8076e-05,  5.2912e-05, -4.0296e-05,  1.1038e-05,
        -3.1353e-05, -1.0202e-04, -3.9546e-05,  2.5024e-05, -1.7998e-05,
        -9.4590e-06, -1.0890e-05, -6.0527e-05, -5.0486e-05, -4.1224e-05,
        -2.5407e-05, -5.0626e-06,  3.0739e-05, -3.8462e-05,  1.8337e-05,
         3.7710e-05, -8.3705e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2405e-04, -1.2004e-01,  5.1377e-05, -1.6219e-05, -1.0542e-05,
         2.8486e-05, -2.5526e-06,  6.9934e-05, -1.0701e-05, -3.3309e-06,
        -6.0873e-05,  6.2591e-05,  2.6545e-05,  6.2486e-05,  8.7327e-05,
         1.7157e-05,  6.3517e-05,  3.5788e-06,  1.2826e-04, -4.9786e-05,
         2.5260e-05, -7.1842e-05, -2.9582e-05, -3.8084e-05, -7.4916e-05,
        -2.2173e-05,  5.6935e-05,  3.4477e-05,  2.1149e-05, -2.6970e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9771e-05, -1.7167e-01,  6.9041e-05, -1.1571e-05, -2.5259e-05,
         5.1036e-05,  1.2210e-05,  2.1257e-04,  7.5635e-05,  9.5655e-05,
         2.9300e-05,  1.4913e-05,  5.1454e-05, -5.8238e-06,  3.4863e-05,
         2.4008e-05,  7.6768e-05,  1.6704e-05,  9.3794e-05, -4.2495e-05,
         8.1709e-06,  2.1056e-05, -2.9453e-06,  4.4644e-05, -3.7622e-05,
         1.1948e-04,  2.7311e-05, -8.4266e-06,  6.2135e-05,  4.0994e-05,
        -6.5540e-05,  1.1715e-05,  1.0421e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1341e-04,  1.8498e-02, -3.2610e-05,  9.2873e-06, -3.7205e-05,
        -5.9002e-05,  3.2059e-05, -1.2479e-05,  1.9282e-06, -4.6662e-05,
        -2.3972e-05, -2.8059e-05,  8.7078e-06,  1.7529e-05,  4.5719e-05,
         2.8790e-05, -1.6695e-05,  3.9579e-05, -8.8835e-07, -1.2620e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0241e-05,  2.4205e-02, -1.6029e-05,  1.1186e-05,  4.3332e-05,
        -2.0610e-05,  6.3696e-05,  9.9270e-06,  1.2511e-05,  2.4642e-05,
         2.1918e-06, -6.4152e-06,  2.5701e-05,  1.8083e-05,  2.4017e-05,
         1.3657e-05,  3.5958e-06, -1.2287e-05, -1.6193e-05, -1.1600e-05,
         2.0675e-07,  1.4212e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8859e-04,  2.7024e-02,  3.3703e-05,  2.6838e-05, -3.6393e-05,
         4.5849e-05, -6.1907e-06, -3.7374e-05,  1.9325e-06,  2.7003e-05,
        -5.8824e-05, -1.7965e-05,  1.9806e-05,  2.7739e-05,  6.4252e-06,
        -2.4895e-05, -6.7592e-06, -2.6574e-05,  2.1340e-06,  4.2885e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7659e-04, -1.6326e-01,  1.0344e-04,  7.6410e-05,  1.2199e-04,
         3.5418e-04,  3.3945e-05, -1.1655e-04, -1.4061e-04, -3.7350e-05,
        -4.3662e-05,  8.5456e-05, -1.2740e-04, -1.2128e-04, -8.0728e-05,
         1.2979e-04,  2.4654e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8433e-05,  1.1167e-01, -1.0214e-04,  3.9733e-05, -4.2077e-05,
        -2.6174e-04,  1.2947e-04,  1.1491e-04,  8.6237e-05,  9.3516e-05,
         9.6637e-05,  8.0350e-05,  2.0936e-04, -2.3277e-05,  1.2713e-04,
         6.9623e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7314e-04,  1.5994e-01, -4.2839e-04,  1.3757e-04, -1.8651e-04,
        -1.4371e-04,  7.6038e-05, -2.7336e-04,  2.7720e-04,  5.9471e-05,
        -2.9611e-05,  2.8459e-05, -1.1493e-04, -2.1700e-05,  3.2871e-04,
         1.0912e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8855e-04, -1.3271e-01,  6.7299e-05,  3.6680e-06,  5.8584e-05,
        -4.0888e-05,  4.4738e-05,  4.6433e-05,  1.0267e-04,  8.2438e-05,
         8.3202e-05,  3.0135e-05, -4.2912e-05,  1.0143e-04,  3.0815e-05,
         6.4732e-05,  6.9268e-05,  1.6871e-05, -2.0650e-05, -2.6414e-05,
         7.8502e-06,  2.9752e-06,  1.4414e-06, -1.4090e-05,  3.2661e-05,
        -2.3014e-05, -6.2640e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #800: [tensor([-4.4398e-05, -3.0253e-01, -8.4389e-05, -2.2367e-05,  1.4731e-04,
         4.0307e-05,  7.8602e-05, -2.1819e-04,  1.7121e-04,  1.0445e-04,
        -2.3624e-05, -3.3692e-05, -9.6716e-05, -1.3458e-05,  2.0127e-05,
        -8.9108e-05, -9.5486e-05, -1.1069e-04, -8.8080e-05, -1.9268e-04,
         2.8935e-05, -2.3872e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0929e-05, -7.7780e-02, -6.7416e-05, -1.0167e-05, -5.9261e-06,
        -5.1474e-05, -8.0684e-06, -2.2753e-05, -5.1378e-06, -3.9020e-05,
        -1.9341e-05, -1.8627e-06, -3.9510e-05, -7.2298e-05, -4.3902e-05,
        -1.9422e-05, -3.4825e-05,  1.5924e-05,  1.1915e-05,  4.9192e-06,
         5.7219e-06, -8.1931e-06, -3.5926e-05,  7.5517e-06, -2.6839e-05,
        -6.5226e-06,  2.0479e-05, -4.7112e-05, -3.9531e-05,  1.7475e-05,
         6.1768e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4178e-04, -1.7286e-01,  1.0969e-04,  1.1077e-04,  6.4222e-05,
        -1.3910e-05,  2.7727e-05,  2.4586e-05, -4.9389e-06,  5.1628e-05,
        -4.9187e-06, -1.1061e-04,  6.2780e-05,  3.8652e-05,  1.9228e-05,
         2.2912e-05,  2.0385e-05, -6.4201e-05,  6.9835e-06, -1.0700e-04,
        -5.9469e-06,  1.0704e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7377e-04, -2.4788e-01, -3.4396e-05, -1.5582e-04,  4.4566e-06,
         9.9460e-05,  3.4853e-05,  8.2279e-06, -1.0381e-05,  5.4129e-05,
         8.8412e-05,  2.1933e-05,  1.1936e-04,  1.2677e-05,  1.0538e-04,
         1.2314e-07, -7.5173e-06, -6.0343e-05,  5.8321e-05,  5.5653e-05,
        -5.1128e-06,  1.3667e-04, -2.0378e-05, -3.7252e-06,  9.4102e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8487e-04, -1.3150e-01, -1.2605e-04,  4.2900e-05,  1.9473e-04,
        -5.8571e-05,  5.9158e-05, -4.0948e-05, -4.6638e-06,  8.2147e-08,
        -3.9913e-05,  1.6184e-04,  2.7108e-05,  9.2673e-05,  1.3401e-04,
        -4.6342e-05,  1.5332e-04, -2.4725e-05, -1.0316e-05, -6.9862e-05,
        -6.7468e-06,  1.3743e-04,  4.5930e-05, -6.3446e-05,  6.3432e-05,
        -3.4019e-05, -3.9805e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6395e-05, -8.0715e-02,  3.8200e-06, -7.1679e-05, -2.1541e-05,
         3.8729e-05,  1.7454e-05, -8.4634e-06,  7.9612e-05,  2.1293e-06,
        -2.9719e-05,  1.6386e-05, -3.0726e-05, -1.4503e-05, -6.4953e-05,
         5.3190e-05,  3.8774e-05, -4.8725e-05, -3.6834e-05,  1.9798e-05,
         1.9328e-05, -3.7304e-05, -4.0469e-05,  2.6537e-06, -4.2175e-05,
        -2.0324e-05, -4.3510e-05, -9.7310e-06, -2.1266e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2819e-04, -1.3214e-01, -8.0708e-05,  1.7267e-05,  1.0401e-04,
         9.1556e-05, -1.8766e-06,  6.1231e-05,  3.0697e-06, -6.6669e-05,
        -4.7207e-05,  2.0395e-04,  4.1613e-05,  1.3127e-05,  1.2811e-04,
         5.1016e-05,  3.5915e-05, -1.1775e-05,  3.0028e-05, -8.3969e-06,
        -5.1176e-05,  7.9081e-05,  5.0439e-06, -6.2767e-05, -5.3285e-05,
         5.5577e-05,  7.8586e-05, -1.3959e-04, -2.8134e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5056e-04,  1.0772e-01,  2.5146e-05, -1.4857e-05, -7.3999e-05,
        -4.5159e-05, -1.6046e-05,  9.9217e-05, -5.5314e-05,  2.7187e-05,
        -2.4002e-06, -6.8226e-06, -6.6888e-05,  2.8555e-05,  9.7926e-06,
        -7.0131e-05, -2.6793e-05, -3.7710e-06, -1.0384e-05, -4.1215e-05,
         1.9540e-05,  2.3937e-05,  4.6493e-05,  3.4064e-06,  6.9728e-05,
         1.2953e-05, -1.5250e-05, -7.8407e-05,  1.0218e-04, -4.3863e-05,
        -7.2068e-05,  2.6764e-05, -8.3876e-05,  5.9308e-05,  1.3902e-05,
         1.1526e-05,  3.8865e-05,  4.3774e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4837e-05, -9.8048e-02, -2.4365e-05,  5.7896e-05,  3.1962e-05,
        -2.4430e-05, -7.2428e-05, -3.4487e-05,  6.7861e-05,  3.9157e-06,
        -2.0849e-05,  7.3757e-06, -4.5889e-05, -9.7995e-06,  3.4780e-05,
         5.8035e-05, -3.9578e-06,  1.6317e-05,  1.9408e-05, -2.3000e-05,
        -3.7299e-05,  6.2872e-05, -3.4132e-05, -7.0243e-06,  1.1739e-04,
         8.5467e-06, -7.7583e-05,  4.2368e-05, -2.7074e-05, -3.6619e-05,
         1.2765e-05, -1.8093e-05,  9.7934e-06, -2.9916e-05, -3.1566e-05,
         4.6764e-05,  5.4704e-05,  3.4768e-05, -7.0749e-05,  7.5798e-05,
         1.3587e-05,  2.9653e-05, -2.0603e-05,  4.3466e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1945e-04, -7.6408e-02,  2.5147e-06,  8.9534e-05,  7.5886e-05,
         9.1667e-06,  9.2012e-06, -1.8293e-05,  3.4524e-05,  6.8156e-05,
         1.4862e-07, -6.7566e-06, -2.9678e-05,  2.6189e-05, -1.6592e-05,
         2.4022e-05, -2.3655e-05,  2.3711e-05, -1.8959e-05,  2.1031e-05,
         4.4763e-05,  1.3325e-06,  1.6770e-05,  3.1221e-05,  6.9680e-05,
         2.9524e-05, -2.1608e-05,  5.0029e-05, -2.8772e-06,  3.9924e-05,
         1.0441e-04, -1.2760e-05,  5.2148e-05,  3.7662e-05, -2.4591e-05,
         6.7810e-05, -5.6498e-05, -4.4360e-06, -5.0247e-05, -6.1638e-05,
        -4.7471e-07,  2.8207e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7671e-04, -1.9967e-01, -2.1076e-05,  1.3932e-05, -1.4149e-05,
        -4.9522e-05, -4.8356e-05,  2.9519e-05,  1.7370e-05, -1.6452e-05,
         7.7039e-05,  2.8027e-05, -8.7710e-05,  5.2749e-05, -2.1662e-05,
         6.9814e-06,  4.1320e-05,  1.7608e-05,  2.5060e-06, -1.0370e-06,
        -2.7241e-05,  1.8971e-05,  3.1274e-05,  3.0826e-05,  8.0733e-05,
        -2.0747e-05,  3.5013e-05,  2.0357e-06,  2.5030e-05,  1.8772e-05,
         3.6958e-05, -1.3030e-05, -1.6652e-05, -7.0292e-06, -3.2694e-05,
         3.5598e-05, -3.5059e-06, -2.7728e-06,  5.8770e-05, -9.5510e-06,
        -2.3213e-05, -3.7046e-05, -2.9509e-05, -9.5426e-06,  3.3305e-05,
         2.3817e-05,  2.5077e-05,  5.4593e-06,  2.1736e-06,  3.0968e-05,
         3.0031e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7042e-04, -1.6483e-01,  3.1977e-05, -2.9434e-05,  7.4600e-05,
        -2.0958e-05, -3.8619e-05, -1.8739e-05, -6.2591e-05, -1.8438e-05,
         3.9375e-05, -7.3537e-06, -2.7721e-05,  1.2554e-05, -4.8611e-06,
         5.8531e-06,  7.2965e-06,  3.5928e-05, -1.5827e-05, -6.1747e-05,
        -6.6759e-05,  1.6594e-05, -9.6030e-06, -6.6077e-05, -4.5791e-06,
         2.2153e-05, -5.7983e-06, -8.5608e-06, -4.0114e-05,  1.6411e-06,
        -2.6563e-05, -1.7229e-06, -6.3053e-05,  6.7832e-06,  1.7207e-05,
        -2.8820e-05, -5.5396e-05, -2.0256e-06, -3.0085e-05, -5.9547e-06,
        -1.4987e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #850: [tensor([-5.6583e-04,  2.3683e-01,  5.9678e-05,  9.2443e-05, -4.5747e-05,
         5.4004e-06, -9.1771e-05,  6.6827e-06,  1.8184e-05,  4.9608e-05,
        -1.9908e-05, -7.9164e-06, -6.5910e-06,  7.6024e-05, -1.6787e-05,
         1.1314e-04,  4.5341e-05,  8.7449e-05,  7.2919e-05,  8.5752e-05,
         2.2520e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1736e-04, -2.8008e-01, -2.6724e-05, -1.1935e-05, -7.6349e-05,
        -5.7574e-05, -1.1125e-04,  9.8690e-05,  1.5950e-06,  4.3521e-05,
         9.7366e-06, -7.1809e-06, -8.3678e-05,  2.4324e-05, -3.3553e-05,
        -2.7182e-06,  1.6583e-05,  7.1862e-05,  6.7893e-05,  7.5916e-05,
         6.5536e-05,  1.3725e-04,  1.2032e-05, -5.4030e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3449e-04, -3.0041e-01, -2.4512e-05,  5.1750e-05,  1.3352e-04,
        -3.1267e-05,  3.9158e-05, -1.8781e-04, -6.0424e-05, -6.7963e-05,
         1.1383e-04,  2.9884e-05, -3.9024e-05,  2.1213e-06, -7.2889e-05,
        -1.0656e-04, -4.5119e-06,  9.9887e-05, -2.6271e-05,  1.0511e-04,
        -1.1266e-04, -4.7837e-06, -8.9929e-05,  4.3574e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5539e-04, -3.1475e-01,  1.1243e-04, -3.2895e-05,  5.3718e-05,
         2.2063e-04,  1.3097e-04,  1.0641e-04,  1.5398e-04,  2.0605e-04,
        -3.5131e-05, -9.5478e-05, -1.2325e-04,  1.3733e-04, -1.7768e-04,
        -1.3058e-04, -1.7502e-04, -5.0580e-05, -4.5889e-05, -2.7639e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6775e-04,  3.0673e-01, -1.3564e-04,  3.5452e-05,  6.0517e-05,
        -2.2467e-04, -1.7737e-04, -1.3815e-04,  7.9328e-05,  7.1066e-05,
        -1.5408e-04, -1.1837e-04, -8.8387e-05,  1.2462e-04,  9.7410e-05,
         1.1149e-04, -3.9892e-05, -1.3978e-04,  4.6741e-05,  3.2549e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5751e-04,  2.9510e-01,  3.9174e-05, -1.0152e-04,  2.1524e-05,
         6.9168e-05, -1.5418e-04,  1.2996e-04,  4.6041e-05,  1.3884e-04,
         7.2777e-05,  3.9603e-06, -4.1528e-05, -3.8876e-05,  1.8247e-04,
         4.8318e-05, -4.8362e-05, -7.2801e-05,  1.7416e-04,  4.4375e-05,
        -5.6437e-05, -8.3626e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8873e-04, -3.3113e-01,  7.2236e-05, -8.5354e-05, -5.2432e-05,
        -8.7751e-05, -2.5630e-05, -3.5941e-05, -1.3765e-04,  7.2483e-05,
         1.4280e-04,  1.0254e-04, -1.5559e-04, -1.1531e-04,  7.6085e-05,
        -2.9469e-05,  4.9064e-05, -5.5739e-05,  9.8074e-05, -3.6657e-05,
        -2.9960e-06,  5.4233e-05, -2.3965e-05,  1.4056e-04,  1.0222e-05,
         1.0713e-04,  6.2433e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2099e-04, -2.7335e-01, -1.1438e-04, -7.6377e-05, -1.0954e-04,
        -1.1336e-04, -6.6445e-05,  4.7047e-05,  6.9991e-05, -5.9502e-06,
         5.0566e-05, -3.6006e-05, -5.8308e-05,  1.4886e-05, -2.1791e-05,
         4.9672e-05,  5.1503e-05,  1.1838e-04,  1.1124e-04, -2.5188e-05,
         3.6910e-05, -4.1916e-06,  4.5192e-05,  7.7397e-05,  4.3112e-05,
         4.0533e-05, -9.4049e-06,  3.0253e-05, -4.9781e-05, -1.7444e-05,
         1.0556e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3997e-05,  3.1814e-01,  4.7023e-05, -3.8662e-05,  2.1563e-04,
         1.1049e-04, -3.5041e-06, -1.1417e-05, -9.0279e-05, -1.9007e-04,
        -1.4859e-04,  1.2089e-04,  4.4026e-05, -4.8210e-05,  1.1622e-04,
         2.7836e-05, -8.8559e-06,  2.8742e-05,  3.0637e-05,  9.8263e-05,
         1.5038e-04, -8.3603e-05,  5.7335e-05, -1.1036e-04, -2.0970e-05,
        -1.6933e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5377e-05, -1.1858e-01, -7.7012e-05,  8.0991e-05,  1.1276e-05,
         2.4547e-05, -7.0457e-05, -2.4852e-05,  1.6643e-05, -1.7845e-05,
         3.0218e-06, -2.9531e-05,  1.4686e-05,  1.5208e-05,  8.6555e-06,
         7.3909e-05,  4.6352e-05,  8.3341e-05, -6.2648e-05,  4.2949e-05,
         3.3425e-05,  2.6609e-05,  3.3450e-05,  4.9044e-05, -2.4214e-06,
         3.6403e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5000e-07, -1.2528e-01, -6.0170e-05, -5.7580e-05, -2.4354e-05,
        -1.2122e-05,  5.8918e-05, -8.1706e-05, -1.4287e-04, -1.4833e-05,
         1.2797e-04,  2.8201e-05,  1.0112e-04, -1.8265e-06,  9.3641e-05,
         3.2667e-05, -3.7570e-05, -1.3103e-05,  8.3768e-05,  3.7120e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5662e-04,  2.3873e-01, -3.4604e-05, -9.0261e-05, -8.2822e-05,
         8.8751e-06,  4.5182e-05,  5.3056e-05, -1.5540e-04,  5.2002e-05,
         8.8898e-05, -6.7413e-05, -6.2129e-05,  3.3428e-05, -1.2313e-04,
        -5.6105e-05, -1.3089e-04, -1.6773e-04, -8.8428e-05, -1.5403e-04,
         1.5902e-04, -1.4760e-05, -1.2880e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #900: [tensor([ 2.3421e-04,  1.8571e-01, -4.1220e-05,  5.9774e-05,  8.7917e-05,
         4.0889e-05,  7.7027e-05,  1.1004e-04,  4.5127e-05, -1.9441e-05,
         1.2086e-05, -6.7196e-05,  5.9666e-05, -2.8242e-05,  1.9528e-04,
        -9.6492e-06, -5.8945e-05, -1.5437e-05,  3.2891e-05,  4.5892e-05,
        -1.7832e-05,  5.3255e-05,  1.0147e-05,  9.6728e-05,  2.0758e-05,
         8.7811e-07,  3.5418e-05, -1.6520e-05,  1.5664e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4409e-05,  1.5531e-01, -9.8112e-05,  1.0596e-04,  8.9011e-05,
        -7.3724e-05,  8.5624e-05,  4.7794e-05, -5.2522e-05, -5.8269e-05,
        -4.0979e-05,  8.7687e-05, -8.1304e-06,  2.7441e-05,  1.4202e-04,
         1.6345e-04,  1.2728e-05,  7.7005e-05, -1.4613e-05, -1.0062e-04,
        -1.9541e-05, -6.7335e-05, -2.3148e-05, -1.7193e-04, -7.4535e-05,
         9.0739e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.4996e-05,  1.4480e-01, -1.0885e-04, -1.0781e-04, -5.9542e-05,
        -1.0450e-04, -1.2423e-04, -6.5183e-05, -5.5764e-05, -6.0263e-05,
         7.7776e-06, -3.3626e-05, -2.6411e-05,  3.4020e-05, -1.1471e-04,
         5.6560e-05,  5.9288e-06, -1.7526e-05, -4.1032e-05,  2.0559e-05,
        -8.5558e-05,  3.5933e-05,  1.6811e-05, -6.4833e-05,  4.8778e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8027e-05,  1.9313e-01,  3.8646e-05, -1.8677e-05, -2.0287e-06,
         1.2279e-04, -1.5802e-04, -4.6985e-05, -3.3139e-05, -3.2837e-05,
        -6.0830e-05,  1.4244e-04, -6.6471e-05,  2.0376e-05, -1.4413e-05,
         6.9122e-05, -5.4146e-05, -1.7988e-05, -1.5603e-04, -1.4910e-04,
        -8.0674e-05, -3.7978e-05, -8.1341e-05,  6.8772e-06, -1.2651e-04,
         6.3899e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.9656e-04,  1.3876e-01, -8.7865e-06,  1.9947e-05, -3.1106e-05,
         7.4733e-06,  3.1346e-06, -1.9231e-05, -4.4450e-05, -1.7573e-05,
        -1.0336e-04, -5.0769e-05, -5.2428e-05, -1.3524e-05,  8.3138e-05,
         2.7247e-05, -8.2852e-06,  9.8057e-05,  3.1330e-05,  5.9630e-05,
         9.9348e-06,  2.1150e-05,  6.9725e-05,  4.9090e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2538e-04,  9.9318e-02, -3.4670e-05, -1.4792e-04, -1.2836e-04,
        -2.8563e-05,  2.9793e-05,  1.1023e-05, -1.9948e-05,  3.1622e-05,
        -5.2321e-05,  4.4612e-05, -1.7674e-05,  5.1796e-07,  3.6746e-05,
         6.4497e-05, -7.9160e-05,  1.7152e-05,  4.8406e-05,  5.5977e-05,
         7.5124e-05,  5.9888e-05,  2.0474e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8317e-04,  9.5791e-02, -4.2562e-05, -4.0846e-06,  4.6954e-05,
         4.7722e-05,  6.1194e-05,  1.3476e-05,  3.6096e-05, -8.1289e-05,
        -6.1955e-06, -3.5228e-05,  6.3286e-05, -3.8367e-05,  4.3085e-05,
         5.5679e-05, -2.2525e-05,  2.2585e-05, -2.4835e-05, -5.8978e-06,
         8.6049e-05, -1.4342e-05, -1.5246e-05, -3.3021e-05,  3.7906e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9395e-04,  2.0720e-01, -8.4934e-05, -2.6185e-05,  8.5394e-05,
         1.4176e-05, -1.7880e-04, -1.6640e-04, -1.1957e-04, -8.6399e-05,
         2.4300e-05, -8.4196e-05,  1.3561e-05,  6.0146e-05,  4.4240e-05,
         1.2022e-04, -3.7434e-05,  2.7674e-05,  2.4176e-05, -1.1744e-04,
        -2.4742e-05,  1.3052e-05,  7.7728e-05, -9.4670e-05, -1.8898e-05,
         4.5586e-05, -1.4632e-04,  5.1986e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3651e-04,  1.1222e-01, -5.7365e-05, -6.3817e-05,  1.3010e-05,
         7.7266e-05, -5.1795e-05,  1.1979e-05,  1.7909e-05, -8.0705e-05,
        -1.8275e-05,  3.0167e-06,  3.9645e-05, -4.5337e-05,  1.1744e-04,
         5.3926e-05,  5.2487e-05,  2.1702e-05, -2.9838e-05, -1.4732e-05,
        -6.7261e-05, -5.1646e-06,  1.7132e-05, -1.2855e-05, -3.8646e-05,
        -1.5076e-05,  1.3923e-05,  2.3687e-05,  5.5009e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8792e-06, -1.3319e-01, -2.5490e-05, -3.3581e-05,  1.7262e-05,
        -1.3204e-04,  7.4092e-06,  6.3376e-05, -5.9673e-05, -1.6546e-05,
         9.6205e-05, -2.6113e-05,  1.1879e-05, -1.0311e-05, -1.1709e-04,
        -3.9424e-05,  1.9836e-05, -2.7054e-05, -8.3303e-05, -4.1762e-05,
         2.0868e-05, -5.8584e-05,  6.8510e-06,  5.2463e-05,  3.3151e-05,
        -5.2056e-05,  8.0111e-06, -7.8937e-06,  2.1163e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8172e-04,  1.9246e-01, -5.9382e-05,  6.2630e-05, -9.5224e-05,
        -2.8515e-05, -4.6242e-05,  7.2154e-06, -4.2744e-05, -2.2698e-05,
        -1.0485e-04,  1.3689e-04,  1.7441e-05,  1.1741e-04,  1.8977e-06,
         4.5143e-05, -5.5662e-05,  1.2174e-05,  1.2459e-04,  1.1993e-04,
        -1.5393e-04,  5.5600e-05, -1.0975e-04,  1.1257e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5860e-04,  1.5417e-01, -5.0326e-05,  1.1793e-04,  3.9536e-05,
         2.3570e-05, -1.2176e-04,  7.4055e-05, -1.9369e-06,  1.7926e-05,
         6.9791e-05,  1.3562e-04,  3.7803e-05, -7.5645e-05,  2.4481e-05,
         1.0880e-04,  8.3696e-06, -6.3328e-05, -7.1003e-06,  7.6420e-05,
        -2.9901e-05, -5.2460e-05, -8.7053e-06, -7.3749e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #950: [tensor([-3.0579e-06,  2.5181e-02, -2.8986e-06, -3.2690e-05, -2.8246e-05,
        -2.2829e-05, -2.6333e-05, -1.8930e-05, -8.2223e-06, -1.1312e-06,
         2.3911e-05, -7.4916e-06,  9.7538e-06,  1.4692e-05, -4.0355e-05,
        -1.6561e-05,  4.6687e-05, -1.3366e-05, -4.3416e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3820e-04,  1.7820e-02, -4.7410e-05, -1.1312e-06, -3.1442e-05,
        -2.8201e-05, -4.5703e-05,  3.3229e-05, -4.3311e-05, -3.3723e-05,
         2.2601e-05, -1.3132e-05, -1.0396e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0667e-05,  2.4242e-02,  3.3978e-05,  4.6241e-06, -2.6313e-05,
        -1.9807e-05, -1.6271e-05,  2.1316e-05, -3.5375e-05, -1.9199e-05,
         8.5536e-06,  3.1271e-05,  2.3069e-05,  1.5408e-05,  5.3640e-06,
         4.9242e-06, -3.2887e-06, -1.7701e-05, -4.7156e-05,  7.9009e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6873e-05,  1.8801e-02,  4.0863e-06,  4.8287e-06,  8.3391e-06,
        -1.7951e-05, -1.2890e-05,  1.7138e-05, -3.2123e-06, -2.3796e-05,
         3.1295e-06, -1.0020e-05, -5.1758e-05, -3.7256e-06, -3.1296e-05,
         1.4251e-05, -1.0436e-05, -3.9583e-05, -1.8245e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8400e-04,  3.5929e-02,  9.7639e-06, -2.9626e-05, -1.3786e-05,
         1.3550e-05, -1.2683e-05,  1.3197e-05, -4.0338e-06,  1.6232e-05,
        -8.3450e-06, -3.4167e-05, -7.3318e-06, -3.8656e-06, -5.5190e-06,
         4.5890e-06,  9.1809e-06,  3.2961e-06,  2.5101e-06,  1.2013e-05,
         6.5548e-07,  3.7499e-05, -5.5046e-05, -3.5876e-06,  1.9373e-05,
         3.2672e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7335e-05,  2.0930e-02, -5.4827e-06,  1.4644e-05, -1.6097e-05,
         2.2488e-05, -7.7164e-06,  7.6617e-06, -3.9380e-05,  1.9411e-05,
         7.0351e-07, -1.2869e-06, -2.5015e-06,  5.4760e-06, -8.8893e-06,
         1.3317e-05, -5.4368e-06,  9.5846e-06,  1.8554e-06,  4.7178e-06,
         2.2290e-05,  2.0356e-05,  2.0438e-05,  1.1560e-05,  5.6813e-07,
         3.7059e-05,  2.1498e-05, -1.6248e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6361e-05,  2.7921e-02, -2.3008e-05,  4.7280e-06, -8.4700e-06,
         1.2368e-05, -4.5387e-07, -9.5130e-06,  9.8433e-06,  3.8989e-05,
         9.9157e-06, -1.9944e-05, -1.3394e-05,  1.5588e-05, -1.7321e-05,
         7.7662e-06,  2.6435e-05, -2.1541e-05,  5.6935e-06, -1.2637e-05,
         4.1692e-07,  2.4913e-05,  2.6276e-05,  1.4618e-06,  1.1144e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9431e-04,  2.0334e-02,  1.4010e-05,  4.0709e-06,  4.3948e-06,
        -3.6615e-05, -6.3107e-06,  4.6313e-06,  5.5104e-05,  1.3030e-05,
        -8.6170e-06, -1.1457e-05, -2.6026e-05,  2.1500e-05, -9.8090e-06,
        -4.5378e-06, -2.2769e-05, -3.8174e-06, -7.4801e-07, -1.9850e-05,
         3.3570e-06, -1.2274e-05, -4.9948e-06, -1.0576e-06,  4.2181e-06,
         1.2675e-05,  4.7000e-06,  1.1317e-05,  9.8063e-06,  1.2474e-05,
         2.7255e-05,  1.4652e-05, -9.2288e-06, -1.3080e-05,  1.9001e-05,
        -3.8582e-06, -1.4083e-07,  7.7106e-06,  1.4124e-05, -4.0610e-06,
         1.3806e-05,  6.5211e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1130e-04,  2.8748e-02,  1.3806e-05, -2.2483e-05, -7.2197e-07,
        -2.6315e-05,  1.1757e-05,  8.6262e-06,  9.4337e-06,  2.4596e-05,
         1.5983e-06,  8.7387e-06,  5.5424e-07,  2.6302e-05, -1.3578e-05,
         7.0297e-06, -1.6346e-05,  1.6684e-05,  1.2473e-05, -1.2436e-05,
         7.1332e-06, -1.5860e-05,  1.0387e-05, -8.7952e-06,  1.0740e-05,
        -2.9090e-06, -6.5740e-06, -3.0664e-05,  1.0499e-05,  3.6971e-05,
         4.9652e-05, -1.3497e-06, -1.6126e-05,  5.9965e-06,  2.1671e-05,
        -1.0311e-05,  1.9003e-05,  1.7598e-05,  6.6312e-06,  7.7854e-06,
         2.0189e-05,  6.3424e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6557e-05,  2.6583e-02,  8.1289e-06, -1.7463e-05, -1.1610e-05,
        -1.9470e-05,  7.5223e-06, -8.6278e-07,  2.9782e-05,  1.0427e-05,
         3.2520e-06, -2.4238e-07,  5.6887e-06,  2.1613e-05, -8.2415e-07,
         1.6818e-05,  1.4787e-05,  1.3162e-05, -1.3334e-06, -1.0720e-05,
         6.1602e-05, -9.6539e-06, -5.5305e-06, -1.2772e-05,  1.8170e-05,
        -5.9934e-06, -6.9682e-06, -2.7428e-06, -2.3904e-06,  4.0868e-07,
         1.4648e-05,  3.3089e-06,  1.0490e-05,  2.7717e-06,  3.0858e-05,
         3.2066e-06,  2.8185e-05,  4.3564e-05, -7.0539e-07, -4.4753e-06,
        -3.7501e-06, -1.3338e-06,  6.1242e-06,  8.2701e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.8019e-04,  3.0176e-02, -4.0460e-06, -2.4286e-05, -2.5191e-05,
         7.7786e-06, -3.7926e-05,  1.1912e-06, -1.6954e-05, -3.3922e-05,
        -4.2181e-05, -1.5641e-05,  6.2160e-05, -5.2990e-06, -4.6211e-05,
        -1.8370e-05,  1.3004e-06,  1.1882e-05, -7.2503e-06,  1.4766e-06,
         1.3588e-06,  2.5837e-05, -1.5956e-05, -1.5340e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8754e-04,  2.5658e-02, -3.8000e-05, -1.7100e-05,  3.4522e-07,
         2.2497e-05, -3.4234e-05, -2.1478e-05, -2.8985e-05, -4.6929e-05,
        -4.6667e-05, -1.5686e-05,  3.1125e-05, -3.1334e-05, -3.3416e-05,
        -2.3847e-06,  2.2990e-05, -1.4238e-05,  9.7049e-08, -7.4391e-06,
         8.9914e-06, -1.2447e-05, -3.0055e-05,  2.2989e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1000: [tensor([ 3.1198e-04,  9.2131e-02, -1.0814e-04,  5.8207e-05,  1.2276e-04,
         5.7968e-05, -6.6255e-05,  7.2530e-05,  2.3248e-05, -5.7628e-05,
         1.4604e-05,  9.4791e-05, -6.0587e-05,  2.4555e-05, -2.5368e-05,
         9.6110e-06,  8.3257e-05,  2.0825e-05, -8.5540e-05,  1.0126e-04,
         1.3659e-04, -6.8254e-05, -3.2479e-05,  9.5806e-05, -1.1162e-04,
         9.7747e-06,  4.8913e-05,  3.7637e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9732e-04,  1.2374e-01, -4.1526e-05,  7.0691e-05, -6.2598e-05,
        -1.4800e-05, -8.4068e-05, -7.4790e-05, -8.9273e-05, -2.0814e-05,
        -5.0109e-05, -3.7661e-05,  2.4555e-05, -1.0473e-04,  1.0805e-04,
        -1.7867e-04,  2.3577e-05, -1.4951e-04, -1.4798e-05,  9.0491e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0450e-04,  1.0946e-01,  3.4214e-05,  1.2783e-04, -1.1243e-05,
         4.6843e-05, -1.1690e-04, -1.0096e-04, -7.2952e-05,  6.0449e-05,
        -4.0804e-05, -2.8112e-05,  3.5914e-05,  3.8281e-06, -1.4236e-05,
        -6.5973e-05, -5.0424e-05, -4.1394e-05, -1.1289e-04,  4.4200e-05,
        -2.3144e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9091e-05,  1.2707e-01,  6.6889e-05, -5.0060e-05, -6.4220e-05,
        -7.6672e-05, -8.8587e-05,  7.0735e-05, -6.4964e-05,  1.6170e-05,
        -9.1648e-05,  5.6988e-05, -8.0177e-05, -1.0599e-05, -3.0732e-05,
        -1.0067e-04, -1.8874e-05, -3.8857e-05,  3.6121e-05, -3.4186e-05,
         5.6814e-05, -1.6418e-05, -4.0421e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2858e-05,  2.1237e-01,  4.8942e-05,  5.9764e-05,  8.2816e-05,
         2.1166e-05,  1.6672e-04,  1.8450e-05, -4.8257e-05, -6.8238e-05,
        -1.0504e-05, -2.1623e-06, -5.7987e-05, -4.7260e-05, -7.2121e-05,
         2.4310e-05, -1.0437e-05, -1.6534e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9762e-05,  2.2505e-01,  9.3736e-06, -5.2833e-05,  3.3102e-05,
         4.4392e-05,  8.3431e-05, -4.2879e-07,  1.6830e-04, -5.9252e-05,
        -7.9312e-05,  9.1768e-05, -1.2256e-04,  6.1867e-05, -7.4332e-05,
        -4.0176e-05, -2.2084e-05,  1.6916e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9700e-04,  1.4668e-01,  3.6714e-05,  1.6122e-04,  7.0491e-06,
         8.1161e-05,  6.3478e-05, -1.0151e-04,  5.4491e-05,  2.6229e-05,
         2.4996e-05,  6.2501e-06,  9.4826e-06,  4.4246e-05,  2.5961e-05,
         7.1814e-06,  1.1631e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4563e-04,  9.6737e-02, -2.6207e-05,  4.3602e-05,  1.0215e-04,
         5.4386e-05, -9.2471e-06, -2.2624e-05,  1.2042e-05,  6.4415e-05,
         2.6759e-06,  8.8238e-05, -8.7564e-06,  6.5135e-05, -1.5049e-05,
         2.3403e-05,  2.3952e-05, -3.1466e-05,  5.1850e-05,  2.9297e-05,
         4.4905e-05, -2.4370e-05,  6.3896e-05,  4.6676e-05,  5.3053e-05,
         2.0731e-05,  7.2121e-05, -8.9725e-05, -1.9128e-05, -1.3377e-05,
         4.5081e-05,  6.0396e-05,  1.1783e-04,  7.2267e-06,  4.5356e-05,
         2.9144e-05, -3.0821e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8756e-05,  9.0118e-02,  2.5543e-05, -7.6608e-05,  1.5736e-04,
        -1.2874e-05,  3.8527e-05, -2.2046e-05,  2.1980e-05, -4.2941e-05,
        -4.8198e-06,  2.2666e-05, -1.5358e-05, -5.3302e-06, -8.3620e-06,
        -3.1281e-05,  6.2086e-07,  8.9520e-05,  7.7701e-06,  4.4491e-05,
         6.1027e-06, -6.2641e-05,  6.2815e-06,  6.4926e-05,  3.4714e-05,
        -2.5494e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3545e-04,  4.5074e-02,  5.3955e-06, -5.5372e-05,  5.1709e-05,
        -8.6450e-06,  2.8514e-06,  1.0852e-05,  2.7237e-05,  4.6502e-06,
        -3.3852e-05,  4.5515e-05,  1.2010e-05,  1.1284e-05, -2.7801e-05,
         3.7024e-05,  1.7599e-05,  2.5612e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0293e-04,  4.7080e-02, -3.2728e-05,  3.5829e-05,  2.5999e-06,
        -4.4242e-05,  5.1741e-05,  1.7817e-06,  1.4590e-05,  2.4215e-05,
        -2.0012e-05,  3.8462e-07, -2.8143e-05,  1.1985e-05, -9.4986e-06,
        -3.8423e-05, -3.0846e-07, -5.4545e-06,  1.3987e-05, -4.4462e-06,
        -2.6971e-05,  6.5301e-05, -2.5279e-05,  3.4098e-05, -1.0808e-05,
         2.2553e-06,  4.8746e-05,  2.7792e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7438e-04,  3.7644e-02, -2.1581e-05,  5.2292e-05,  3.7904e-06,
        -2.9153e-06, -1.2612e-05, -1.2132e-05, -4.5099e-06,  2.7430e-05,
         3.1302e-05,  2.8277e-05,  1.4630e-05,  4.8316e-05,  1.7262e-05,
        -1.1168e-05, -1.4809e-05, -4.3801e-06,  2.1604e-05,  4.5325e-06,
         4.9845e-06,  2.8454e-05, -2.2000e-05,  6.0589e-06, -7.3846e-06,
        -5.1905e-06,  2.0597e-05,  7.6188e-06, -4.9886e-06,  2.6752e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1050: [tensor([-1.0494e-04,  1.6679e-01,  4.3974e-05,  3.5667e-05, -2.4047e-05,
        -2.7599e-05,  6.1885e-05,  4.6799e-07,  2.4611e-05, -2.4281e-05,
        -6.0252e-06,  4.8403e-05, -1.4139e-05,  3.6449e-05,  5.7814e-05,
         2.9669e-05, -2.8106e-05,  2.8308e-05, -7.8585e-06,  6.6117e-05,
         6.5448e-06, -1.1274e-05, -1.4670e-05,  1.8754e-05,  2.9471e-05,
        -3.4056e-05,  1.1025e-05,  1.1754e-05,  9.7212e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.1373e-05,  2.4208e-01, -5.7072e-05, -4.3056e-05,  1.4291e-04,
        -2.2633e-06,  9.2055e-06,  4.6337e-05,  3.8679e-05,  2.4985e-05,
        -4.1193e-05,  9.6272e-05,  7.7812e-05,  4.1787e-05, -1.0701e-05,
         2.3374e-05,  1.0641e-04, -3.3108e-05,  9.8066e-05,  2.7225e-05,
        -6.8684e-05,  3.6708e-05, -3.2946e-05, -4.5963e-05, -4.3057e-05,
         1.2967e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3791e-04,  2.8315e-01, -9.2255e-05,  5.3456e-06, -6.6966e-05,
         7.7263e-05,  2.3105e-05, -5.5305e-05, -1.6146e-07, -5.8565e-05,
         7.7824e-05,  7.6801e-05, -4.3771e-06,  8.0463e-05, -1.0155e-05,
        -6.1626e-05, -8.0955e-05, -6.8591e-06, -4.3503e-05,  1.3527e-05,
         1.3324e-04, -5.7903e-05,  3.2156e-05,  6.9332e-05,  5.7035e-05,
        -6.1778e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8968e-04,  1.2190e-01, -1.7084e-05, -2.8630e-06,  6.2206e-06,
        -3.4417e-05, -4.5815e-05,  1.5247e-05,  1.8107e-05, -4.6045e-06,
        -8.5265e-05,  2.4805e-05, -4.3521e-05,  8.7401e-06,  1.2221e-05,
         5.2371e-06,  4.3202e-06,  4.3154e-05, -9.3468e-06, -4.8412e-05,
        -7.6069e-07, -4.1187e-05,  8.9024e-06, -2.6792e-05, -1.2822e-05,
         5.7274e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9729e-05,  1.4374e-01,  1.4938e-05, -1.0554e-04,  1.1803e-04,
         1.4069e-05,  2.7327e-05,  7.1446e-05, -3.0904e-05, -2.0575e-05,
         1.0216e-04,  4.9582e-05, -5.3540e-05,  1.4866e-06, -3.0937e-05,
        -1.4828e-05,  1.7019e-05, -6.0815e-05, -2.6496e-05,  4.7236e-05,
         2.3136e-05, -5.3924e-05, -2.5658e-05,  2.1603e-05, -1.1373e-04,
        -4.7028e-05, -1.8695e-05, -3.4683e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4371e-04,  1.2099e-01, -4.6488e-05, -5.1552e-06,  3.7606e-05,
        -6.1206e-05, -1.3688e-05, -5.1409e-05, -5.1314e-06, -3.5414e-05,
        -6.0326e-05,  1.0057e-04, -9.3314e-05,  6.5411e-05,  1.6689e-05,
        -1.9548e-05,  3.4073e-05, -4.4740e-05,  2.7658e-05, -4.3291e-05,
        -6.9740e-05,  4.0752e-06,  4.1168e-05,  1.2820e-05, -4.1703e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8416e-04,  1.9480e-01,  4.8084e-05,  2.6005e-05, -5.9228e-05,
        -1.9529e-05, -8.8836e-07, -9.9793e-05, -7.4853e-05, -7.8051e-05,
        -7.1426e-06, -7.7380e-05,  1.6976e-05,  1.6718e-05, -4.4632e-05,
         1.5343e-05,  2.0439e-05, -1.4476e-04, -8.4683e-05,  7.3248e-05,
         2.9706e-04, -1.0766e-04, -6.9384e-05, -8.5424e-05, -6.7961e-06,
        -4.9754e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0450e-04,  9.2189e-02,  1.3067e-05,  3.0174e-05, -6.2205e-05,
        -4.4032e-05, -1.9991e-05,  6.7244e-06, -1.7392e-05,  2.5472e-05,
        -1.1370e-05, -1.6358e-05,  4.0345e-05, -7.4106e-05, -2.3785e-05,
        -1.4222e-05, -6.9613e-05,  1.9823e-05, -2.9938e-05,  1.1786e-05,
        -1.9219e-05,  2.0614e-05,  3.6295e-05, -1.2062e-06, -3.3478e-05,
        -4.1254e-05, -4.4533e-05,  2.9162e-05, -2.1640e-05, -7.7834e-06,
         3.9653e-05, -4.5910e-05,  1.6274e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1211e-05,  1.4084e-01,  3.8692e-05,  5.7387e-05,  6.4372e-05,
         7.7968e-05,  5.0762e-05, -1.1552e-05,  6.5079e-05, -3.8778e-05,
         1.1930e-04,  1.0801e-04, -5.0724e-05, -6.7055e-05, -3.0526e-05,
        -1.8197e-06,  1.8892e-05, -2.2923e-05,  1.2827e-04,  5.8773e-05,
        -3.4886e-05,  2.4306e-05, -4.9074e-05, -7.6379e-07,  2.9100e-05,
         4.7253e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4370e-04,  1.4409e-01,  3.9244e-05, -1.5475e-05, -1.7163e-06,
        -5.3657e-05,  9.7027e-05,  5.6938e-05,  3.7291e-06, -7.3684e-06,
         1.3227e-05,  4.9525e-05,  6.7211e-05,  2.0181e-05, -3.9010e-05,
         2.6449e-05, -7.8555e-05, -1.2542e-05,  3.0607e-05,  3.8191e-05,
        -8.1869e-06,  9.6506e-05, -7.5677e-07,  1.7089e-05,  2.8838e-05,
        -1.6777e-05, -1.5837e-05,  8.2885e-06,  1.7240e-05,  2.1126e-05,
        -1.5279e-05, -4.5218e-06,  2.9394e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7598e-05,  9.3460e-02, -8.6557e-07, -7.4468e-05,  6.8443e-05,
         9.5288e-05, -7.6752e-05, -7.1630e-05, -2.3388e-05, -6.3894e-06,
         1.5603e-05, -1.2907e-05,  2.5161e-05,  1.1935e-05, -2.3135e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4363e-04,  1.1326e-01, -5.7913e-06, -4.0377e-05, -1.4796e-05,
        -1.3451e-05, -1.3059e-04,  1.8480e-05,  3.4996e-05,  2.0390e-06,
        -6.8142e-05,  3.7038e-05,  3.2780e-05,  3.8664e-05, -3.9152e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1100: [tensor([ 6.9264e-05,  1.8193e-01, -7.5076e-05,  6.2562e-06,  4.5708e-05,
         5.0108e-05,  5.1311e-05, -3.7060e-05, -4.8538e-06,  6.8570e-05,
         9.8591e-05, -4.1185e-05,  4.1757e-05, -4.5465e-06,  7.0132e-05,
         1.9769e-05,  1.9182e-05,  9.0504e-06,  7.3498e-05,  4.8032e-05,
         4.8026e-07, -6.3580e-06, -1.6060e-05,  8.6265e-06,  5.1139e-07,
        -4.8208e-05, -1.7828e-05,  9.0780e-06,  3.7752e-05, -3.7679e-05,
        -7.3928e-06,  3.4286e-05, -9.3411e-06,  9.4060e-06, -3.8854e-05,
         6.1223e-05,  7.0483e-06, -6.6238e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6409e-04,  2.3133e-01,  1.1148e-05, -2.4885e-05,  4.3805e-06,
        -3.2898e-05, -2.4215e-05, -5.0026e-05, -1.0997e-04,  1.2438e-05,
        -8.1716e-06, -2.4366e-06, -2.5870e-05, -2.0582e-05, -9.5556e-06,
        -1.5906e-05, -6.3044e-05, -7.6711e-05,  1.0707e-05, -6.6170e-06,
         3.1689e-06, -3.8971e-05,  2.9451e-05, -6.3451e-07,  1.0463e-04,
         5.2200e-06,  3.5519e-05, -8.0445e-06, -6.7682e-05,  2.6956e-05,
         1.3775e-05, -4.3743e-05,  5.7831e-05, -1.3240e-07,  2.1290e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4890e-04, -1.0268e-01,  2.0525e-05, -2.2751e-05,  4.7464e-05,
        -1.1199e-05, -2.6049e-05,  1.8649e-05, -5.4322e-06, -4.5247e-06,
         3.1880e-06,  1.2770e-05, -3.0297e-06,  7.6455e-06, -1.5278e-06,
         1.5984e-05, -1.6090e-07, -8.2268e-06, -3.9860e-05, -3.9024e-05,
        -3.6662e-05,  4.6075e-05,  1.7740e-05, -7.6059e-06, -1.7849e-05,
         8.5028e-06,  2.1794e-05, -7.4222e-05, -8.7170e-06,  1.3891e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3917e-04, -9.1187e-02,  3.5250e-05, -3.8967e-05,  1.8154e-05,
        -4.0481e-05, -1.8133e-05, -2.9921e-05, -7.9495e-05, -5.1805e-05,
         1.5145e-05, -3.0822e-05, -3.2359e-05,  2.1348e-06,  1.8833e-06,
        -1.0562e-06,  1.1682e-05, -3.0653e-05, -2.0950e-05,  5.5301e-06,
        -1.6639e-05, -2.6333e-05,  4.5239e-06,  1.2358e-05,  2.7350e-05,
         3.3931e-05, -4.7498e-05,  2.0392e-05, -4.2259e-05,  2.1699e-05,
         1.7150e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3161e-04,  2.6531e-01, -2.4417e-05,  1.3591e-05, -6.0118e-05,
         7.5174e-05,  8.2583e-05, -1.3263e-06,  9.6935e-05,  6.7410e-05,
         5.0621e-05, -6.6101e-06,  4.5995e-05, -7.9839e-05, -4.9346e-05,
        -3.4704e-05,  3.0363e-05, -3.0139e-05,  1.1749e-04,  6.5143e-05,
         3.6360e-05, -4.8879e-06, -1.9460e-05,  3.6671e-05, -7.5177e-05,
         2.6071e-06,  2.7844e-05, -9.2125e-07,  2.5020e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5728e-04, -1.1892e-01,  1.3819e-06,  1.8170e-05,  4.8959e-05,
         3.9070e-05,  3.1549e-05,  7.8456e-06,  1.9587e-05,  2.5567e-05,
         9.3979e-06,  1.5356e-05,  2.1501e-05,  8.1752e-05,  3.1887e-06,
         5.1537e-05,  5.8235e-05,  4.1426e-05, -1.4649e-05, -2.6995e-05,
         4.7802e-05,  1.9808e-05, -2.0512e-05, -4.3989e-05,  2.0369e-05,
         1.1349e-05, -2.6652e-06,  9.9739e-06, -5.5569e-06,  7.1645e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1152e-04,  2.7400e-01,  1.0384e-04,  8.0737e-05,  1.6123e-05,
        -2.5442e-05,  6.7615e-05, -1.8814e-05, -1.0084e-05, -2.5421e-05,
        -3.6803e-05,  1.8332e-05,  8.6431e-05,  3.5863e-05,  2.4987e-05,
         3.5420e-05, -5.0315e-05, -3.6470e-05,  2.3829e-06,  3.5748e-05,
         3.9106e-06, -1.2088e-04,  2.7048e-05,  2.0843e-05,  4.8618e-07,
         3.4409e-05, -4.9380e-05,  5.9573e-05,  2.7002e-05, -8.6683e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.4694e-05,  2.6172e-01,  7.2421e-07,  1.6084e-06, -4.1053e-05,
        -4.0967e-05,  9.9866e-05,  1.3704e-05,  8.4650e-05, -2.9924e-06,
         1.4512e-05, -1.4041e-06, -4.2138e-05,  3.5044e-05,  5.7399e-05,
        -3.5828e-05,  1.8323e-05, -1.0644e-05,  5.6311e-05,  2.9758e-05,
         6.3238e-05,  2.7390e-05, -2.6970e-05, -3.8027e-05,  1.6320e-05,
         2.7999e-05, -2.4568e-06,  1.1564e-04,  1.7024e-05, -3.9549e-06,
         4.7465e-05, -3.8694e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6656e-04,  2.8279e-01, -3.2857e-05,  1.3131e-04, -7.0685e-05,
         4.1651e-05, -9.5874e-06,  5.0646e-05, -5.5920e-05, -7.6354e-05,
        -7.1730e-05,  2.7408e-07, -3.0363e-05, -9.8729e-05, -4.0387e-05,
        -5.1572e-05,  1.8312e-05, -7.1113e-05,  3.9407e-05, -5.8568e-05,
        -3.5435e-05, -1.9811e-06,  5.9963e-05,  2.1817e-05, -2.5349e-05,
         1.2083e-05,  3.2017e-05,  4.9911e-05,  5.9196e-05, -3.4336e-05,
         7.1647e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1553e-04,  2.9340e-01,  1.6384e-05, -1.3709e-04, -7.2494e-05,
        -1.2657e-04,  3.3181e-05, -1.4059e-05,  4.2229e-05,  7.9333e-05,
        -1.6669e-04,  4.8837e-05,  1.4081e-04, -1.6850e-04,  3.9499e-05,
        -8.2328e-05,  2.0082e-04,  3.9615e-05,  3.9686e-05,  8.1987e-05,
        -8.9284e-05, -9.1047e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6665e-05,  2.4143e-01, -5.6229e-05, -2.4055e-05,  1.9452e-04,
        -9.0971e-06, -1.2283e-05,  3.7764e-05, -5.0470e-05, -9.2183e-05,
         1.3364e-04, -6.0341e-05, -3.1994e-05,  1.4751e-04,  1.3190e-04,
        -8.3332e-05,  1.1391e-06,  3.5263e-05,  3.1258e-05,  1.3013e-04,
         8.2480e-06,  1.0556e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9359e-04, -1.2127e-01,  5.2514e-05,  5.4012e-05,  8.1648e-06,
        -1.5667e-05,  5.3065e-05, -1.3918e-06, -4.7273e-05,  3.4455e-05,
         1.8628e-07,  1.4590e-05,  4.7894e-05, -3.4789e-05, -1.7659e-07,
         4.4158e-05, -5.1396e-06, -1.3174e-05, -2.8923e-05,  2.9415e-05,
         2.3223e-05,  1.6378e-05, -8.1009e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1150: [tensor([ 6.4226e-04,  2.7784e-01,  3.4339e-05, -1.0534e-05,  2.8860e-05,
        -7.1909e-05, -2.8224e-05, -1.8688e-05,  7.1278e-06, -6.6779e-06,
        -5.3364e-06, -1.3547e-05, -4.9682e-05, -3.6699e-05,  6.4499e-05,
        -2.7492e-05,  2.8355e-05, -6.0309e-06,  2.2990e-05,  4.1928e-05,
         7.0469e-05, -1.5596e-05,  6.2461e-05, -2.3715e-05,  3.4619e-06,
         2.6290e-06, -5.9571e-05,  5.4976e-05,  2.8934e-05, -5.6043e-05,
        -2.1994e-05, -4.3642e-05, -4.6968e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9273e-04, -1.8271e-01, -2.7895e-05,  1.6894e-05,  2.6953e-05,
        -3.8251e-05, -5.8183e-05,  4.4104e-05, -1.2917e-06, -5.5256e-05,
        -2.7744e-05,  4.4011e-05,  1.2460e-05, -5.9758e-06, -2.6162e-05,
         1.7991e-05, -8.2684e-07,  5.5606e-06,  4.5015e-05, -1.5412e-05,
        -6.9446e-05,  3.8852e-06,  1.6774e-05, -8.1290e-06, -3.5983e-05,
        -6.4523e-06, -3.5717e-05,  6.8299e-07, -5.4652e-06,  7.2191e-06,
        -4.9145e-05, -3.9442e-05, -6.3427e-06, -1.0268e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4845e-04,  1.7646e-01, -2.6456e-05,  2.3254e-05, -7.5342e-05,
        -2.0918e-05, -4.9741e-05, -6.1326e-05,  4.9499e-05,  6.2551e-05,
        -8.2550e-05, -7.5424e-05,  1.0002e-05, -3.3770e-05, -7.0889e-05,
        -1.5472e-05, -3.0019e-05, -7.4770e-05, -7.5905e-06,  5.5866e-05,
         7.5633e-05, -5.0411e-05, -3.0013e-05, -9.9313e-06,  7.0277e-05,
        -5.4067e-06,  8.3037e-06, -7.9029e-05,  9.1735e-06, -3.8910e-05,
         7.1186e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.1005e-05, -1.1511e-01,  1.8070e-05,  6.5793e-05,  2.9475e-05,
         8.1058e-06,  1.0210e-05,  3.4952e-05,  2.8510e-05,  2.2468e-05,
        -7.5195e-07,  9.7876e-06, -1.2907e-05,  4.6185e-05,  3.2974e-05,
         6.7810e-05,  2.5535e-06,  5.6280e-05,  1.0721e-05, -2.6117e-05,
        -2.1898e-06, -2.3757e-05,  2.5967e-05, -1.1424e-05,  9.5756e-07,
         6.6458e-05,  3.5413e-05,  6.1377e-05,  5.7921e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6453e-05,  1.2831e-01, -4.1422e-05,  2.7907e-06,  6.5362e-05,
        -2.8957e-05,  2.6888e-06,  2.4001e-05,  7.4571e-05,  1.6724e-05,
        -4.1579e-05, -2.0000e-05,  1.0998e-04, -1.4461e-05,  4.6361e-05,
         3.5359e-05, -2.6998e-05,  1.8092e-06,  4.9826e-05,  3.7368e-06,
         1.8382e-05, -3.3509e-05, -4.4929e-06,  2.0979e-05, -2.0194e-06,
        -1.3725e-05, -2.2052e-05,  7.2783e-05, -1.1198e-05,  1.7047e-06,
         1.0319e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9155e-04,  1.7290e-01,  6.1022e-05, -1.2121e-04, -9.8052e-06,
         1.0625e-04, -8.0620e-05, -3.7101e-06,  1.2582e-04,  2.2197e-05,
         4.5418e-05,  2.7276e-04,  1.0439e-04, -5.5543e-05, -9.1271e-06,
        -4.9419e-05,  2.5659e-04,  3.7065e-05, -5.5269e-05,  5.4795e-05,
         5.7081e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2972e-05,  7.2542e-02,  2.5304e-05,  1.0169e-05,  2.1074e-05,
        -5.3046e-06, -1.1969e-04,  5.6252e-05,  4.0352e-05, -5.5694e-05,
         2.3632e-06, -1.8471e-05,  1.0728e-05,  7.0399e-07,  6.4727e-07,
         6.5822e-05, -3.2738e-05,  5.3241e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3579e-04,  1.9716e-01,  1.2251e-04,  8.6956e-05, -2.1488e-05,
         9.9320e-05,  8.6408e-05,  9.8532e-05,  1.4878e-04,  3.5446e-05,
         7.6206e-05,  1.0987e-04, -1.7658e-05, -5.3043e-06, -3.0418e-06,
        -8.0002e-05, -1.7848e-04, -6.5072e-05,  5.7968e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0627e-04,  1.9923e-01,  9.8206e-05, -1.5421e-05,  1.6003e-04,
        -1.8536e-04,  1.1925e-04, -5.7951e-05,  3.5311e-05,  6.6533e-05,
        -1.3859e-04,  1.2935e-04, -5.1073e-05,  6.6558e-06,  1.6764e-04,
         9.1213e-05,  1.7559e-05, -1.5835e-04,  8.6085e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1563e-04,  1.9720e-01,  8.2223e-06,  1.0064e-04,  2.0048e-04,
         6.6763e-06, -1.7546e-04, -2.0072e-04,  1.4054e-04,  1.9481e-04,
        -1.2684e-04, -3.7405e-05,  6.5864e-05, -1.4723e-04,  6.2168e-05,
         9.2949e-05, -7.0203e-05, -1.4760e-06, -9.0611e-05, -1.8467e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9715e-05,  1.9805e-01, -1.0581e-04,  1.4274e-05,  3.0849e-05,
        -2.9242e-05, -3.5240e-05, -6.4394e-06, -4.7108e-05,  2.0201e-05,
        -6.6997e-05, -7.6779e-05,  3.6825e-05,  6.4372e-06,  1.5531e-04,
        -4.0741e-05,  5.7766e-07,  5.1624e-05,  5.9096e-06, -6.5959e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0371e-04,  2.0911e-01,  6.0649e-05, -2.2256e-05,  3.0164e-05,
         1.4503e-04,  9.0967e-05,  5.2956e-05,  1.4444e-04,  1.6048e-04,
         3.6708e-05, -2.6764e-05, -9.5080e-05,  9.0898e-05,  4.1925e-06,
        -1.0651e-04, -4.0661e-05, -1.4278e-04,  5.0120e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1200: [tensor([-1.0673e-04,  1.6649e-01,  3.7595e-04, -1.4415e-04,  3.6819e-05,
        -3.6091e-05,  1.2948e-04,  1.4089e-05,  2.9783e-04, -9.8807e-05,
         7.8657e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4551e-05,  7.4389e-02,  1.6992e-04, -5.1523e-05, -3.9656e-05,
         6.5643e-05,  3.8471e-05,  1.5243e-04,  3.1027e-05,  4.3178e-05,
         5.1411e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1189e-04,  1.7557e-01,  1.6006e-04, -1.7171e-04,  2.1326e-06,
        -4.3988e-05, -7.8943e-05, -1.1525e-04, -1.4634e-04, -4.8974e-05,
        -2.1421e-04, -1.1074e-05, -9.5523e-05,  3.6250e-05,  2.9925e-05,
         7.0856e-05,  1.5252e-05,  3.3867e-06,  2.9686e-05, -1.2822e-05,
        -9.4859e-05, -6.6723e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1127e-04,  2.4971e-01, -5.1799e-05, -3.8258e-04, -5.8420e-05,
         1.4348e-04, -7.9797e-05, -1.4359e-04,  2.9780e-04, -1.2552e-04,
        -3.3813e-05,  2.0045e-04,  1.2668e-05, -2.9047e-05, -3.6758e-04,
         7.4153e-05,  8.3194e-05, -2.1074e-05, -2.8263e-04, -2.0830e-04,
         4.4783e-05, -9.6884e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8661e-04,  1.7438e-01, -2.8619e-05, -1.8609e-04,  2.6412e-05,
         5.5475e-05,  9.5857e-07, -4.3618e-05,  1.2026e-04,  1.5062e-04,
         4.6375e-05,  5.1542e-05,  7.3708e-05,  1.3809e-05, -1.2562e-04,
        -2.9740e-05,  2.6684e-05, -2.4163e-05, -7.2068e-05, -3.0918e-06,
         9.5846e-05,  5.7615e-05, -1.2406e-04, -2.8787e-05, -7.3159e-05,
        -1.0258e-04, -1.0620e-04,  2.1985e-05,  4.0611e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8918e-05, -1.4918e-01, -3.2360e-05, -4.1315e-05,  6.3179e-05,
        -2.0656e-05,  1.7305e-05,  4.5661e-05, -5.1056e-05, -2.0588e-05,
         3.3471e-05,  3.1000e-05, -8.0249e-06, -5.7139e-05, -7.8761e-05,
        -7.3997e-05,  8.6321e-05, -4.5363e-05, -2.8452e-05, -3.5846e-05,
        -4.9208e-05, -9.8975e-06, -4.8043e-05,  2.1414e-05, -1.1045e-05,
         5.5158e-05,  1.8158e-05, -3.6289e-05, -5.2675e-05, -1.1953e-05,
        -2.6860e-05,  2.1705e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.4819e-05,  2.4342e-01, -5.7413e-05,  4.7606e-05, -7.8416e-06,
        -4.6430e-05, -6.2220e-05, -7.3185e-05,  6.4211e-05, -7.6490e-05,
        -4.4703e-05, -3.4334e-05, -3.3110e-05,  3.9032e-05, -6.0748e-05,
         1.7053e-05,  8.8210e-05, -2.8071e-06,  9.6452e-06, -9.1819e-05,
        -9.9364e-05,  4.3904e-05,  2.8002e-05, -1.9572e-05, -7.6598e-05,
        -1.9825e-05,  2.1407e-05, -3.3783e-05, -3.8537e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3802e-05,  2.9259e-01, -4.4342e-05, -4.4908e-05,  2.2007e-05,
         5.5958e-06, -1.7475e-05,  1.4933e-04,  1.2580e-05,  1.0570e-05,
         5.1296e-06, -5.6064e-06, -1.9432e-04,  2.8590e-05,  7.9973e-05,
        -4.8389e-05,  1.2476e-04,  3.3521e-05,  1.8228e-05, -3.6931e-05,
         6.2316e-05,  6.7439e-06,  6.6641e-05, -3.5014e-05,  1.0634e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9747e-04,  6.4626e-02,  4.8098e-06, -4.4091e-06, -3.2309e-05,
         5.5611e-06, -1.5365e-05,  5.5241e-06, -5.5046e-05,  3.0189e-05,
         2.5877e-05,  2.7620e-05,  1.4088e-05,  2.7269e-06,  2.6660e-05,
        -9.9202e-06, -2.0182e-05,  5.2921e-06,  1.7536e-05, -6.6938e-05,
        -2.2233e-05, -4.1545e-05,  1.0367e-05,  6.9614e-06,  2.0660e-05,
        -4.6257e-05, -5.5443e-05, -7.9950e-06,  6.0215e-06,  2.4062e-05,
        -1.1554e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7635e-05,  6.5332e-02, -3.5216e-05,  6.2826e-06,  2.8431e-05,
        -2.8764e-05,  3.2336e-06, -1.0790e-04, -1.7756e-05, -2.4272e-05,
         5.3379e-06, -4.0330e-06,  3.3636e-07, -3.6974e-05,  2.0183e-05,
         5.2122e-05, -8.8003e-06,  2.1479e-05,  4.6940e-05, -2.9960e-05,
         5.4236e-05, -4.6720e-05, -2.8482e-05,  6.4229e-07,  4.2551e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.5250e-05,  9.5763e-02, -3.1183e-05,  4.3027e-05,  9.4494e-06,
         8.9855e-07, -2.8926e-05, -1.8492e-05, -5.8516e-05,  3.9763e-05,
         1.7338e-06,  3.8098e-05, -3.4950e-05,  7.0744e-06, -4.0575e-05,
        -6.6699e-05, -4.7506e-05,  7.7266e-06, -7.9366e-05,  8.7200e-06,
        -2.7232e-06, -3.1737e-05,  2.2377e-06,  8.9759e-06, -2.3621e-06,
        -2.5906e-05, -1.1174e-04,  1.5768e-05, -8.0741e-05, -1.7227e-05,
        -3.7775e-05, -1.6074e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5125e-05, -1.6895e-01, -8.8047e-05,  1.0459e-05, -2.9829e-05,
        -2.3482e-05, -8.0776e-05, -2.6291e-05, -2.2215e-05, -1.3201e-05,
         5.5972e-05, -2.3406e-05, -3.9135e-05, -3.1710e-05, -1.4180e-05,
        -3.1857e-05, -2.5423e-05,  8.7434e-06, -5.5385e-06, -1.7708e-05,
         8.0503e-06, -7.5982e-06, -9.7773e-06,  2.2561e-05, -7.1089e-05,
        -1.3639e-05,  3.6683e-05,  3.8308e-06,  2.9607e-05, -3.9950e-06,
        -4.3949e-05, -9.2365e-06, -5.1638e-05, -3.0730e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1250: [tensor([-1.4955e-04,  2.3526e-01, -6.7295e-06,  1.3455e-06,  1.0753e-04,
         7.0553e-05,  9.2824e-05,  8.3636e-05,  2.0778e-05,  2.0761e-05,
         6.4595e-05, -2.0856e-05,  4.3908e-05, -9.9246e-05, -4.0905e-05,
         1.1253e-04, -1.8411e-06, -9.5696e-05,  6.5059e-05,  3.6511e-05,
        -1.7850e-05, -7.8821e-06,  6.5688e-05, -9.9640e-05, -3.0330e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9091e-04,  3.4756e-01, -5.9922e-05, -6.7468e-05,  8.4891e-05,
         6.0212e-05,  1.0961e-04, -1.0253e-04, -6.8347e-05, -5.0144e-05,
        -3.9920e-05, -1.0251e-06,  4.1741e-05,  1.5343e-04,  2.1040e-05,
        -7.7632e-05, -6.7859e-05,  1.9902e-05,  6.9882e-05,  9.2468e-06,
         8.3848e-05, -4.7144e-05, -6.5341e-05,  3.2386e-05,  3.8502e-05,
        -2.2528e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5721e-04,  4.2765e-01, -1.7520e-04, -4.5094e-05, -1.4424e-05,
        -1.0823e-05,  6.3277e-05, -2.1052e-06, -5.7510e-05, -6.3254e-05,
        -2.2402e-04, -5.9973e-05,  4.2030e-05,  2.4559e-05,  4.6557e-05,
        -1.4940e-05,  3.7294e-05,  3.6584e-05, -4.0479e-05,  6.9494e-06,
        -3.0744e-05,  5.5816e-05,  8.0541e-05, -3.6653e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7630e-05, -2.4947e-01,  6.2600e-05, -1.4639e-05, -1.0081e-04,
         5.9039e-05,  7.5798e-05, -5.9692e-05, -6.6904e-07, -7.8871e-05,
        -4.5531e-05, -8.8366e-05,  3.5107e-05, -4.7650e-05, -3.4053e-05,
         4.3115e-05,  2.7351e-05, -4.7358e-05,  6.9728e-06, -4.2530e-05,
        -3.8333e-05, -2.2142e-05, -8.8818e-05,  2.7375e-05, -8.4369e-05,
         8.9438e-06, -4.6401e-05,  1.8330e-06, -8.1207e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1697e-05,  4.4044e-01, -7.7286e-05,  1.1757e-04,  1.6444e-04,
         1.5295e-04,  7.1884e-05,  9.7461e-05, -2.7945e-05,  1.6621e-04,
         9.8109e-05,  9.7048e-05,  9.0609e-05,  8.8036e-05, -1.8429e-04,
        -2.1355e-05,  9.8022e-05,  3.1814e-05,  1.2409e-04,  3.8745e-05,
        -5.0435e-05, -1.3850e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6060e-04,  1.6589e-01, -2.8253e-06, -1.0368e-04, -6.9271e-06,
        -5.6089e-05, -8.5100e-05,  2.0299e-05, -3.4985e-06,  5.2981e-05,
         2.8147e-05, -3.0555e-05, -7.6634e-06, -5.6149e-05, -6.4579e-05,
         1.5513e-05,  5.2591e-05, -4.6771e-05,  2.2442e-05, -2.5778e-05,
        -1.9143e-05, -2.8672e-05, -4.5264e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4957e-05,  3.6342e-01, -7.2001e-05,  1.5946e-04, -7.1394e-05,
        -6.3324e-05,  4.1441e-06, -1.7657e-04,  1.1406e-04, -1.3602e-05,
         6.6964e-05, -1.1146e-05,  2.9305e-05,  7.5212e-07, -1.0806e-04,
         9.7495e-05, -7.4636e-05,  1.0897e-05,  2.0146e-04, -3.4623e-05,
        -1.1141e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2146e-04,  2.0060e-01,  1.1390e-04,  3.0437e-05,  4.5763e-05,
         1.6433e-05,  9.6408e-06,  1.2049e-05, -1.0365e-04, -2.7476e-05,
        -1.5252e-04,  4.4000e-05,  1.9371e-04,  2.5389e-05, -1.3033e-04,
         1.2321e-04,  5.6878e-05, -9.1250e-05,  1.1408e-04,  9.8973e-05,
         3.2233e-05, -5.6382e-05,  6.3833e-05, -9.6128e-05, -1.1975e-04,
        -3.4166e-05,  1.1366e-04,  1.7932e-04,  2.7068e-05, -2.2175e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7899e-04,  1.5528e-01, -3.0249e-06,  1.5399e-04, -5.4465e-05,
         5.7910e-05, -9.5683e-05, -2.9890e-06, -1.1633e-04, -2.8564e-05,
        -1.0131e-04, -6.4517e-05, -2.6817e-05, -5.3974e-05, -1.0077e-04,
        -6.4194e-05,  3.9094e-06,  5.4780e-05,  7.5982e-06, -6.1611e-05,
        -1.4087e-05, -1.8544e-05,  1.0879e-04,  5.2797e-05, -1.5610e-05,
        -2.3032e-06,  2.9676e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7259e-04,  1.8554e-01,  1.2167e-05, -5.6948e-05, -5.4591e-05,
         1.3826e-04, -9.4751e-05,  6.7932e-05, -1.1709e-04, -3.7642e-05,
        -9.1068e-05, -2.1548e-05, -6.5342e-05,  2.9030e-05, -5.0415e-05,
         1.9385e-05,  9.0304e-05, -2.5156e-04, -1.9866e-05, -4.4762e-05,
        -8.4750e-05,  1.0177e-04, -5.1850e-05,  1.7798e-05, -2.9851e-05,
         9.7597e-05,  6.5415e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.5504e-05,  2.3035e-02, -1.9016e-05,  1.2989e-05, -8.9781e-07,
        -1.3038e-06,  2.3385e-05,  4.1985e-05, -1.4109e-05,  1.1376e-05,
         9.2702e-06,  4.2078e-05,  2.4526e-05, -3.2987e-05,  3.5295e-06,
        -2.4471e-05,  1.6014e-05,  4.3380e-06,  2.0289e-05, -9.0439e-06,
        -1.0219e-05, -8.3516e-06,  1.6406e-05,  1.3109e-05, -9.4503e-06,
         2.0116e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9557e-05,  2.7997e-02, -4.7339e-05,  3.1285e-05, -1.4737e-05,
         9.8841e-06,  7.8005e-05,  5.2903e-05, -4.9211e-06,  5.8447e-05,
        -2.1403e-05,  1.1170e-05,  2.7376e-06, -6.3433e-06, -1.0165e-05,
        -2.6220e-06,  1.7165e-05,  5.2259e-05,  7.1162e-05,  8.1457e-06,
         2.6187e-05,  2.8903e-05, -4.2135e-05, -2.5415e-05,  5.5383e-06,
         2.0190e-05,  9.8647e-06, -1.0327e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1300: [tensor([-8.4836e-05, -2.0039e-01,  1.2184e-05,  8.7087e-05, -3.0164e-05,
        -3.2854e-05,  5.1064e-06, -5.9195e-05,  8.3977e-05, -1.9464e-05,
        -4.9636e-05,  9.0238e-05, -4.3516e-06,  1.8600e-06, -5.3978e-05,
        -2.8856e-05, -4.7094e-08,  4.7879e-06,  4.9403e-05,  6.8547e-06,
        -3.8372e-06, -3.6190e-05,  9.5815e-08, -3.6864e-05, -1.3035e-05,
        -3.1668e-06,  2.0981e-05, -6.6823e-05, -2.3784e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2160e-05, -1.4542e-01, -3.9524e-06,  1.1724e-05, -2.1268e-05,
         5.5324e-05, -1.6874e-05, -1.4206e-05, -3.2708e-05,  6.4365e-05,
        -2.6121e-05, -4.1960e-07,  1.4232e-05,  2.5719e-06,  2.9697e-05,
         3.3903e-05,  1.2034e-05, -1.4938e-05,  5.9723e-06,  1.7906e-05,
         1.2896e-05,  2.9175e-05, -1.6113e-05, -1.5911e-05,  1.7919e-06,
        -9.2696e-06,  3.0410e-05,  1.3863e-05,  6.5408e-06,  1.8311e-07,
         3.0709e-05, -3.9591e-07,  2.9216e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8568e-04, -1.7236e-01,  1.1812e-04,  5.9954e-05, -2.0812e-05,
        -1.0559e-05, -4.3666e-05,  3.8699e-05, -5.5265e-05,  3.4192e-05,
        -5.1939e-06, -2.0397e-05,  1.2531e-05,  1.0206e-05, -1.7064e-07,
        -2.1378e-05, -4.9907e-06, -1.0826e-05,  1.1773e-05,  3.4544e-05,
         2.1339e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5157e-04,  3.3930e-01,  2.5118e-05, -3.0658e-05, -7.8914e-06,
        -1.0611e-04, -1.0395e-04,  4.3321e-05, -1.2371e-04,  6.4685e-05,
        -5.8972e-05, -4.4138e-05,  3.7698e-05, -2.1264e-05, -1.7111e-05,
         6.9252e-05,  1.3002e-04,  4.2191e-05, -3.1683e-05,  1.0223e-04,
         7.4528e-05,  6.1065e-06,  9.1353e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1469e-04, -2.5146e-01, -1.6150e-05,  3.4672e-05, -6.7353e-06,
         2.0272e-04, -5.9474e-05, -3.7012e-06, -2.3805e-05,  1.1113e-05,
         1.8802e-05, -2.8924e-05,  9.3522e-06,  9.4084e-05,  8.8463e-05,
         6.2660e-05,  2.3135e-05,  8.1653e-05,  8.5423e-05,  1.0846e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9443e-06,  3.9007e-01, -3.2840e-04, -9.4452e-05,  2.4111e-05,
        -1.7215e-04, -1.4771e-04, -3.5343e-05,  7.3866e-05, -1.2996e-04,
        -1.4358e-04, -1.3532e-05, -1.3613e-04, -2.4495e-04, -1.3153e-04,
        -1.0887e-04,  3.2738e-05, -1.9951e-06,  4.1861e-05, -1.4528e-04,
        -3.9977e-05, -2.6969e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4523e-04,  3.2223e-01, -1.5316e-04, -3.1175e-05,  1.1138e-04,
         1.3457e-05,  6.3620e-05,  6.3747e-05,  6.0102e-05, -6.4753e-05,
        -5.3437e-06, -3.2396e-05, -1.1219e-04, -1.0870e-05,  6.5134e-05,
         7.7268e-05,  5.0704e-05,  8.7902e-05,  5.7364e-05,  2.3518e-05,
         2.6046e-05,  7.6169e-06,  3.6883e-05,  7.5367e-05, -3.7837e-05,
         3.0949e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0493e-04,  3.4304e-01, -1.6612e-04,  2.7485e-04,  3.2416e-05,
        -2.8722e-05, -6.4550e-05, -1.0108e-04,  2.1771e-04,  3.8623e-05,
        -7.1846e-05,  2.2939e-05,  3.6446e-05, -8.0407e-05, -5.5455e-05,
        -4.8238e-05,  7.9882e-05,  1.6033e-04, -4.5387e-05,  1.1992e-04,
        -2.1811e-04, -9.4516e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2818e-05, -1.9118e-01, -3.8737e-05,  1.9775e-05,  2.6120e-05,
         1.1678e-04,  2.8546e-05, -3.9152e-05, -7.0312e-05,  1.0753e-05,
         3.7973e-05, -2.7593e-05,  4.4322e-06, -3.6185e-06,  9.3000e-05,
        -1.3712e-05,  5.7749e-05,  1.1494e-04, -5.2522e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3544e-05,  2.8994e-01,  1.1262e-05, -5.0504e-05, -5.5555e-05,
        -1.1625e-04, -1.5908e-04, -5.5423e-05, -2.7933e-05,  1.9280e-05,
         4.5099e-05,  1.9490e-05,  9.3169e-05, -2.8241e-05,  6.7037e-05,
         1.6818e-04, -3.4490e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7516e-05,  4.1617e-01,  8.4141e-05,  1.6130e-05,  5.7687e-05,
         1.2049e-04, -6.9476e-05, -2.2170e-05,  5.9868e-05,  1.5163e-07,
         1.4464e-04, -9.4433e-05,  8.4016e-05,  1.6759e-04, -1.7281e-04,
        -2.9588e-05, -8.5218e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5871e-05,  8.0888e-02, -4.0755e-05,  1.0870e-05, -9.7286e-06,
        -5.6014e-05, -4.2972e-06, -4.9167e-05,  4.0967e-05,  1.5193e-05,
         1.1406e-05, -3.1995e-05,  2.8887e-05, -6.0429e-05,  2.6506e-05,
        -6.8340e-05, -2.9794e-05, -3.4197e-05, -1.5654e-06, -2.8305e-05,
        -2.9881e-05, -2.0752e-05,  5.4450e-06, -2.0845e-05,  4.3625e-05,
        -4.0927e-05, -2.7328e-06, -6.5472e-05,  3.2183e-05, -4.6397e-05,
        -4.7655e-06, -2.3220e-05, -3.3682e-05, -2.8074e-05, -2.7974e-07,
        -3.4855e-05,  6.4221e-06,  1.2091e-05, -2.5685e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1350: [tensor([ 5.0602e-05,  3.4414e-02,  3.6588e-05,  4.3296e-05, -1.8601e-05,
        -1.0941e-05,  8.8574e-06,  6.8958e-06,  6.1665e-05,  3.6465e-05,
        -8.3027e-06, -6.0041e-06,  1.5686e-05,  1.0555e-05, -2.7992e-05,
         1.1577e-05,  1.3181e-05,  1.7431e-05,  3.0897e-05, -4.7640e-05,
        -1.6932e-06,  2.5298e-05,  3.4167e-05,  1.8264e-05,  1.9003e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2373e-04,  3.9973e-02, -2.6585e-05, -9.8260e-06, -6.3087e-06,
        -1.0501e-04, -2.8632e-05,  2.3374e-05, -3.2226e-05, -2.6511e-05,
        -3.4840e-06, -2.8137e-05,  1.8094e-06, -9.4840e-06,  2.8960e-05,
         2.1243e-05, -3.2242e-05,  2.5535e-05,  2.4963e-05,  1.7360e-05,
         2.9316e-06, -1.4162e-05,  1.1330e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.6389e-05,  2.3681e-01, -6.6443e-05,  9.6713e-05,  1.9443e-05,
        -9.6967e-05,  4.4870e-05,  1.1867e-05, -3.0610e-05,  1.1259e-04,
         3.5283e-05,  3.8995e-05,  4.5539e-05, -7.9882e-05,  1.2255e-04,
         4.3134e-05, -1.8770e-05, -3.7043e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2362e-04,  4.4679e-01,  5.2233e-07,  2.8888e-05, -1.7307e-05,
        -1.1305e-04, -2.9079e-04,  1.2620e-04,  6.2783e-06,  2.3602e-05,
         8.4032e-05, -1.3679e-04, -2.3340e-05, -5.7676e-05, -9.1854e-05,
         5.7766e-05,  1.2932e-04, -7.5693e-05, -8.8904e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2853e-04, -1.2652e-01,  3.5806e-05, -6.5789e-05,  3.1055e-05,
         3.7613e-05,  2.7886e-06,  7.9544e-05,  6.2494e-05,  6.8404e-06,
         6.7571e-05,  3.7517e-05, -4.3861e-05,  1.6920e-05, -2.1899e-05,
         1.5604e-05,  1.4879e-06,  4.8582e-05, -6.8338e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7211e-05, -2.4972e-01,  2.1033e-05,  4.7697e-05,  4.0483e-05,
         9.8348e-05,  8.0279e-05,  3.4185e-05,  7.4245e-05, -3.1490e-05,
        -5.6726e-05, -3.6711e-05,  1.9078e-05, -2.8475e-05,  1.6669e-05,
         3.7697e-06, -4.7247e-05, -2.5171e-05,  2.3188e-05, -5.6476e-06,
        -1.4081e-05, -2.3806e-05, -1.1818e-04, -4.0252e-05, -2.9824e-05,
         3.1273e-05, -5.1664e-05,  1.4316e-05,  2.5700e-05,  2.5180e-05,
        -5.4128e-05, -1.6717e-05, -3.0537e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3479e-05, -1.5192e-01, -2.8818e-06,  1.7352e-05,  5.2678e-05,
         5.1153e-05,  2.2887e-05, -1.9041e-05, -3.2628e-05,  5.1898e-06,
        -3.4197e-05, -6.7048e-06, -1.0749e-05, -2.0583e-05,  4.7138e-05,
         2.1054e-05,  7.7053e-05,  2.8482e-05,  1.1653e-05, -5.8959e-06,
         1.5119e-05, -1.0477e-05, -2.1852e-05, -1.9923e-05,  6.9223e-06,
        -3.7806e-05,  4.6911e-05, -5.4099e-05, -2.2552e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3200e-04,  3.8561e-01,  4.4818e-05, -5.7053e-05,  1.1837e-04,
         9.7887e-05, -6.3225e-05,  1.4073e-04, -5.4378e-05,  1.6401e-06,
        -5.3590e-05,  8.0181e-05, -6.8369e-05, -2.1930e-05, -1.4239e-05,
         2.2541e-06,  5.3634e-05,  6.0082e-05, -8.5546e-05, -4.7572e-05,
        -2.2981e-05, -7.6775e-05, -2.5235e-05,  1.2160e-05,  2.7058e-05,
        -3.0272e-05,  8.9581e-06,  7.0052e-05, -5.7904e-05, -1.3584e-05,
        -3.5396e-05,  3.9892e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1599e-04, -1.7178e-01,  1.2235e-05,  3.3382e-06, -4.2663e-05,
        -1.7620e-05, -3.7201e-05, -2.3283e-05,  3.6380e-05,  8.3238e-06,
        -8.0656e-06, -7.0066e-06, -3.6466e-05, -1.8698e-05,  2.3354e-05,
        -2.2033e-06, -4.7952e-05, -3.6770e-05,  8.4335e-07, -2.3132e-05,
        -1.9655e-05,  4.4655e-06,  1.0734e-07, -1.4530e-05,  2.4652e-05,
        -3.2246e-05, -3.4939e-05, -7.0681e-06,  1.1106e-05,  3.5815e-05,
        -3.2048e-05,  2.5701e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1107e-05, -1.7759e-01,  6.2414e-05,  5.2304e-05,  1.6121e-05,
         4.3016e-05,  1.2316e-05, -1.0209e-06,  6.7018e-06,  1.3071e-05,
        -6.2727e-06,  9.5800e-05,  3.3112e-06, -7.9203e-05,  1.8417e-05,
        -1.3893e-05,  3.1527e-05,  9.7800e-05,  2.1117e-05, -1.0007e-05,
         2.3022e-05, -6.4971e-05, -2.1537e-05, -2.9516e-06,  5.5593e-06,
        -4.3170e-05, -2.5801e-05, -5.0971e-05, -6.6562e-06, -1.7870e-05,
         1.4241e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3287e-04, -1.4150e-01,  1.2220e-05,  1.6653e-05,  7.1134e-06,
        -2.6506e-05,  1.8703e-05,  4.2244e-06, -8.8832e-06, -1.3073e-05,
        -1.0541e-05,  2.4546e-05, -2.8332e-05,  3.2978e-06,  5.4431e-06,
         1.9843e-05,  3.7426e-05,  1.5401e-05,  1.9408e-05, -3.9257e-05,
         2.4629e-05, -1.8679e-06,  1.4006e-05, -1.7325e-05, -2.9769e-05,
         1.9496e-05, -9.9779e-06, -1.4178e-05,  2.0287e-05,  2.0937e-06,
         1.4101e-05, -2.3808e-05,  3.0589e-06, -2.1662e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5124e-04,  4.3304e-01,  1.5833e-04, -2.0893e-04, -1.0173e-04,
        -6.0000e-05,  1.6517e-04, -1.4318e-05,  5.9818e-05,  5.5029e-05,
         1.2715e-04,  1.7350e-04,  2.4859e-04, -1.5088e-04, -8.9696e-05,
         1.9490e-04, -3.4871e-05,  2.0933e-04,  1.4959e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1400: [tensor([-3.3099e-04,  2.8357e-01,  9.1996e-07,  2.4894e-04,  8.4830e-06,
        -1.2832e-04,  6.9738e-05,  1.8442e-04,  1.3918e-04, -7.9315e-05,
         1.6402e-05,  5.2192e-05,  5.6000e-05, -1.2995e-04,  2.5975e-05,
        -7.9319e-06, -4.1757e-05, -8.4328e-05, -1.3141e-05, -1.8790e-04,
        -1.0949e-04, -2.2878e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8708e-04, -2.0261e-01, -2.8520e-05, -9.7394e-05,  3.1527e-06,
        -1.9269e-05, -2.2690e-06,  1.7472e-05,  2.9361e-05, -3.3434e-05,
        -1.0024e-04,  4.3326e-05,  2.5837e-06,  3.8408e-05,  4.1746e-06,
        -1.1038e-05, -9.7786e-05, -8.4348e-05, -4.8723e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4536e-04,  4.8546e-02,  3.3325e-05, -2.0034e-05, -8.6547e-06,
        -9.2180e-05, -3.4257e-05, -1.8548e-05,  9.2607e-07, -2.9964e-05,
         3.7700e-05,  1.4014e-05, -3.8210e-05, -8.2762e-05, -8.1684e-05,
        -3.0605e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7599e-04,  4.9037e-02, -7.0878e-06, -6.7256e-05,  1.4216e-05,
        -1.2366e-04, -7.2306e-05,  8.9685e-06, -5.3969e-06,  1.4258e-05,
        -1.3835e-05,  2.9285e-05, -5.0800e-05,  6.5577e-06, -5.4458e-05,
        -4.4215e-05,  1.8315e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9101e-04,  1.7414e-01, -2.5198e-05, -1.3412e-04, -6.8479e-05,
        -1.0697e-04, -4.6014e-05,  1.7843e-04, -8.5720e-05,  1.0796e-04,
        -1.9566e-04,  1.2520e-04, -2.8125e-05, -1.0295e-04,  1.3244e-04,
        -1.1667e-04, -6.1720e-05,  5.5959e-05, -3.2897e-05, -3.5180e-05,
        -1.8461e-05, -1.2319e-04, -5.4660e-05,  1.1974e-04, -2.9024e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1122e-06,  4.0075e-02,  1.7723e-05, -1.4024e-06, -3.5593e-05,
        -6.6923e-05,  2.7324e-05, -4.8402e-06,  1.0020e-05, -2.5663e-05,
        -8.3801e-05, -3.0651e-05, -5.1922e-05, -2.8147e-05, -2.3986e-05,
         1.2286e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8121e-04,  9.5943e-02, -1.1087e-05,  1.9354e-04,  1.4712e-05,
        -1.7141e-04, -4.9188e-05,  6.4023e-05, -5.5850e-05,  1.1371e-04,
        -6.7125e-05,  1.5596e-05,  8.8559e-05,  5.5428e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5145e-05,  1.1225e-01, -1.9427e-05, -5.4014e-05, -7.7249e-05,
        -4.2635e-05,  2.9860e-05,  1.0004e-04, -4.4018e-05,  6.9838e-05,
        -2.5721e-05, -8.6464e-05, -1.3441e-04, -6.8161e-05,  2.0963e-05,
        -7.9296e-05, -6.2315e-05,  1.7238e-05,  1.1248e-04, -6.8380e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7614e-04,  7.7438e-02, -1.5299e-05, -4.6851e-05,  1.1369e-05,
        -5.9301e-05, -8.4664e-05,  7.1616e-05,  6.2808e-05,  1.9177e-05,
        -3.5489e-05, -4.3832e-05, -1.0446e-05,  3.0601e-05,  3.4917e-05,
        -7.3738e-05, -1.0660e-05,  5.9734e-06,  1.5375e-05,  3.9570e-06,
         9.3720e-06,  7.6685e-05, -6.2220e-06,  5.2666e-05,  5.1647e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1344e-04,  2.1947e-01, -1.5632e-05,  3.9253e-05,  6.3650e-05,
        -2.3288e-04, -1.9369e-04,  1.1209e-04, -2.5781e-04,  9.3245e-05,
         1.4385e-04,  3.8919e-05,  2.8971e-04,  3.7583e-05, -5.6263e-06,
         2.8786e-05,  8.5328e-06,  3.1963e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2284e-04,  7.7954e-02, -7.2148e-05, -9.1072e-05, -2.3793e-05,
        -1.8037e-04,  1.1993e-05, -4.4754e-05,  7.1654e-05,  1.7784e-05,
        -9.2105e-05, -4.7527e-05, -3.8344e-05, -5.7355e-05, -3.5026e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1335e-05,  1.0851e-01, -5.7095e-05,  1.1808e-04, -7.9871e-05,
        -1.3570e-04, -1.6454e-05,  1.2229e-04,  2.8378e-05, -5.6536e-05,
        -1.7476e-05, -7.9081e-05,  2.5643e-06, -4.4869e-05, -1.8907e-04,
        -1.9555e-04, -8.0980e-05, -3.3832e-06, -6.3795e-05, -8.2304e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1450: [tensor([ 1.1529e-04,  1.7187e-01,  1.1814e-04,  1.0163e-05,  3.5348e-05,
         5.1850e-06,  2.5952e-05,  1.0117e-04, -6.0901e-05,  1.7067e-05,
         4.8501e-05, -1.1376e-04, -3.3008e-05,  3.2048e-05, -1.0937e-04,
         1.1374e-04,  6.2177e-05, -6.5281e-05,  2.8331e-06, -9.9343e-05,
         5.3634e-05,  5.3341e-05,  2.0089e-05, -7.2663e-05,  3.7244e-05,
        -1.5263e-05, -1.4856e-04, -8.9464e-05, -1.3623e-04, -4.1083e-05,
        -3.7036e-05, -2.5648e-05, -2.9554e-05, -6.8526e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4836e-04,  4.0942e-01,  2.8639e-04,  2.0929e-05, -6.4635e-05,
         4.5178e-06,  3.3143e-05,  1.7143e-04, -1.1812e-04, -1.6134e-04,
         1.8094e-04,  3.4040e-04, -2.2179e-05, -1.9958e-05,  7.6071e-05,
        -6.3195e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0568e-04,  3.9458e-01,  2.3483e-05,  6.4449e-05,  2.6321e-04,
         1.1261e-04,  2.0104e-04,  4.5291e-05,  2.5442e-04,  2.3203e-04,
        -2.2491e-05,  1.1507e-05, -2.3945e-06, -1.0288e-04, -9.4070e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3581e-05,  1.5845e-01,  4.5973e-05,  7.5012e-05,  9.7082e-05,
         9.6985e-05,  4.7576e-05,  2.5525e-05, -6.0001e-05,  1.5215e-05,
        -1.3889e-05,  3.0064e-05,  1.2033e-04,  5.3032e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3728e-04, -1.8175e-01, -8.8029e-06, -9.3474e-05,  1.9016e-05,
         5.8082e-05,  9.2976e-06,  5.2406e-05, -4.7175e-05,  1.5072e-05,
        -9.1567e-06, -8.3855e-05,  1.5569e-05, -2.7408e-05, -4.2796e-05,
        -1.1539e-07,  8.2133e-06, -7.2714e-05,  4.9214e-05,  4.3567e-05,
         5.8735e-05,  6.2414e-05,  4.4312e-05, -1.2925e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2659e-05, -1.1091e-01,  2.3215e-05,  7.4465e-06,  1.0143e-05,
        -1.5703e-05,  3.2053e-05, -5.4587e-06,  4.9379e-05, -7.8853e-06,
         9.6335e-07,  3.5555e-05, -1.8511e-06, -2.2540e-06,  2.5685e-06,
         2.6471e-05,  1.6656e-05, -1.6179e-06, -3.6034e-05,  1.6704e-05,
         1.5008e-06,  2.0061e-05,  1.7631e-06,  3.3646e-06,  1.8367e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2215e-04, -1.1002e-01,  1.4880e-05, -7.3943e-06, -8.5123e-05,
         3.2974e-05, -1.7050e-05,  9.3422e-06,  3.4636e-05, -1.5857e-05,
        -3.0462e-06, -3.3764e-05, -2.3996e-05, -3.3766e-05, -1.4929e-05,
         9.7166e-06,  2.5044e-05,  2.7058e-05,  1.2096e-05,  1.1507e-05,
         2.5151e-05, -5.1784e-05,  2.3335e-05, -2.0111e-05,  1.7940e-05,
        -1.0709e-05,  6.0613e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8475e-05, -2.3168e-01,  1.4824e-05, -1.2184e-04, -1.0189e-04,
         1.5287e-05,  8.1933e-05, -1.5247e-06, -1.7258e-05, -3.0659e-05,
        -2.1520e-05, -1.0776e-05, -1.9088e-05, -1.4644e-04,  1.7913e-05,
         1.2657e-04,  4.6613e-05, -5.2620e-05,  2.0534e-05, -8.4128e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2463e-04, -2.4310e-01,  2.8755e-05,  1.2576e-04, -1.9523e-05,
        -8.7424e-05,  1.9858e-05,  5.7344e-06, -1.7853e-05, -2.9072e-06,
        -7.9799e-06,  1.8220e-04, -1.5606e-05, -3.9875e-05, -7.1748e-05,
        -5.8337e-05, -1.0706e-04, -1.0776e-05,  5.5077e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4749e-05, -2.3400e-01, -2.0970e-04, -1.3229e-04,  3.0017e-05,
         9.5908e-05,  7.5294e-05,  1.5376e-04,  1.8459e-05, -5.7057e-06,
        -2.7251e-05, -6.7098e-05, -6.3919e-05,  5.4766e-05,  9.1739e-05,
         1.0769e-04,  9.1198e-05,  9.9681e-05,  3.3474e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5485e-04, -1.9327e-01,  1.7977e-05, -1.9078e-05, -8.6055e-05,
        -4.1779e-06, -5.4583e-05, -2.0728e-05,  2.5330e-06,  1.9296e-05,
        -1.7276e-05,  7.9277e-06,  3.7310e-05, -1.7195e-05, -2.7766e-05,
        -3.7438e-05,  4.1804e-05,  4.1548e-05, -4.6364e-05, -1.8578e-06,
        -4.7524e-05, -5.0672e-05, -4.7062e-05,  2.6669e-05,  3.4845e-06,
        -1.2788e-05,  7.3274e-05, -1.9112e-05,  8.0604e-06,  2.2231e-05,
        -7.6832e-06,  2.1424e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9387e-04,  4.7527e-01, -6.0637e-05,  1.8030e-04,  1.6249e-04,
        -1.4906e-04,  2.9321e-04,  1.4885e-04, -1.8159e-04, -1.6021e-04,
         8.0460e-05,  1.4910e-04, -1.4743e-04,  3.3510e-05,  8.3706e-06,
         4.7157e-05,  1.5337e-04,  1.5947e-05,  3.4244e-04,  1.1160e-04,
         6.9477e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1500: [tensor([-5.8214e-04,  4.1208e-01,  4.5095e-05,  1.7725e-05,  9.4538e-05,
        -2.5029e-05, -1.0136e-04,  2.0136e-04, -1.6249e-05, -2.4581e-04,
        -9.0620e-05,  1.0069e-06, -4.2406e-05, -8.7567e-05, -1.1718e-04,
         1.5003e-05, -4.9028e-05, -2.2633e-05, -2.7188e-05, -2.5146e-06,
         2.9920e-05,  5.0273e-05,  5.3870e-05,  6.3172e-05,  4.6579e-05,
         6.0760e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5060e-04,  3.8340e-01,  2.5541e-05,  1.3499e-04, -8.7647e-05,
         1.3163e-04,  8.5335e-06,  9.9251e-06, -5.3047e-06,  1.2202e-04,
        -1.2900e-04,  9.6222e-05,  4.6039e-05,  5.4934e-05,  1.1709e-04,
        -9.6784e-07, -4.9339e-05,  3.3004e-05, -7.9393e-05,  8.5342e-05,
        -1.3708e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8809e-05,  4.9094e-01, -1.7789e-04, -1.1621e-05,  4.3167e-06,
         4.2622e-05, -7.4802e-05, -6.2013e-05, -2.2261e-04, -8.4040e-05,
         5.1652e-05, -1.8778e-04,  8.1889e-05, -2.5274e-05,  6.6692e-05,
        -4.8367e-05, -6.9317e-05,  4.3690e-05, -1.3009e-04,  1.1227e-04,
        -1.2617e-05,  1.3638e-04,  1.1730e-04, -1.6701e-05, -1.2056e-04,
        -4.4000e-05,  5.7240e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1265e-04, -1.9181e-01,  3.3106e-05,  2.0979e-05,  1.6935e-05,
         5.8237e-05,  3.3745e-05, -6.0710e-05,  3.4185e-05,  5.0271e-05,
         1.0066e-04,  1.0659e-04, -1.0276e-05,  2.8930e-05,  2.2576e-06,
        -6.8339e-05,  5.4854e-05,  4.1512e-05,  3.5081e-06,  3.1136e-05,
         1.4264e-05,  3.1027e-05,  4.3027e-05, -3.7592e-05,  4.7279e-05,
         3.6817e-06,  1.3772e-05, -3.4135e-06, -8.0162e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.8045e-05,  5.8141e-01,  1.4563e-04, -2.5159e-06,  9.9272e-05,
        -1.0292e-04,  6.2113e-05, -1.9872e-04, -2.4999e-04, -8.2263e-05,
         1.9086e-05,  1.2121e-04, -4.3966e-06, -4.4589e-05, -2.8154e-04,
        -1.7374e-04, -5.5549e-05,  1.4133e-04,  6.5825e-05, -4.8006e-05,
        -2.2953e-04,  1.1863e-04, -2.7719e-05, -2.0355e-04,  6.4903e-05,
        -2.5501e-05, -1.3498e-04,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9524e-06, -1.7517e-01,  7.4337e-05,  7.4118e-06,  3.3620e-06,
         3.1735e-05,  1.2932e-05,  2.7238e-06,  6.5714e-05, -3.5393e-05,
        -2.2982e-05, -2.7234e-05, -6.7479e-05, -4.8936e-05,  7.1535e-05,
         1.2228e-05,  6.1966e-05, -9.0381e-06,  4.4133e-05,  1.9304e-05,
         5.6016e-06, -3.6448e-05,  6.6024e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4199e-04, -9.0205e-02, -3.1016e-05,  9.3945e-07, -1.8934e-05,
         4.1168e-05,  3.6246e-05,  1.8884e-05, -8.1195e-06,  1.7770e-06,
        -7.1330e-07, -8.1504e-06,  2.1681e-06, -4.4339e-06, -1.2482e-05,
        -4.2424e-06, -3.0143e-05, -1.6404e-05,  2.1706e-05, -1.1098e-05,
        -1.0854e-05, -2.3768e-05, -5.0000e-05, -1.9646e-05,  5.5515e-06,
        -4.0080e-05, -4.1086e-05, -4.9478e-06, -1.3327e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1849e-04,  3.3309e-01, -2.4265e-05,  6.2202e-05, -4.2174e-05,
         1.5309e-04,  4.5922e-05, -7.6967e-05,  1.4538e-04,  5.2206e-05,
        -7.6476e-05,  1.1035e-04, -1.6541e-05,  7.1894e-05, -8.1982e-05,
        -2.6650e-04,  1.2473e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1222e-04, -1.3631e-01, -2.0537e-05, -2.1043e-05, -2.6128e-05,
         3.1372e-05,  8.8505e-06,  1.5213e-05, -9.8525e-05, -3.5586e-05,
        -5.2864e-05,  5.2198e-05, -9.3890e-05, -3.0486e-05,  1.1011e-05,
         4.1351e-05, -9.9264e-06, -3.4359e-05,  1.5907e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5795e-05, -1.5814e-01, -9.5375e-06,  3.7114e-05,  6.5877e-05,
         4.7094e-05, -1.2430e-04, -4.6117e-05, -1.0050e-07,  1.5792e-04,
         3.3787e-05,  4.5488e-05,  2.9066e-05,  1.6924e-05,  7.8439e-05,
         1.2003e-05, -3.8405e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3525e-04, -2.0662e-01, -3.2348e-05,  3.2659e-05,  2.7418e-05,
        -7.6923e-05, -1.6041e-05,  6.4260e-05,  2.6560e-05,  1.1647e-05,
        -2.1801e-05,  4.2783e-05,  1.0125e-05,  3.3742e-05, -6.3952e-05,
         3.6087e-05, -4.2031e-05,  3.3839e-05,  4.7429e-06, -8.6309e-06,
        -2.5318e-05, -3.3515e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5588e-04,  5.5057e-01, -9.4470e-05, -1.2391e-04, -8.9477e-05,
        -5.3154e-06,  1.8165e-05, -8.8170e-05, -1.2436e-04,  2.2007e-06,
        -4.0439e-05,  1.7206e-04, -6.3417e-05, -1.1418e-04, -5.7142e-05,
         1.0394e-04, -2.3607e-05, -5.6862e-06,  1.3836e-05, -1.5124e-04,
        -4.0432e-05, -1.3061e-05,  1.1983e-05, -6.0973e-05, -1.3635e-05,
         8.8065e-05, -9.0220e-05, -4.4505e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1550: [tensor([ 1.1578e-04,  1.5910e-01, -8.4385e-05, -9.8017e-05, -1.1340e-04,
        -3.7479e-05,  7.2817e-05,  6.5983e-05, -1.7200e-05, -8.9777e-05,
         3.8193e-05, -7.9480e-05,  2.8800e-06,  1.1962e-05,  2.9721e-06,
        -1.0292e-04, -7.4090e-05, -1.4179e-04, -1.9543e-05,  1.0115e-04,
        -5.8299e-05, -5.3775e-05, -2.2673e-06,  6.7756e-05,  2.1976e-05,
        -1.1292e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7932e-05, -2.0139e-01, -9.7928e-05,  7.2905e-05, -4.1360e-05,
         7.5425e-05, -7.9917e-05,  1.3953e-05, -4.1243e-06,  1.3032e-04,
        -1.4417e-05, -1.8077e-05,  8.1207e-05,  1.4785e-04, -7.2119e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1939e-04,  4.8821e-01,  5.5605e-05,  6.8454e-05, -1.0157e-04,
        -2.2947e-05, -3.1639e-04,  1.0542e-04,  1.7794e-05,  1.6339e-04,
         2.2391e-04,  4.3040e-06,  2.5891e-04, -5.9276e-05,  6.9485e-06,
         3.8811e-05,  1.0888e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2139e-07, -1.7339e-01, -1.1738e-04,  4.7891e-05, -1.2769e-05,
         7.1672e-05, -5.4911e-05, -1.0049e-04,  9.0721e-05, -2.6768e-05,
        -8.9353e-05,  7.0079e-06, -4.4453e-05, -8.2373e-05, -4.5418e-06,
        -1.1013e-04,  4.2927e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0360e-04,  1.3407e-01,  8.6736e-05,  1.6647e-04, -8.1670e-05,
        -1.0909e-05,  7.3394e-05,  1.2173e-04, -4.8782e-05,  8.1742e-05,
         2.9109e-05, -6.5655e-06, -3.8601e-05, -4.0402e-05,  5.3635e-06,
        -1.0607e-04, -2.6941e-05, -1.1727e-04, -3.8378e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8505e-04,  2.1565e-01, -2.6038e-04,  1.0166e-04,  5.2776e-05,
         3.9263e-05,  1.4845e-04,  7.6223e-05, -2.5831e-04,  1.6982e-04,
         2.8263e-05,  1.8951e-04, -1.2403e-04,  2.2094e-04, -2.1750e-04,
         1.4452e-04, -4.0341e-05,  1.8536e-05, -1.8185e-04,  3.3858e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5841e-05,  1.9750e-01,  1.1819e-04,  2.9332e-04,  2.2915e-04,
        -4.3011e-05,  3.6145e-05,  1.8892e-04, -9.0459e-05, -1.6049e-04,
         1.6848e-04, -1.0143e-05,  1.1585e-04, -4.7019e-05, -1.0742e-04,
        -2.1355e-05, -2.1266e-04,  1.6620e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2795e-05,  3.1008e-01,  1.0475e-04,  6.1641e-05, -1.1916e-04,
         2.2662e-05,  1.0496e-04, -7.4915e-05, -3.3711e-05, -8.6060e-05,
         6.6176e-05,  3.7264e-05,  7.3328e-05,  1.0738e-04,  2.6244e-05,
        -2.5816e-05,  1.6507e-04, -5.3360e-05,  7.8504e-06,  5.3327e-05,
        -8.0782e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4636e-04,  3.5643e-01, -9.5497e-06,  3.7332e-05, -1.4211e-04,
        -7.9260e-05,  7.2169e-05, -5.1837e-05,  1.4624e-05,  7.2935e-05,
        -2.2588e-05, -6.7430e-05, -4.7093e-05, -5.1540e-05, -1.4382e-04,
        -6.7556e-05,  1.7258e-05,  6.1548e-05, -8.3150e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1562e-04,  4.7286e-01,  1.4764e-04, -7.6720e-05, -4.9220e-05,
         1.3578e-04, -4.2752e-05,  1.9020e-05, -8.4864e-05,  1.0762e-04,
         1.3977e-04,  2.4760e-04, -7.2837e-05,  8.8459e-05,  2.3957e-04,
         5.5643e-05, -4.2541e-05, -9.3349e-06, -4.6624e-05,  8.9338e-05,
        -5.5944e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6132e-05, -1.0773e-01,  3.3144e-05, -5.6016e-06, -7.3289e-06,
        -9.2446e-05,  1.0281e-04,  5.4461e-05, -1.8932e-06,  6.0485e-05,
         6.1226e-05, -1.0171e-04,  1.0763e-04, -1.6803e-05, -1.3966e-05,
         1.6990e-05, -7.8983e-05, -4.0825e-05, -6.3368e-05, -5.2105e-05,
         1.5913e-04, -2.2765e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5827e-05,  2.4260e-01, -2.2566e-05,  1.0264e-06,  2.3717e-05,
        -4.5118e-05, -3.2813e-04, -4.6113e-05,  4.1650e-06, -3.9937e-05,
        -3.0217e-04, -2.2861e-06,  3.0752e-06,  1.9378e-05, -5.4733e-05,
        -3.5368e-05,  1.0317e-05, -2.3181e-04,  8.2536e-06,  9.5274e-05,
         1.2027e-04, -5.2016e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1600: [tensor([ 1.9824e-04,  3.5456e-01, -2.2111e-04, -7.1393e-05, -8.1295e-06,
         1.7746e-05, -1.4585e-05,  7.7559e-06, -3.3475e-05, -9.7978e-05,
        -4.2339e-05, -6.6398e-05,  4.8410e-05, -1.0802e-04, -4.5465e-05,
         2.1882e-05,  3.7769e-05,  3.3323e-05, -4.5475e-05, -2.7691e-05,
        -4.7778e-05, -5.9447e-06, -7.4503e-05,  1.1148e-04,  5.8727e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3536e-04, -2.5716e-01, -6.3790e-05,  2.5585e-05, -7.5016e-05,
        -1.8084e-05,  6.4259e-06,  3.2701e-06,  4.4930e-05,  1.6709e-05,
         2.6472e-05, -1.1995e-04,  3.0708e-05, -2.8454e-05, -4.3040e-05,
        -2.2074e-06,  4.5564e-05,  9.7404e-06, -2.4088e-05,  6.1009e-05,
        -2.4700e-05, -9.8721e-06,  4.0573e-05,  5.7466e-05,  1.3355e-05,
         2.3891e-05, -5.0652e-05, -2.9962e-06, -5.2561e-05, -1.2861e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4226e-04,  5.1700e-02, -3.1880e-06, -4.7552e-06,  1.4756e-05,
         1.9288e-05,  2.1289e-05, -1.8710e-05,  3.9534e-05, -5.2741e-06,
        -4.7314e-05,  1.3784e-05,  2.8678e-05,  2.2761e-06, -1.2754e-05,
         1.2809e-05,  6.2426e-06, -3.3835e-05, -5.0817e-06, -1.1572e-06,
        -9.0806e-06, -1.8447e-05,  1.2584e-05,  2.1707e-05, -2.8181e-05,
         7.6283e-06,  3.6247e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7712e-04,  7.8403e-02, -2.4006e-05, -8.1403e-05, -1.6940e-05,
         4.3090e-05,  4.3973e-05, -6.1842e-05,  6.8505e-05,  3.5930e-05,
        -5.3020e-05, -3.4313e-05,  2.5649e-05,  2.8567e-05,  7.5792e-05,
         6.7285e-05,  2.3606e-06, -5.0389e-05,  3.2743e-05, -9.0333e-05,
        -4.9745e-05, -8.0932e-06,  1.3919e-05, -1.1751e-08,  1.3328e-05,
        -1.9995e-05,  1.6348e-05,  5.2031e-05,  3.8292e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3127e-04,  9.4734e-02, -3.0561e-05,  4.3338e-05, -1.8392e-05,
         1.6954e-05,  1.8396e-04,  1.2742e-05,  7.7551e-05,  2.0071e-05,
        -2.1455e-05,  5.1311e-06,  4.6319e-05,  1.8414e-05,  1.0152e-04,
         3.6879e-05, -3.8159e-05, -4.0777e-05, -9.4794e-05, -1.9425e-05,
         6.1620e-06, -2.7483e-05,  9.3627e-06, -2.0569e-05, -8.3337e-05,
         2.5524e-05, -9.1158e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2006e-04, -1.6600e-01,  2.2129e-06,  2.3465e-05, -5.5228e-05,
        -6.0950e-06, -2.1667e-05, -1.0228e-05, -2.0159e-05, -7.1696e-05,
        -1.2738e-05, -5.5886e-05,  4.5302e-05, -2.0620e-05,  6.7123e-06,
         2.2580e-06,  1.1617e-06, -1.5548e-05, -1.1614e-05,  5.1960e-06,
        -1.2331e-05,  2.2810e-05,  1.7692e-05,  3.2826e-05,  5.4006e-05,
         1.0558e-05,  3.1223e-05,  4.6414e-06, -1.1123e-05,  3.1480e-05,
         1.6568e-06,  9.8669e-06, -8.6556e-07,  3.8133e-05,  2.6109e-05,
         1.6874e-05, -4.0331e-06,  2.2542e-05, -5.5563e-06,  3.2891e-05,
         1.7715e-05,  4.1734e-05,  1.4456e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8702e-04, -2.4994e-01,  2.3518e-05, -8.6359e-05, -3.1486e-05,
         1.6329e-05,  4.2746e-05, -6.3478e-05,  1.7030e-05,  5.0156e-06,
         3.6259e-06, -2.1363e-05, -1.0298e-05, -3.0873e-05, -4.1298e-05,
        -8.3235e-05, -2.9518e-05, -1.6118e-05, -4.0011e-05,  1.9078e-05,
        -1.4917e-05, -3.7073e-05,  2.4872e-05,  7.9896e-05,  6.9060e-05,
        -1.5928e-05,  9.2459e-06,  2.3782e-05,  3.4721e-05,  8.1244e-06,
        -8.5644e-06,  1.6122e-05,  1.7787e-05,  4.9493e-06, -9.5678e-06,
         1.5302e-05, -8.7316e-06, -2.6611e-05,  5.9841e-06, -2.6505e-05,
        -2.9536e-05, -4.8100e-06, -1.9345e-05,  2.0338e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3667e-04, -2.2480e-01,  9.0221e-06, -2.9386e-05, -2.4010e-06,
         3.3396e-06, -7.7878e-05,  6.9892e-06, -4.3529e-05, -2.1843e-05,
         2.6833e-05, -7.2194e-06,  2.5957e-05, -3.8175e-05,  1.5865e-05,
         2.0010e-05, -1.6587e-05, -3.4610e-05, -2.8498e-05, -1.7255e-05,
        -3.2256e-05, -1.1312e-05, -1.2574e-05,  2.5581e-05,  6.2657e-06,
         3.4224e-05, -2.4610e-05, -5.6780e-05, -4.8612e-07, -5.7250e-06,
         1.4642e-05,  3.5867e-05, -1.6178e-05,  2.0793e-05, -1.8045e-05,
        -6.5709e-06,  2.0149e-05,  1.5294e-05,  9.1100e-06,  1.3605e-05,
         4.6776e-05,  1.8303e-06,  1.0159e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1490e-04,  6.1664e-02,  2.1281e-05,  1.8191e-05, -4.6735e-05,
        -4.4949e-05, -5.2716e-05, -4.0578e-05, -8.7939e-05,  1.6140e-05,
         4.7862e-05,  8.8351e-05, -4.5177e-05, -1.3180e-05,  1.7047e-05,
         2.5039e-05,  1.0171e-04,  2.8587e-06, -2.2015e-05, -6.7677e-06,
         5.4660e-05, -7.6299e-05, -6.6385e-06,  1.7656e-05, -1.8721e-05,
        -6.0329e-05,  2.4982e-05,  7.1425e-05,  3.1087e-06,  3.3648e-05,
        -2.8119e-05,  1.0649e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6356e-04,  5.7873e-02, -3.3725e-06,  8.3211e-06, -2.8202e-05,
        -1.4738e-05, -6.4469e-06,  1.8605e-07, -5.0228e-05, -2.4652e-05,
        -4.3069e-06, -1.9858e-06,  7.6314e-06, -3.1217e-05,  9.5978e-07,
        -1.3976e-05, -1.1064e-05, -2.4876e-05,  7.2165e-06,  1.6667e-06,
         6.6704e-05,  1.0562e-05, -9.9535e-06, -4.1087e-06, -2.5086e-06,
        -4.5212e-05,  3.6192e-06,  4.5068e-05,  2.8475e-05,  6.3902e-06,
         2.2777e-05,  2.0687e-05,  6.0735e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0217e-05,  2.9475e-02,  8.7139e-06, -1.0911e-05,  6.4403e-06,
        -1.4805e-05,  2.4773e-05,  9.0836e-06, -1.6748e-05,  2.2309e-05,
         1.0927e-05,  3.8172e-05,  1.5068e-05, -3.0458e-06, -2.4379e-06,
         5.3912e-06,  3.0355e-05,  6.4306e-06, -4.8690e-05, -2.1055e-05,
         4.4363e-05, -2.7704e-05,  2.5890e-05,  1.3424e-05, -8.1233e-06,
        -1.5182e-05, -8.4652e-06, -1.0238e-05,  1.4068e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8451e-06,  3.1736e-02,  2.6885e-05,  5.1978e-06, -3.3717e-06,
         2.4896e-07,  1.4887e-05, -2.5073e-05, -1.6534e-05,  1.7185e-05,
         2.2515e-05,  3.1806e-05,  2.6946e-05, -1.8716e-05,  4.9045e-05,
        -1.2779e-05,  2.4501e-05,  4.1913e-05, -2.4377e-05, -6.5087e-06,
         4.5697e-05,  4.3034e-07,  1.0300e-05,  1.9174e-05, -1.3881e-05,
        -1.2065e-06,  3.2473e-05,  6.8261e-06, -3.0234e-06, -2.5693e-05,
        -2.0548e-05, -2.6153e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1650: [tensor([ 4.5910e-05, -3.0907e-01,  3.0572e-06,  8.6088e-05, -1.2322e-05,
        -1.2065e-06,  4.5572e-05,  2.4303e-05, -9.2859e-05, -4.6950e-07,
        -7.2141e-05, -3.0295e-05, -2.0368e-05, -3.4727e-05, -8.3932e-05,
         1.9302e-05, -2.0625e-05, -1.9151e-06, -7.1381e-05, -3.9325e-05,
        -2.8681e-05,  1.0778e-04, -2.7603e-05,  2.8653e-06,  6.9598e-05,
        -2.7849e-05,  2.4173e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4634e-04, -2.2569e-01,  3.0747e-05, -2.3365e-05, -3.5408e-05,
        -3.6864e-05, -3.1019e-05, -7.1022e-05, -3.5746e-05, -1.5249e-05,
        -8.5580e-06, -1.1422e-05, -6.7139e-05, -4.5988e-06,  8.1003e-05,
         4.0327e-06,  2.3174e-05, -4.2020e-06, -6.3729e-06, -3.6853e-05,
        -1.4640e-05, -3.5518e-05, -2.7709e-05, -4.6918e-05,  5.6410e-05,
        -5.9798e-05, -1.8738e-05,  4.0657e-06,  1.4327e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3328e-05, -1.3013e-01,  9.4830e-06,  2.1874e-05, -1.2068e-05,
         1.4737e-05, -4.2195e-06, -1.8156e-05,  3.3282e-05, -9.3423e-06,
         2.0108e-05,  4.4250e-05,  2.1279e-05,  3.8715e-05, -2.2511e-06,
        -9.7835e-06,  2.1593e-06, -1.9954e-05,  4.6216e-06, -7.9362e-07,
        -2.4442e-05,  3.9295e-06,  1.4034e-05,  5.5415e-06, -1.6075e-05,
        -5.3126e-05, -9.3446e-08, -1.2966e-05,  1.5198e-05, -2.1520e-05,
         4.7675e-06,  9.1542e-06, -1.3822e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8646e-05, -1.3599e-01,  2.9964e-05,  1.3622e-05,  3.5263e-05,
         3.4888e-05, -5.0321e-05,  1.2328e-05,  3.9081e-05,  5.4142e-05,
         2.6201e-05,  1.1525e-05,  2.2251e-05, -3.9657e-06,  1.9496e-05,
         2.2495e-05,  7.6033e-06, -2.7639e-06,  1.1236e-05, -2.1115e-05,
         1.9922e-05,  1.1060e-05, -3.1282e-06,  4.0442e-06, -1.5357e-05,
         1.6404e-07,  4.8128e-06,  1.6899e-05, -5.2918e-06, -2.1551e-05,
         4.0963e-07,  1.2807e-05, -9.2748e-06,  3.0784e-06,  2.6258e-05,
        -2.0223e-05,  2.6262e-05, -2.7538e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.6081e-05,  5.9989e-01,  4.5151e-05,  9.5321e-05,  1.4065e-04,
         3.3775e-05,  8.9915e-05,  9.3511e-05,  1.2931e-05,  9.2780e-05,
         1.7088e-04,  1.2095e-04,  1.0333e-04,  6.4633e-05, -4.5347e-05,
        -6.0754e-06,  1.2603e-04,  1.0667e-04,  9.7465e-05, -2.9319e-05,
        -5.8820e-05, -1.5708e-04,  6.7864e-05, -5.3859e-05, -2.8111e-05,
        -1.0840e-04,  1.2980e-04,  1.3683e-06,  7.5690e-05,  9.5595e-05,
        -1.4024e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6599e-05,  3.2922e-01,  5.7598e-05,  4.3847e-05,  1.3582e-04,
         9.0616e-05, -2.4735e-04,  1.0133e-04, -5.4000e-05, -1.3297e-05,
        -9.6585e-05, -5.6498e-06,  2.1933e-06, -1.1716e-04, -5.3878e-05,
        -5.1159e-05,  1.5908e-05,  6.7222e-05,  2.1863e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2445e-04, -1.2662e-01, -3.1257e-05,  4.3067e-05, -5.2400e-05,
         3.7584e-05, -4.5315e-05, -5.6414e-05,  1.8066e-05,  2.1130e-05,
         4.5757e-06,  3.4853e-05,  1.4349e-05, -4.9909e-06, -1.4686e-05,
         4.5724e-06, -1.1338e-05,  2.5933e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0748e-05,  2.8318e-01, -5.3356e-05, -1.8623e-04, -1.7759e-05,
         4.7833e-06,  3.8871e-07, -1.2023e-04,  3.1941e-06,  1.2342e-05,
         5.7888e-05, -1.8898e-04, -6.4602e-05, -2.2052e-05, -9.6425e-05,
        -7.0489e-05, -2.2531e-05, -8.3438e-05,  4.7874e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3588e-04, -2.5483e-01, -4.6182e-05,  3.5557e-05,  5.4964e-05,
        -2.3744e-06, -6.4570e-07,  6.5440e-05,  2.8559e-05,  6.9787e-05,
         4.0045e-05,  4.1171e-05, -2.7845e-05,  1.4305e-05, -2.6274e-06,
         1.1299e-05,  4.5397e-05,  4.4556e-05,  1.0425e-05,  3.3344e-05,
         2.3857e-05,  1.2133e-05,  3.6943e-06,  2.7097e-05,  1.5415e-05,
         3.8392e-05, -3.2770e-05,  1.0801e-05, -1.8239e-06,  1.5596e-05,
        -2.0743e-05,  2.2308e-05,  6.5699e-05,  3.0991e-05, -5.9278e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0238e-04, -1.0614e-01,  3.1437e-05,  1.2538e-05, -3.2990e-05,
         2.4772e-05,  1.7517e-05,  1.3983e-05,  2.1428e-05, -5.4748e-06,
        -2.2243e-05,  8.9507e-06, -1.0583e-05, -3.5534e-06,  3.4042e-05,
        -2.8529e-05, -2.8489e-05, -6.3032e-06,  2.6746e-05,  3.9931e-05,
        -2.0202e-06, -2.5752e-05,  3.7933e-05,  2.9930e-06,  2.2133e-05,
        -1.1768e-07, -1.3169e-06,  1.6851e-05, -9.6917e-06,  1.4680e-05,
         1.2618e-05, -4.9365e-06,  7.5302e-06,  1.5444e-05,  1.4781e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3929e-05, -2.4648e-01, -3.5280e-05, -4.2611e-05,  4.2475e-05,
         3.2213e-05,  3.6305e-05,  1.3624e-05,  3.3124e-05,  3.2061e-06,
        -5.7811e-05,  9.2008e-06,  3.5506e-06, -1.3786e-05, -5.2718e-05,
         3.4237e-06, -5.1793e-05,  2.5251e-05,  5.4034e-05,  4.2817e-06,
        -6.0732e-05, -4.6621e-05,  6.6204e-06,  1.3815e-05,  1.0194e-05,
         2.5930e-05, -3.9174e-05,  1.7488e-05,  7.1895e-06, -2.5984e-05,
        -5.6522e-05, -2.0483e-05, -8.0928e-05, -3.7456e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3713e-05,  4.3421e-01, -3.9546e-05,  2.9811e-05,  3.0901e-05,
         1.0307e-04, -8.8672e-05,  1.4258e-04,  5.2798e-05, -1.2174e-04,
         1.8514e-04,  1.0781e-05, -9.2247e-05, -5.4745e-06, -4.9637e-05,
        -9.5596e-05,  7.9445e-05, -3.7694e-05, -5.1748e-07, -8.6191e-05,
        -6.2590e-05, -5.4931e-05,  5.7399e-05, -9.8060e-05,  5.5874e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1700: [tensor([ 2.8686e-04, -2.1248e-01, -2.9442e-05, -3.4039e-05, -1.0598e-04,
        -4.4040e-05, -3.3670e-05,  4.1378e-05, -2.5075e-05, -8.9307e-06,
         4.5480e-05,  2.3631e-05, -7.2645e-05, -4.6233e-05, -5.1931e-05,
        -8.7335e-05, -6.2075e-06, -2.4640e-05,  8.0351e-05,  5.6279e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8391e-05,  4.0587e-01, -2.0689e-04, -1.9227e-05,  9.2651e-05,
        -1.4156e-04, -9.8844e-05,  5.7487e-05, -1.0208e-04, -1.3808e-04,
         8.9489e-05, -1.5612e-04, -8.6794e-05,  2.0882e-04,  2.4391e-04,
         3.7365e-05,  1.4213e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0003e-04, -1.6957e-01, -9.0681e-05, -3.4961e-05, -2.2701e-05,
        -9.4721e-05,  3.3977e-05, -2.5653e-05,  6.3900e-05, -6.2356e-06,
        -2.9410e-05, -2.2314e-05, -2.9224e-05,  2.0262e-05,  6.1432e-05,
        -1.5206e-05, -4.3729e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7808e-04,  5.0414e-01,  1.3176e-04, -2.5731e-04,  1.3253e-04,
         1.1781e-04,  1.3138e-04,  6.1586e-05,  1.8053e-04,  1.1251e-04,
         2.1117e-04,  2.9915e-05,  2.6899e-05,  8.0615e-05,  7.4068e-05,
         9.8774e-05,  4.8592e-05, -8.9013e-06, -1.1942e-04,  2.3253e-04,
        -1.3459e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3881e-04, -2.3161e-01, -1.4219e-05,  9.3516e-05,  4.8305e-05,
         4.9851e-05, -8.1123e-05,  4.8299e-06, -1.0074e-04,  1.1692e-04,
        -1.9368e-05, -2.4322e-05,  7.0008e-06,  3.3273e-05, -3.2480e-05,
         7.4277e-05,  5.5734e-05, -3.3518e-05,  9.0921e-05,  3.1677e-05,
        -7.0541e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6617e-04,  3.3744e-01,  4.9746e-05,  3.0532e-05, -5.9804e-05,
         6.2401e-05, -3.1216e-05,  5.4300e-05, -2.6880e-05,  3.2821e-05,
        -3.1519e-05, -6.6378e-05,  9.1949e-05, -2.7685e-05, -2.6158e-05,
         6.1356e-05, -4.5250e-05,  7.4456e-05,  9.4322e-05, -3.8187e-05,
         1.8455e-05, -1.0421e-04,  9.4132e-05,  6.5646e-05,  6.4405e-06,
        -7.3107e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5525e-04,  9.9813e-02, -5.3400e-05, -4.5531e-05,  4.9046e-05,
        -7.5733e-05, -4.1782e-05, -1.9869e-05, -9.9278e-05, -7.6241e-05,
        -1.6565e-04,  7.3594e-05, -8.0402e-05,  1.7784e-04, -1.0693e-04,
        -1.6922e-05, -1.2670e-06, -2.6654e-05,  7.3511e-05, -1.5104e-04,
         7.1186e-06,  7.0738e-05, -1.1005e-04, -4.0638e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1764e-05,  5.9581e-02, -2.2885e-05,  6.1617e-06,  1.1466e-05,
        -6.7068e-05,  6.7889e-06,  1.1718e-05,  5.0242e-05, -1.0460e-06,
        -3.4878e-05, -6.9651e-06, -3.8922e-06,  6.0056e-05, -1.5497e-06,
        -9.0795e-06, -3.8874e-05, -2.0499e-05,  2.6464e-05,  8.9064e-06,
         5.3136e-05, -1.7932e-05,  7.6112e-06, -4.2876e-05, -4.4039e-05,
        -2.2920e-05, -4.6941e-06, -2.3427e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0688e-04,  2.1538e-01, -1.7239e-04, -1.3844e-04, -2.8572e-04,
        -1.0366e-04, -1.3835e-04,  1.1074e-05, -3.2306e-05, -3.0064e-05,
         1.9966e-04,  7.6177e-05,  6.4395e-06, -2.5101e-05, -7.0169e-08,
        -9.8138e-05, -1.0515e-04, -1.8808e-04, -6.6839e-05, -2.7613e-04,
         1.0602e-04, -3.3193e-05, -2.9300e-05, -1.6382e-04, -1.1211e-05,
        -2.9489e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8779e-04,  3.4343e-01, -1.8138e-05, -8.9514e-05, -8.7093e-05,
        -1.5158e-04,  4.4941e-05, -2.8556e-06, -8.1447e-05, -9.2015e-05,
         4.8869e-05, -2.7439e-05, -7.5752e-05,  9.7307e-06, -1.0841e-04,
        -1.1688e-04,  3.8827e-05, -1.3643e-04,  3.2991e-05, -9.1984e-06,
         6.4799e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9188e-05,  3.6799e-01, -3.7959e-05,  2.1189e-05, -4.4414e-05,
        -3.4666e-05,  1.4545e-04,  7.3356e-05, -1.1217e-05,  6.3474e-05,
         3.7338e-05, -6.5512e-05,  1.5175e-04, -7.7600e-05, -1.0808e-04,
        -5.9518e-05, -1.7515e-04,  2.8510e-06, -2.0702e-05, -4.6708e-05,
         6.6896e-05, -5.5272e-05,  2.8788e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0980e-04, -1.6724e-01,  1.0014e-04,  7.3197e-06,  9.5213e-06,
         3.9319e-06, -4.4268e-05,  2.7630e-05,  8.1594e-06,  1.5147e-05,
         6.9273e-05, -5.2375e-05,  2.8673e-05,  1.9233e-05,  1.7189e-05,
         3.9838e-06,  6.2586e-06, -5.3034e-05, -5.5010e-05,  1.7314e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1750: [tensor([ 1.6743e-04, -1.5437e-01,  2.0421e-05, -6.6580e-05, -9.8374e-05,
        -1.0811e-04, -7.8786e-05, -1.2876e-04, -4.1292e-06,  1.7435e-06,
         3.5476e-05,  6.9544e-06, -3.3850e-05,  8.5414e-06, -9.1505e-05,
        -9.8565e-05, -1.8551e-04,  4.2343e-05, -2.3238e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5381e-05, -9.6802e-02,  2.1329e-05,  6.3869e-05, -3.5499e-05,
         6.0684e-06, -7.6561e-07, -1.1387e-04,  3.5091e-05, -3.9962e-06,
        -4.3684e-05,  2.4065e-05,  6.7192e-06, -3.5724e-05, -5.2077e-05,
        -7.2667e-05, -1.5168e-04,  4.6183e-05,  1.6148e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6348e-04,  3.4236e-01,  2.1416e-05,  7.0007e-05,  4.1906e-04,
         1.2404e-04, -1.1323e-04,  1.9027e-04, -4.3507e-05, -2.7240e-04,
         1.8799e-04, -2.3513e-04, -5.0647e-05,  1.7291e-04, -2.4715e-04,
        -1.5303e-04, -1.4031e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2662e-05, -2.4809e-01,  4.0090e-05,  8.9885e-05,  1.1381e-05,
         4.1544e-05,  4.4677e-05,  1.3890e-04,  9.8364e-06,  1.7247e-05,
        -2.8945e-05,  1.1367e-05,  7.9568e-05,  8.1487e-05,  9.2869e-05,
         8.8250e-05,  6.5391e-05, -1.2552e-05, -5.3212e-05, -4.8044e-05,
         7.0875e-05,  7.5541e-05,  2.5210e-05,  8.0443e-07,  2.7484e-05,
         1.9119e-05, -1.5350e-06, -1.3063e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1336e-05, -1.3811e-01,  3.0435e-06, -3.0078e-05, -3.4392e-05,
         2.6379e-06, -3.3264e-05, -5.8099e-05, -1.2381e-05,  7.8012e-06,
        -3.8113e-05,  2.4599e-05, -1.8724e-05, -8.4998e-05,  1.7659e-06,
        -6.0865e-05, -3.3398e-05,  4.4743e-05, -5.5308e-05,  5.0854e-05,
        -1.4958e-05, -1.7263e-05, -3.1869e-05, -9.9629e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6847e-04,  3.1436e-01, -1.8921e-05,  4.0374e-05,  3.5520e-05,
         5.7737e-05,  1.9069e-05, -1.7263e-05,  9.0593e-05,  1.4124e-04,
        -1.8243e-05, -3.2975e-05,  6.2588e-05,  1.0477e-04, -1.0017e-06,
         9.8080e-06, -3.8623e-05, -1.0008e-04,  6.9305e-05,  2.7200e-05,
         1.0161e-05, -4.3922e-05, -1.3212e-04,  1.2443e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0045e-04,  4.7182e-01,  1.6493e-05,  1.1101e-04,  1.9048e-04,
        -6.6735e-05,  1.6454e-04,  4.5763e-05,  2.2056e-04, -1.3495e-04,
        -3.9705e-05,  2.4724e-05,  2.1515e-04, -7.8992e-05, -6.3890e-05,
         1.2870e-05,  1.0473e-04, -9.9266e-05,  2.0815e-05,  4.0877e-05,
         8.3587e-05, -7.0424e-05, -6.7502e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0152e-05, -1.9922e-01, -8.4901e-05,  1.0849e-05, -7.7817e-05,
        -6.7463e-05,  9.3275e-06, -6.0324e-07, -1.0659e-04,  5.3137e-05,
        -4.7127e-05, -1.0534e-05, -8.4035e-05,  8.6223e-06, -7.4042e-05,
        -5.0768e-05,  4.9096e-05, -3.2651e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.9378e-04, -2.0458e-01, -1.3111e-04, -6.6315e-05, -9.5303e-05,
         1.0634e-04,  1.5919e-04, -2.7061e-06, -1.0255e-04,  1.3274e-04,
        -1.7580e-05,  4.5352e-05,  2.9200e-05, -7.3943e-06,  8.9046e-05,
        -9.6388e-05, -4.5098e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4573e-05, -1.5193e-01,  7.0517e-05, -6.8151e-06, -8.3412e-05,
         5.7341e-05,  5.3662e-05,  3.5623e-05,  9.0518e-05,  5.1877e-05,
         3.3672e-05, -8.5224e-05,  9.3913e-05, -7.1552e-05, -7.5245e-05,
        -7.0139e-05, -2.7630e-05,  3.7302e-05, -6.8738e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5905e-04,  2.7000e-01, -6.0607e-05, -1.7496e-05, -2.8523e-05,
         1.5138e-04, -6.1060e-05, -1.0307e-05, -2.4995e-04,  3.1209e-05,
         6.5612e-05,  8.7720e-05, -8.8163e-05,  5.5127e-05,  2.3520e-05,
        -6.2129e-05,  5.7942e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5224e-04,  3.7103e-01, -4.2163e-05,  4.7045e-05,  2.7547e-06,
        -1.6017e-04, -7.0257e-07, -5.9970e-05, -7.1444e-05,  3.6229e-05,
         1.8530e-04, -2.2628e-04, -4.2781e-05, -4.3517e-05, -4.7854e-05,
        -1.2320e-04,  3.7526e-06, -5.7387e-05,  8.6831e-06, -1.3576e-04,
        -1.7711e-04,  2.4476e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1800: [tensor([ 1.0415e-05, -1.3698e-01,  9.8659e-06,  5.3369e-06,  4.7352e-05,
        -9.5794e-05,  1.2604e-05, -9.2449e-06,  7.0692e-05, -6.7672e-05,
         4.5890e-06,  1.5236e-04, -3.2446e-05,  4.2056e-05,  2.7489e-05,
         7.8588e-06, -7.8947e-05,  6.7733e-05,  6.9568e-05,  1.7656e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8888e-04,  2.9491e-01, -1.9667e-04,  4.7749e-05,  1.2947e-04,
         1.2808e-04,  1.1882e-04,  4.8301e-05, -1.1483e-04,  1.4079e-04,
        -8.5943e-05, -2.1238e-04, -3.2876e-05,  3.2358e-05,  5.6827e-06,
        -1.7786e-04, -1.8848e-04, -2.6334e-05, -1.1571e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4955e-04, -1.0260e-01, -5.1893e-05,  3.5652e-05, -7.8578e-05,
         4.0092e-07,  1.9572e-05, -6.5548e-05, -8.3575e-05, -3.9423e-05,
        -1.2689e-05, -4.2634e-06, -1.0399e-04, -9.5820e-05, -6.2757e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8220e-05, -1.4379e-01, -2.4130e-05,  2.8629e-05,  5.3126e-05,
        -6.3585e-05,  5.4504e-05, -1.2009e-04,  7.5300e-05,  1.7664e-04,
         5.1481e-05,  3.1972e-05, -8.4800e-05,  1.0628e-04,  5.2348e-05,
         9.2042e-06, -2.9273e-05,  5.1614e-05, -3.3108e-05, -2.4923e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0209e-04, -1.5526e-01, -8.3896e-05, -1.7403e-06,  7.4887e-05,
         1.2752e-04,  2.5608e-05,  8.3037e-05,  1.0609e-04,  2.0224e-04,
        -7.7790e-05,  1.3802e-04,  3.5170e-05, -2.1934e-05, -1.5092e-04,
         6.5815e-05, -7.0681e-05, -4.1584e-06, -5.2839e-05, -5.4213e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4683e-04,  2.2762e-01,  1.4374e-04,  1.5811e-04,  6.3168e-05,
         1.1964e-04,  2.2302e-04,  2.0190e-04, -2.0973e-04,  1.7148e-04,
         1.0964e-06, -7.3874e-05,  7.3024e-06,  8.8077e-06,  4.4946e-05,
        -1.2653e-04, -1.5161e-04, -1.1053e-04, -6.4584e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5123e-04, -1.0767e-01, -2.7517e-05, -6.0647e-06,  8.1298e-05,
         6.6203e-06,  7.6065e-06,  1.4067e-06, -2.8279e-05,  2.6146e-05,
         3.8789e-05, -2.0999e-05,  2.1700e-05,  2.2733e-05,  9.1316e-06,
         5.4134e-05,  3.5559e-05,  3.9605e-06,  3.8324e-05, -1.3776e-06,
         1.5868e-05,  8.6404e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6643e-05,  3.4179e-01,  1.0454e-04, -1.0032e-04, -4.1421e-05,
         1.1554e-05, -1.5856e-04,  4.9374e-05, -1.1399e-05, -8.6260e-05,
         3.1634e-05,  3.4124e-05,  8.0986e-05,  8.1173e-05,  4.3095e-06,
         1.6244e-05, -3.1840e-06, -4.5497e-05, -9.9279e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7715e-05, -1.7726e-01, -7.8189e-05, -6.2785e-05,  3.1703e-05,
        -5.3371e-05,  4.1539e-05,  5.3681e-05,  7.4418e-05,  1.5455e-04,
        -3.9904e-05, -3.6592e-05,  3.4020e-05, -4.1988e-05,  3.6232e-05,
         1.3831e-05,  5.6792e-05,  1.1232e-04,  5.9326e-05,  2.1543e-05,
         4.6724e-05, -5.5569e-06,  7.1092e-05,  2.4531e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0394e-06, -1.0190e-01, -1.4890e-05,  1.1336e-05, -1.5515e-05,
         1.7551e-05,  2.1307e-05,  5.7228e-06,  1.9167e-05,  3.4263e-06,
         1.8630e-05,  3.5856e-05, -3.0323e-05, -7.2281e-06,  4.6579e-05,
        -5.8819e-05,  3.6772e-05,  3.2210e-05,  3.9621e-05, -1.5862e-05,
        -3.7971e-05, -2.6606e-05, -1.4787e-05,  5.9076e-05,  9.7231e-06,
         1.5273e-05, -7.1601e-06, -5.9341e-06, -1.0390e-05,  6.2087e-06,
        -1.3291e-05,  1.0103e-05,  8.0496e-06, -3.0629e-05,  1.9839e-05,
         5.3336e-05, -5.6502e-06,  1.3325e-05,  2.1754e-06, -1.6247e-05,
         4.2400e-05,  6.2600e-05, -2.3052e-05,  3.5026e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5268e-04, -9.6118e-02,  1.3775e-05, -2.3103e-05,  5.2056e-05,
         1.9434e-05,  1.8071e-05,  4.1233e-05,  3.7292e-05,  3.7084e-05,
        -5.0702e-06,  3.2952e-05,  6.1754e-05, -2.0900e-05,  2.3970e-05,
         9.0321e-05, -2.2227e-05,  3.0866e-05,  1.0161e-05, -8.4090e-06,
         1.8742e-05, -2.5300e-05,  3.5622e-05,  3.5306e-05, -6.3103e-06,
         4.9472e-05,  7.2803e-07, -1.9910e-05,  5.5070e-05,  5.0471e-06,
         1.4058e-05,  2.6391e-05, -1.5195e-05,  2.7783e-05,  2.8978e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2996e-04, -1.3627e-01, -4.3595e-05, -3.1352e-05,  9.2817e-05,
        -5.1148e-05,  2.3781e-05, -1.1112e-05,  3.0686e-05,  1.3952e-05,
         2.9190e-05, -4.7439e-05, -5.5931e-05, -1.7034e-05,  1.2323e-04,
        -4.3649e-05, -7.3730e-05, -7.9983e-06, -1.3656e-05, -7.4345e-05,
         6.7542e-05,  1.0820e-05, -3.4023e-05,  4.1334e-05, -2.7242e-05,
        -2.0698e-05,  4.6460e-06,  3.5607e-05, -4.4759e-05,  1.2234e-05,
        -5.0703e-05,  8.8928e-05, -3.5312e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #1850: [tensor([ 2.5744e-05, -2.4768e-01, -3.0746e-05,  2.4990e-06, -3.0982e-05,
         4.5025e-07, -3.4050e-05, -1.4512e-05,  2.4723e-05, -9.5449e-06,
        -1.5216e-05,  1.0433e-05,  2.1336e-06, -3.4533e-05, -2.9879e-05,
        -8.1708e-06, -3.5251e-05, -1.1503e-04, -2.5059e-05,  6.8091e-06,
         6.3986e-05, -5.0023e-05,  3.8271e-06,  9.0040e-05,  2.8913e-05,
         5.9285e-06, -6.2108e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1748e-04, -2.2175e-01,  4.8795e-05, -2.6494e-06, -2.4299e-05,
        -1.8521e-05, -4.5361e-05, -1.2025e-04,  3.8538e-05, -3.3650e-05,
         8.0050e-05,  2.8993e-05,  4.5146e-05,  4.4534e-05, -1.8395e-05,
        -6.4101e-06, -1.3085e-06,  4.3174e-05,  6.2582e-05, -2.3862e-05,
        -1.4629e-05, -2.0228e-05, -6.4346e-05,  8.2488e-06,  1.4116e-05,
        -1.3222e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3892e-04, -2.3566e-01, -1.2261e-05,  8.7579e-05, -1.1421e-04,
        -1.4925e-05, -7.6513e-05, -2.9806e-05,  4.7126e-05,  4.7347e-05,
         6.1841e-05,  8.0637e-05, -6.9942e-05,  1.0448e-04, -4.9152e-06,
        -7.4327e-05, -3.7631e-05, -1.9712e-05,  4.2214e-05, -6.4358e-05,
         6.3456e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6044e-04,  5.2458e-01,  8.4931e-05, -9.5918e-05,  5.9916e-05,
         1.6054e-04, -2.1478e-05,  8.0480e-05, -8.1976e-05,  4.5184e-05,
         3.1199e-05, -1.5750e-04,  5.2607e-06,  7.1223e-05,  4.0321e-05,
         4.1234e-05,  1.2095e-04,  9.3435e-05, -1.5388e-04,  4.4150e-06,
        -3.0591e-05, -1.5690e-04, -8.9766e-06,  2.2517e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1923e-05, -2.2054e-01, -2.0495e-05, -6.6360e-05, -3.5443e-05,
         9.9909e-05,  8.1739e-06,  4.9766e-05, -2.5097e-05,  6.2406e-06,
         5.0366e-05,  9.6992e-05, -4.1112e-05,  6.3193e-05,  4.9046e-05,
        -4.1740e-05,  3.2175e-05,  5.9281e-05,  4.6599e-05,  3.2751e-05,
        -1.1701e-04, -5.8428e-05,  8.4951e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1116e-04,  4.1872e-01,  2.1920e-07,  5.5847e-05, -4.6610e-05,
        -1.3088e-05, -7.8315e-05, -3.4648e-05,  5.7222e-05, -2.3555e-05,
         5.0841e-05, -1.1626e-04, -2.2807e-05, -9.8685e-06, -1.0467e-05,
        -2.2621e-06, -1.0197e-05, -5.5531e-05,  4.3360e-05, -2.1951e-06,
         1.1001e-05, -1.0981e-04,  3.7882e-05,  3.9547e-05, -1.3031e-05,
         4.7732e-05, -7.9872e-05,  4.9196e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2939e-05, -2.1432e-01, -5.9348e-05, -2.6930e-05,  6.9513e-05,
         5.9269e-05,  1.4344e-05,  1.1526e-05, -4.4203e-05,  3.2827e-05,
         3.3580e-06,  1.0093e-05, -3.7156e-05, -3.0924e-06,  1.6689e-05,
        -1.6413e-05, -1.3402e-05,  1.7296e-05, -4.0371e-06, -2.4756e-06,
        -4.1040e-05, -7.8632e-05,  2.0093e-05,  1.6865e-06, -5.0367e-06,
        -1.8242e-05, -5.7678e-05,  3.1563e-05, -1.4984e-05, -1.3221e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0171e-04, -1.6304e-01, -1.4648e-05, -1.3241e-05,  7.6310e-06,
         1.0980e-05,  2.3053e-05, -2.6063e-05,  8.6371e-06,  1.9516e-05,
        -1.6703e-06, -9.0724e-06, -1.2749e-06, -6.1913e-06,  4.7468e-06,
        -3.5396e-06,  1.6081e-05, -1.2804e-05,  1.9514e-05,  1.2749e-05,
        -4.6178e-07, -1.8884e-05,  8.8546e-06, -3.7921e-05,  2.6791e-05,
         4.6916e-05, -1.6911e-05, -7.5089e-07,  7.3241e-06, -1.5020e-05,
        -1.0447e-05,  4.5250e-05, -2.7809e-05, -3.5662e-05, -8.2610e-06,
        -1.4747e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0170e-04,  4.3203e-01,  1.7963e-04,  2.8175e-04, -5.6212e-05,
        -4.8091e-05,  1.5748e-05,  3.9384e-05,  2.7105e-05, -1.6902e-04,
         2.4103e-05, -1.6852e-04,  1.3746e-05, -4.9803e-05, -1.5247e-05,
         1.1367e-04,  5.1992e-05,  1.0157e-04,  2.3488e-05,  8.5011e-05,
         2.9784e-05,  1.4194e-04,  1.0345e-05,  4.2521e-05,  1.4054e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0379e-04, -1.3124e-01,  4.2478e-05,  4.3912e-05,  7.2813e-05,
        -1.5696e-05,  3.5296e-05, -5.6912e-05, -5.5068e-06,  5.1398e-06,
         3.9931e-05,  4.2595e-05, -1.8998e-05,  1.3893e-06,  7.9442e-05,
         8.3093e-06,  7.1217e-06,  1.3317e-05,  2.9789e-05, -5.3098e-07,
         3.0566e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0993e-05,  3.7341e-01, -4.3632e-05,  2.5675e-05, -1.5056e-05,
        -4.6744e-05,  1.7121e-04, -4.9188e-05,  9.2402e-05, -8.7379e-05,
        -2.5651e-06, -7.5900e-05,  1.7446e-05,  8.8657e-06,  4.0304e-05,
         7.6770e-05, -1.0225e-05,  6.0533e-05, -1.0313e-04, -1.8955e-06,
         5.7341e-05,  8.4907e-05, -7.0699e-05,  2.1335e-05, -4.7834e-06,
        -5.5807e-05, -1.8577e-05,  3.7771e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5507e-04, -2.5632e-01, -6.9698e-06,  1.0444e-04,  4.8067e-05,
        -9.5043e-05, -2.4078e-05,  7.0899e-05,  9.7896e-06,  1.8582e-04,
        -4.3643e-05, -6.6859e-05,  7.9856e-05,  4.4761e-05, -2.3623e-05,
        -3.4904e-05, -4.9794e-05, -9.2235e-05, -3.7402e-05, -8.8788e-05,
         1.9801e-05,  3.4096e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1900: [tensor([-1.6710e-05, -1.1123e-01, -1.6457e-05,  3.8352e-05,  2.6142e-05,
         1.2245e-05, -2.5263e-05,  4.3630e-05, -6.4223e-05, -3.7337e-06,
        -4.2517e-05,  5.9010e-06, -1.2092e-05, -7.5993e-05, -1.3295e-06,
         4.6715e-06,  1.5588e-05, -1.0566e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1444e-04,  4.0592e-01, -6.2241e-05, -4.4169e-04, -2.5549e-04,
        -1.1308e-04,  3.9332e-04, -2.5220e-04,  1.0043e-04, -6.4377e-05,
        -1.1932e-04, -5.6232e-05,  2.6951e-04,  1.1201e-04, -3.5607e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3761e-05,  3.4540e-01, -1.1564e-04, -9.4365e-05, -1.7959e-04,
        -2.1463e-04,  5.3904e-05,  3.3456e-04,  1.3572e-04, -9.7440e-05,
         1.4529e-04, -2.3880e-05,  6.4406e-06, -1.1388e-04,  2.7544e-04,
         9.6697e-05,  3.8274e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8719e-04, -2.3712e-01,  4.0335e-05,  3.8936e-06, -1.0820e-04,
         4.9703e-05, -2.1677e-05, -4.7343e-05, -3.0096e-05,  1.1228e-04,
        -5.4630e-05, -2.7543e-05, -1.2992e-06,  4.2032e-05, -1.0154e-04,
         1.9564e-05, -1.0480e-05,  3.4852e-05,  2.3536e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0925e-06, -1.3509e-01, -2.5784e-05,  1.1362e-05, -2.8675e-06,
         2.6254e-05, -5.7821e-06,  3.3242e-05, -2.3759e-06,  7.4015e-05,
         4.6767e-05, -9.3692e-07,  2.0368e-06, -4.1057e-05,  1.8042e-05,
        -2.9696e-06, -1.9001e-05, -1.2936e-06, -2.2793e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4533e-04, -2.0631e-01, -2.3226e-06,  2.2549e-05,  2.7607e-05,
         5.1641e-05, -4.5738e-05,  1.2316e-05,  1.0093e-05, -3.0419e-05,
         1.7880e-05, -1.9074e-05,  3.4916e-05,  1.8738e-05, -5.2122e-05,
        -1.9788e-05,  2.3919e-05,  7.7032e-05, -3.7484e-05,  1.5842e-07,
        -7.3021e-07,  4.0969e-05, -2.1827e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9063e-05, -9.2382e-02, -2.9487e-05,  2.1741e-05, -5.6984e-07,
         5.2167e-05,  5.8265e-05,  3.7894e-05, -6.0240e-05, -3.0159e-05,
         2.7030e-05,  4.2902e-07, -8.4789e-06, -9.6475e-05,  1.4927e-05,
         3.9534e-05, -1.4312e-05, -4.6124e-05,  1.2341e-05, -1.3450e-05,
        -4.2672e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0058e-04,  1.5468e-01, -8.4003e-05, -1.2701e-04,  1.1684e-04,
        -2.1976e-04, -8.3813e-05,  3.6079e-05,  4.1594e-05, -4.7533e-05,
        -1.6608e-05, -1.5076e-05, -3.2732e-05,  5.3501e-05,  1.0773e-05,
        -3.1992e-05, -1.0742e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9831e-04,  2.0996e-01,  1.0997e-04, -6.3927e-05,  1.4207e-04,
        -1.8482e-04,  1.8757e-05, -7.7177e-05, -7.5905e-05, -9.6960e-05,
         1.8157e-05, -7.3965e-06, -1.3318e-04, -6.0162e-05, -9.3661e-05,
         1.6104e-05,  6.3693e-05,  1.0784e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5082e-05, -1.7712e-01, -2.5225e-05,  1.0886e-05,  2.1773e-05,
         4.3607e-06, -7.2367e-05, -1.6256e-07, -5.0404e-05, -2.9593e-07,
         4.1226e-05, -5.2956e-05, -1.7757e-05, -4.5883e-06, -2.2767e-05,
        -4.8267e-05,  1.8270e-05, -2.8234e-05,  1.5573e-05, -2.7777e-05,
         3.3225e-05, -2.8704e-05,  4.2778e-05,  3.1586e-05,  1.2653e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2571e-04, -1.4454e-01,  2.0745e-05, -2.4225e-06,  2.6912e-05,
         6.4783e-05, -5.9162e-06,  1.0130e-06,  2.9426e-05,  4.7188e-05,
         2.2329e-05,  5.1934e-05,  1.8683e-05,  6.2887e-05, -1.5519e-05,
         5.9735e-05, -1.4459e-05, -3.7800e-05,  5.5228e-05, -7.8455e-06,
         5.9305e-05,  2.2092e-05,  1.3298e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1821e-05,  2.7398e-01,  4.0285e-05, -4.4740e-05, -5.5468e-05,
        -1.0969e-05,  7.0838e-06, -8.3099e-05, -7.2143e-06, -6.7026e-05,
        -2.7508e-05, -1.6663e-05, -4.7224e-05, -5.7942e-05, -3.4836e-05,
         4.6412e-05, -1.2605e-04,  5.0996e-06,  3.3667e-05,  2.4227e-05,
         3.7506e-05,  4.6667e-05,  1.2444e-06,  1.0481e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #1950: [tensor([ 1.3082e-04,  2.2895e-01,  6.5279e-05,  3.6440e-05,  1.3116e-04,
        -4.5494e-05, -7.7377e-05,  1.3215e-05,  1.1100e-04,  4.4449e-05,
         9.1940e-05,  6.7292e-05, -3.1393e-05, -9.5515e-05, -1.0556e-04,
        -3.6193e-05, -1.0387e-04,  1.3816e-05, -2.5884e-05, -9.7242e-06,
         6.1024e-05, -4.5948e-05, -5.3503e-06, -4.7666e-06, -5.2285e-05,
         2.2400e-05, -1.5221e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0107e-04, -8.8094e-02, -6.8675e-06,  5.9805e-05, -1.7181e-05,
        -4.9141e-05,  5.1999e-05,  4.1601e-06, -2.5236e-06,  1.9807e-05,
        -5.3524e-05,  5.0892e-05, -1.8867e-05,  1.9431e-05,  2.9523e-06,
         2.5300e-05,  1.2667e-06,  2.0839e-05,  4.1784e-05,  5.0395e-05,
        -2.4852e-05,  1.1746e-05, -6.0839e-05, -2.6282e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.2398e-05, -1.1697e-01, -6.1169e-06,  1.6469e-05, -1.7938e-05,
         8.0211e-06,  4.5900e-05,  6.6525e-05, -1.5854e-05,  2.2821e-05,
        -7.8246e-05,  1.8306e-05, -1.9628e-05, -8.9580e-06,  1.7207e-05,
        -5.4363e-05, -2.6939e-05, -1.8836e-06,  9.2263e-07,  2.9577e-05,
         2.8064e-05, -2.3140e-05,  2.1942e-05, -2.3355e-05,  9.2482e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3784e-04, -1.8345e-01,  6.7149e-06,  1.6633e-04, -8.6005e-05,
        -7.8949e-06,  9.3858e-06, -4.7807e-05,  1.4514e-04, -1.0743e-04,
        -8.9314e-06,  8.9148e-05,  2.4063e-05, -9.5348e-05,  1.2028e-04,
         3.2648e-05,  4.5274e-06,  1.0514e-04, -6.2277e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8559e-04, -1.0784e-01,  1.1031e-05,  1.9798e-05, -9.5380e-06,
         1.4384e-06,  2.5115e-05, -1.0234e-05, -4.6030e-05,  9.9830e-06,
        -6.7241e-06,  7.2258e-07, -2.4341e-05, -2.6428e-05, -1.3028e-05,
        -6.5161e-06,  4.6699e-06, -1.2272e-05, -3.4566e-05, -7.9118e-06,
         5.3617e-06, -1.0976e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1789e-04,  2.2158e-01, -1.2486e-05, -3.1975e-05, -9.3566e-05,
        -3.3761e-05,  4.3546e-06,  4.6535e-05,  7.6277e-05,  2.4747e-05,
        -1.9753e-05,  3.2690e-05, -3.3191e-05, -2.4752e-05,  6.0337e-05,
        -3.4514e-05, -6.6637e-06, -1.8229e-05, -4.4943e-05, -2.5326e-05,
         2.0932e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7689e-05, -1.6499e-01,  1.2484e-04, -2.8892e-05, -6.5359e-06,
         3.0237e-05,  1.3682e-04, -2.8882e-05,  5.6612e-05, -3.6491e-05,
        -2.8992e-05, -3.5149e-05,  3.5813e-06,  7.7943e-06, -2.1442e-05,
         2.2668e-07, -2.6658e-05, -2.0538e-05,  4.3876e-05,  1.2599e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1446e-04,  2.6751e-01, -1.6355e-05, -3.0930e-05, -8.2341e-05,
        -4.5367e-05, -5.9462e-05,  1.1731e-04, -7.1528e-05, -1.2131e-05,
        -8.1309e-05, -4.2060e-05,  5.8208e-06,  2.8531e-05,  6.5374e-05,
         8.2130e-06, -8.4358e-05,  6.5370e-05, -9.4419e-05,  4.6737e-05,
         2.5767e-05,  6.4407e-05, -3.8366e-05,  8.1661e-06,  8.6044e-06,
         1.2897e-05, -9.4520e-05,  5.6362e-05,  5.1830e-05,  1.0876e-04,
         1.0593e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1366e-04, -4.9254e-02, -1.2659e-05, -2.5559e-07,  3.9633e-06,
        -1.8703e-07, -1.7786e-05, -2.0894e-06,  9.3171e-06,  2.7417e-05,
        -2.9686e-06, -1.1370e-05,  1.1444e-06,  3.8866e-06,  1.0866e-05,
         1.8094e-05, -1.7918e-05, -2.5040e-05, -1.1039e-05,  1.5474e-05,
         1.1361e-05, -1.0291e-05, -1.5980e-05, -2.7551e-06, -3.5211e-07,
         1.2898e-06,  6.5224e-06, -8.2059e-07, -6.9211e-06, -3.4659e-06,
         8.5890e-06, -3.3564e-07,  1.0024e-05,  9.2155e-06,  2.6960e-06,
        -5.1682e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1857e-05, -9.9776e-02, -1.8786e-06,  2.3188e-05,  3.9148e-05,
         3.1354e-05,  2.7999e-05,  1.3896e-05,  1.3761e-05, -1.3131e-05,
         3.2256e-05, -2.8665e-05, -2.4357e-05, -1.5475e-06,  1.2358e-05,
         1.4651e-05, -4.9177e-06,  6.3578e-06, -3.1840e-07, -1.0099e-05,
         3.8436e-05,  1.2724e-05, -2.2611e-05,  8.3719e-06,  3.1527e-05,
        -7.0547e-06,  2.9185e-05, -2.5776e-05, -1.5351e-06,  3.8890e-06,
         4.5536e-06, -1.2725e-05, -9.2360e-06, -4.1344e-06, -1.8248e-05,
        -1.5898e-05, -3.0688e-05,  9.4699e-06, -1.6532e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1460e-04, -2.3726e-01, -3.7639e-05, -4.2199e-05,  2.5643e-05,
         3.2217e-06, -3.9124e-05, -2.7008e-05, -1.2903e-05, -8.1625e-06,
        -2.6000e-05,  1.1385e-05, -3.2415e-05, -1.0763e-05,  1.2766e-05,
         1.1537e-05,  3.8752e-05, -1.1104e-04,  5.0581e-07,  1.1053e-04,
         7.3438e-05, -1.1914e-05, -4.1773e-05, -1.1834e-05,  1.6352e-05,
        -1.9337e-05, -1.4764e-06, -3.4243e-05,  3.4479e-05,  4.6926e-05,
         1.2764e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2765e-05, -1.8088e-01, -4.2122e-05, -3.8657e-05, -4.7156e-05,
        -4.5708e-05, -1.0581e-05,  3.9291e-05, -5.2444e-05, -2.9429e-05,
        -2.6396e-05, -1.3750e-05, -1.1551e-05, -8.2391e-06, -2.3898e-06,
        -6.0741e-06,  1.2441e-05, -3.6923e-05, -2.3248e-05,  8.1329e-06,
         1.4695e-05,  6.1651e-07,  1.7844e-05,  1.2867e-05,  2.7488e-05,
        -4.4966e-06, -1.2632e-05, -2.8109e-05, -3.3940e-05, -4.2789e-05,
        -7.1810e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2000: [tensor([ 3.5078e-04, -1.1468e-01, -3.6947e-07,  8.4423e-06,  1.0887e-04,
         5.0183e-05, -3.4466e-06, -3.0942e-06,  6.2378e-05, -6.0857e-05,
         6.4877e-05, -1.7033e-05,  3.6245e-05, -5.9312e-05,  1.0967e-04,
        -1.7666e-05, -8.1043e-05,  3.9437e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5477e-05,  1.6841e-01, -4.2022e-05, -9.7003e-05, -4.9656e-05,
         2.7015e-05, -9.3490e-05,  2.7460e-05, -7.6368e-05,  4.8128e-05,
        -5.7813e-05,  3.2121e-05,  5.3433e-05, -2.0285e-05,  1.0007e-04,
        -6.6469e-05, -1.7656e-04,  1.9128e-05, -8.6015e-05,  1.8173e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6148e-05,  9.9965e-02,  5.1977e-05,  4.4791e-05, -1.3096e-05,
        -4.5666e-05, -1.1948e-05,  4.6170e-05,  2.2639e-05,  1.1651e-05,
         8.0852e-06,  2.2119e-05,  2.1285e-05, -8.4987e-05,  1.9537e-05,
        -3.0863e-05, -8.6785e-05,  2.8990e-05,  1.2945e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.1378e-05,  5.8086e-01,  4.3865e-04,  1.4780e-04,  1.3398e-04,
        -3.7900e-05,  2.2633e-04,  2.1321e-04, -2.4085e-04,  2.6860e-05,
        -1.4972e-04, -6.1265e-05, -8.4050e-05, -4.2902e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6653e-05, -1.7481e-01,  6.9271e-05,  7.7191e-05,  4.4733e-05,
        -7.8401e-05, -6.0518e-05, -4.1306e-05,  3.1350e-05,  8.3840e-05,
         4.8779e-05,  5.3179e-05,  1.2875e-05,  1.3246e-05,  9.0370e-06,
         8.4171e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6406e-05, -2.0176e-01, -7.7342e-06, -8.7344e-05, -8.8277e-05,
         3.6352e-05, -2.5532e-06, -3.5882e-05,  2.2834e-05, -1.4597e-05,
        -9.1661e-05, -3.1796e-06,  2.4928e-05, -4.0671e-05, -2.8811e-05,
         5.3336e-06, -1.7859e-05,  4.9757e-05, -5.8966e-05,  5.0301e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6650e-04, -1.4107e-01, -3.4845e-05, -9.3548e-06,  2.2690e-07,
        -1.2583e-05,  4.4127e-05, -2.4936e-05, -6.8500e-06,  1.5316e-05,
        -1.5848e-05, -7.8004e-07, -2.5708e-05, -5.5238e-05,  1.5051e-05,
        -3.5152e-06,  5.8674e-05, -2.1981e-05,  3.6383e-05,  8.7828e-05,
        -2.2484e-05,  3.1816e-06, -7.3077e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7981e-05, -1.8571e-01, -2.7818e-05,  2.8618e-05, -1.3686e-05,
         9.5383e-06, -5.5257e-05, -1.2450e-05, -3.3755e-05, -3.6308e-05,
        -2.1764e-05, -1.7457e-05,  2.3124e-06,  5.1650e-06,  3.9621e-05,
        -4.2628e-05,  8.3543e-06, -9.4259e-06, -3.8725e-05, -9.4159e-06,
        -1.0439e-06,  1.2323e-05,  4.5269e-05, -1.3475e-05,  1.8941e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2802e-05, -1.8933e-01, -2.9086e-05, -7.5038e-05, -6.8739e-05,
         1.4023e-05, -3.0952e-05,  5.6504e-05, -3.8633e-05, -3.9601e-05,
        -1.1200e-04,  4.9699e-05, -6.1772e-05,  1.2109e-05, -1.4974e-05,
         6.2183e-05, -1.3806e-05, -1.6732e-05,  7.1880e-06,  8.7715e-05,
         7.7168e-06, -5.9769e-06, -2.6193e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2978e-05, -8.7966e-02,  1.4662e-05, -2.0032e-05, -1.1371e-05,
        -3.1363e-05,  7.2055e-06,  1.2524e-05, -4.4505e-05,  3.5385e-06,
        -2.4249e-05, -7.5590e-06, -2.2631e-05, -9.0251e-06, -1.2905e-05,
        -5.9150e-06, -1.2257e-05, -1.6087e-05,  1.7697e-06, -2.8501e-05,
         5.0623e-06,  3.3353e-05,  2.6170e-06, -1.5415e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0609e-04,  4.5380e-01,  5.2361e-05,  2.7935e-05,  2.7408e-04,
         2.8299e-04, -2.8696e-05, -4.5023e-06, -5.4728e-05,  1.1779e-04,
         5.4277e-05,  1.5946e-04,  1.0705e-04, -5.2984e-05,  5.7113e-05,
        -3.6866e-05,  1.0176e-04, -2.8662e-05, -1.0827e-04, -2.7764e-05,
         1.9813e-04, -1.7295e-05, -8.3765e-05, -9.0999e-05, -5.3371e-05,
        -1.4347e-04,  2.5062e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0646e-04, -2.2798e-01, -4.4018e-05, -4.6544e-05,  4.6922e-06,
         1.7632e-05, -4.6046e-06, -2.9731e-05,  6.2703e-05, -1.5415e-05,
         1.9841e-07, -5.6089e-05, -2.3431e-05, -1.3907e-06,  3.4552e-05,
        -3.5031e-05, -1.6239e-05,  3.8405e-05, -5.8442e-06, -2.6869e-05,
         1.1032e-05,  2.7576e-05,  4.4861e-07, -2.8638e-05,  5.0062e-05,
        -1.3873e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2050: [tensor([ 9.5653e-05, -1.6402e-01, -4.2801e-05,  7.1327e-06,  4.5673e-05,
         5.3276e-05, -1.7379e-05, -3.0498e-05,  6.2860e-05,  2.3267e-05,
        -5.0005e-05,  5.6936e-05, -4.5008e-05,  6.2236e-05, -2.1670e-05,
        -5.7594e-06, -1.3700e-05,  1.4098e-05,  8.0783e-05,  5.2121e-05,
         4.2153e-05,  9.2420e-05, -1.4133e-05, -1.9127e-07,  4.0581e-06,
        -2.0383e-05,  9.7987e-06,  1.5274e-05,  2.5413e-05,  1.3258e-05,
         1.9657e-05, -7.4855e-05,  8.0420e-05,  2.7931e-05,  3.9460e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2201e-05, -1.0166e-01,  4.3178e-05,  3.6197e-05, -1.5956e-05,
         3.4543e-05, -9.9031e-06,  5.4661e-06,  4.2808e-05,  3.9046e-05,
        -2.1930e-05,  4.5240e-05, -6.4507e-05,  9.6266e-06,  1.4261e-05,
         9.7774e-05, -1.8326e-05,  1.2291e-05, -5.6012e-05, -4.0226e-05,
         3.9026e-05,  2.0429e-05,  4.6973e-06,  3.3250e-05,  5.4610e-06,
         5.9617e-06,  4.4903e-05,  1.5440e-05, -4.5969e-05,  7.2168e-06,
        -5.4664e-05,  2.5231e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0154e-04,  1.4184e-01,  1.1959e-05,  3.6785e-05,  7.6912e-05,
         1.7266e-05,  6.6038e-05,  1.0607e-04,  5.7746e-05, -3.7348e-05,
         1.4894e-05,  1.7471e-05,  2.1128e-05,  6.5161e-06, -8.5526e-06,
        -4.2138e-05,  5.5889e-05,  3.6361e-05, -1.0328e-05, -8.7542e-06,
         1.9048e-05, -4.7395e-05,  2.3519e-05,  4.6129e-05,  7.3815e-06,
         4.5609e-05, -6.0757e-05, -8.9525e-05,  1.1508e-05,  1.9230e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4852e-05,  1.3200e-01, -1.4011e-05,  1.1547e-05,  3.4631e-05,
        -3.0902e-05,  4.4314e-05,  3.4778e-05, -7.0272e-06, -1.2804e-05,
        -2.7597e-05, -3.2657e-05,  1.8301e-05, -5.4506e-05, -4.4210e-05,
         1.3432e-05, -2.2634e-05, -3.7189e-05, -5.6770e-05, -6.2482e-05,
        -3.1867e-05, -3.8366e-05, -3.1067e-05,  9.8243e-06,  1.5356e-05,
        -3.3943e-05, -3.7239e-05, -7.8216e-06,  2.7710e-05, -2.4497e-05,
         1.4057e-05, -6.8224e-05, -1.2418e-05,  5.0427e-07,  3.3193e-05,
        -2.5723e-05, -2.9889e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0350e-04,  1.6690e-01, -8.6123e-05,  1.6639e-05, -2.5014e-05,
        -1.3472e-04,  6.4145e-05,  1.0781e-04,  4.5132e-05, -7.0506e-05,
        -4.6928e-05,  4.4235e-05,  3.0427e-05, -1.1715e-04, -5.1994e-05,
        -5.0463e-05, -2.2453e-05,  4.3305e-05, -7.9784e-05,  4.9448e-05,
        -5.0948e-05, -1.6249e-05, -5.5894e-05,  2.0293e-05,  4.4997e-06,
         3.5958e-05,  7.0722e-06,  1.8994e-05, -6.1284e-06,  6.1598e-05,
        -5.6720e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.8393e-05,  2.5561e-01, -1.3112e-04,  9.8952e-07,  8.0659e-05,
        -6.4102e-06,  4.3613e-05, -7.1556e-05,  7.9167e-05,  2.0374e-05,
         9.3699e-05,  1.6401e-05,  6.7554e-05,  5.5121e-06, -6.0079e-05,
        -1.0712e-04, -3.4110e-05,  6.3077e-05, -1.0366e-05,  4.9211e-06,
         4.9804e-05,  2.6525e-05, -7.3979e-05,  8.6526e-05, -2.7026e-05,
        -7.3311e-05,  3.2335e-05,  5.0132e-05,  3.8245e-05,  9.1445e-05,
         2.7164e-05, -2.4224e-05, -2.0081e-05,  2.5372e-05, -1.1608e-04,
         5.1261e-05, -7.3835e-05,  7.0702e-05, -1.0292e-04,  4.7331e-05,
        -4.0445e-05, -9.2633e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2297e-05,  2.6671e-01, -2.4593e-05, -7.7000e-05,  4.5364e-05,
         1.6025e-05, -4.4801e-05,  3.2019e-05, -7.7248e-05, -2.1909e-04,
        -1.5593e-04,  2.2773e-05,  1.6164e-04, -1.2548e-04,  1.1951e-04,
        -6.6345e-06,  8.5006e-05, -4.8285e-05, -5.5000e-05, -7.2618e-05,
         3.6707e-05, -1.8381e-04, -8.8244e-05,  1.8837e-05, -1.8031e-07,
         4.2571e-07,  1.0691e-05, -6.1091e-05,  6.5628e-05, -1.0331e-04,
         5.2379e-06,  4.4068e-05,  3.0071e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3340e-04,  2.1476e-01, -3.8697e-05, -4.5694e-05,  4.7824e-05,
        -1.6219e-04, -5.3472e-05, -4.5811e-06,  3.9042e-05, -2.2460e-06,
        -1.3339e-04, -3.5763e-05,  1.2355e-04, -1.6830e-04, -1.1837e-04,
        -5.2145e-05, -6.6118e-05, -7.0148e-05, -6.6234e-05, -7.0574e-05,
        -7.6534e-05,  2.0474e-05,  1.2564e-05, -1.9053e-05, -6.1285e-05,
         8.1253e-05,  3.1825e-05, -9.8367e-07,  3.3582e-05, -9.6419e-05,
        -9.9093e-05,  4.4360e-05, -5.8391e-05, -6.7093e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6805e-06,  1.3326e-01, -7.6739e-05,  2.8047e-05, -4.5539e-06,
         5.6882e-05, -4.2518e-05,  1.2729e-05,  3.3444e-06,  2.7401e-06,
        -3.4999e-05,  8.1631e-06,  1.2386e-05, -3.1534e-05, -4.9634e-05,
        -2.8177e-05,  4.0286e-06, -3.6142e-06, -4.4771e-05, -2.5106e-05,
        -1.1156e-04,  2.9958e-05, -1.2921e-06, -1.5291e-05, -4.5807e-05,
        -2.2814e-05, -7.3474e-05, -3.3246e-05, -2.1189e-05,  4.4969e-05,
        -2.4071e-06,  3.2051e-05,  4.0633e-05,  1.8939e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0040e-06,  3.0165e-01, -2.5011e-06, -1.1204e-05,  1.9060e-05,
         2.1426e-05,  1.1961e-04, -2.7844e-05, -3.6559e-05, -4.5081e-05,
        -1.1091e-05,  1.4660e-05,  1.1214e-04, -3.3681e-05, -2.6128e-05,
        -5.8849e-05,  1.2732e-04,  1.5888e-05,  4.4097e-05,  1.4885e-06,
         6.5078e-05, -3.7131e-05, -1.7886e-05, -8.0259e-05,  2.7860e-05,
        -6.7478e-05, -3.1437e-05, -4.3334e-05, -1.7777e-05, -8.3588e-06,
        -6.8969e-05, -1.2988e-04, -9.6155e-05,  1.1848e-04,  9.4530e-06,
        -3.9090e-05, -1.6228e-04, -9.7986e-05,  8.2976e-05,  1.3872e-05,
        -1.6825e-06, -1.5632e-04,  2.3838e-05, -7.5981e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9065e-04, -9.3454e-02,  2.2837e-05,  9.6587e-06, -2.6518e-05,
        -1.1216e-06, -1.3558e-05, -1.1706e-05, -2.1204e-05, -4.5163e-06,
         3.4095e-05,  5.0066e-06,  3.7392e-05,  1.0237e-05,  1.7071e-06,
        -3.4557e-05, -9.1577e-06, -2.6481e-05, -2.4295e-06, -4.8763e-05,
         1.3382e-05, -4.2046e-05, -1.7392e-07, -1.8891e-05,  1.2684e-05,
         1.0512e-05, -2.9892e-05,  1.5796e-05, -2.5525e-06, -1.4801e-05,
        -5.7927e-05,  2.2084e-05,  5.9492e-06, -3.1757e-05, -2.8548e-05,
        -3.3521e-05, -2.3622e-06,  6.2848e-06, -6.3839e-06, -3.1333e-05,
         3.1624e-05,  1.1098e-06, -2.4857e-05,  7.2875e-06, -8.6885e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8694e-04, -1.6393e-01,  2.6907e-05,  1.8257e-05,  9.4748e-05,
         2.7072e-06,  1.8160e-05, -8.5131e-05,  1.0272e-05,  9.8493e-05,
        -3.2926e-05,  3.8357e-05,  3.9213e-05,  1.0137e-05,  4.2073e-06,
         7.2121e-05, -2.6009e-05,  1.4174e-05,  2.7123e-05, -4.2311e-05,
         6.1822e-05, -4.7691e-05, -1.8142e-05, -1.1381e-04, -7.5227e-06,
         4.3698e-06, -1.4731e-05, -1.1714e-05, -5.6739e-05,  2.9798e-05,
        -4.2924e-05,  4.5132e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2100: [tensor([-3.5565e-05, -1.3264e-01,  8.8394e-07, -7.5471e-06, -1.8932e-05,
         6.2315e-05, -7.9082e-06, -2.2186e-05, -6.5170e-06, -5.3504e-06,
         1.3429e-05, -2.5919e-07,  3.1266e-05, -5.3474e-06, -1.4121e-05,
         5.7259e-06,  8.1011e-05,  1.5791e-06, -4.9274e-06, -2.5242e-05,
        -5.9910e-06,  4.7957e-06, -5.7858e-06,  1.0766e-05, -6.8933e-06,
        -6.3447e-06,  1.6493e-06, -4.3807e-06, -1.6543e-05, -4.1499e-05,
         1.2929e-05, -1.6329e-05,  2.0874e-05,  2.0052e-05, -1.8307e-05,
         2.2834e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8851e-05, -1.7518e-01, -1.9649e-05,  1.5134e-05,  2.4978e-05,
         5.1915e-05,  8.0869e-06, -4.3522e-06,  1.7519e-05, -2.3021e-05,
        -1.6003e-05, -4.7655e-06,  5.1036e-05,  1.2228e-06,  2.0075e-05,
        -1.6920e-05,  4.1189e-05, -9.1387e-06, -1.8909e-05,  1.3832e-05,
         1.5834e-05, -2.4275e-05, -1.3541e-05, -3.4061e-06,  1.2911e-06,
         5.0628e-05,  1.4358e-05, -2.4581e-05, -1.5822e-05,  5.3164e-06,
         7.4511e-06, -5.1551e-06, -2.9573e-05, -9.3378e-06, -9.8382e-07,
         1.7023e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3253e-05, -1.3972e-01, -1.4411e-05, -6.3614e-06, -3.9965e-06,
         8.2090e-06,  1.2305e-05, -2.6099e-05,  1.5912e-05,  1.5584e-05,
        -2.6536e-05, -2.2135e-05, -1.3892e-06, -2.0417e-05, -8.1597e-06,
        -1.1735e-05,  6.6813e-06,  2.8002e-05,  3.9019e-06, -2.7976e-05,
         1.1669e-06, -4.4135e-05, -4.8146e-06, -3.5946e-05,  1.9871e-05,
         2.1681e-06, -2.4958e-05,  8.3453e-06, -6.3044e-06,  1.6951e-05,
         2.4543e-05, -7.4301e-07, -6.3891e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5608e-05,  2.6691e-01, -1.6145e-05, -2.0894e-06,  1.3610e-05,
         2.0368e-05, -5.8644e-07,  1.7022e-05,  3.6082e-05, -6.1723e-05,
         1.1530e-04, -4.4459e-05,  8.2559e-05, -1.0215e-05, -2.3345e-05,
         6.3582e-05,  3.7586e-06,  6.2604e-05,  1.6512e-05,  4.0999e-05,
        -4.2896e-05, -3.8980e-06,  9.0402e-06, -2.1411e-05,  8.3511e-06,
        -2.4049e-05, -5.1524e-05, -4.0399e-06,  1.5545e-06, -1.8468e-06,
        -8.3223e-05,  1.8036e-05, -4.8778e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6429e-05,  3.3676e-01, -2.0289e-05,  4.9342e-05, -6.1302e-05,
         6.0226e-07,  9.4677e-06, -1.1088e-05, -5.3347e-05, -3.9597e-05,
        -1.1760e-05, -6.9065e-05, -1.0873e-04,  1.9455e-05,  3.1106e-05,
         4.7612e-05, -4.3981e-05,  1.8426e-06, -4.1161e-05, -4.0202e-05,
         9.1767e-05,  5.5246e-06, -1.4585e-05, -3.3943e-07,  5.0055e-06,
         1.8719e-05,  7.9431e-07,  2.0888e-05, -1.0145e-04, -1.5050e-05,
         3.6063e-05, -8.6280e-05, -4.0439e-05, -1.8760e-05, -8.6593e-06,
        -1.7805e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2187e-05, -2.1898e-01, -1.9721e-05, -3.7371e-07, -1.4590e-05,
         4.2970e-05, -4.2673e-05, -1.6451e-05,  1.4736e-05,  1.6256e-05,
         4.5535e-05,  4.7886e-05,  4.8046e-05, -5.6839e-05,  1.1755e-05,
        -3.1926e-05,  3.4014e-05, -5.9961e-05, -5.7538e-05,  1.6920e-06,
         2.1897e-05, -2.9958e-05,  2.4954e-05, -3.4528e-05,  1.7573e-05,
        -3.8001e-08,  1.3026e-05,  1.3762e-05,  9.3314e-06,  1.4158e-05,
        -1.7853e-05, -1.3376e-06,  2.8395e-06,  3.6079e-06, -1.6203e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9837e-05,  1.6045e-01, -6.6618e-07,  4.1852e-06,  6.6479e-06,
        -1.2107e-05,  7.3943e-06,  1.9821e-05, -2.3589e-05, -3.2822e-05,
        -8.3022e-06, -2.0189e-05, -2.5135e-05, -5.3533e-06,  1.0367e-05,
        -4.2239e-06, -2.0730e-07,  1.6877e-05,  1.1010e-05, -1.7254e-06,
        -1.0720e-05, -1.0297e-05, -1.6602e-06, -3.7472e-05,  1.4474e-05,
         1.9403e-05, -2.8390e-05, -1.4862e-05,  1.5907e-05, -9.5923e-06,
        -2.2664e-05, -1.4471e-05, -3.2274e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8814e-05,  3.5706e-01, -8.3949e-05,  2.8485e-05,  6.7976e-05,
         5.4500e-05,  2.1698e-05,  2.4095e-05,  1.7943e-05, -3.9592e-05,
        -3.8016e-06, -4.0797e-05,  4.7626e-05, -4.3728e-05, -2.7674e-05,
         1.5840e-05,  8.3859e-06,  1.1220e-05,  3.1827e-05, -7.2372e-05,
         1.6530e-05, -1.7757e-05, -5.3311e-05, -1.1851e-05,  3.1291e-05,
        -4.9822e-05, -4.6390e-05, -1.4083e-05,  2.6085e-05,  3.8614e-05,
         1.7035e-05,  6.8687e-05,  1.6716e-05, -3.9000e-05, -3.7989e-06,
        -4.1020e-07,  5.1012e-05,  9.2229e-06,  9.7762e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0463e-05,  6.0215e-01, -5.9898e-05, -6.8396e-05,  9.0932e-05,
         2.9220e-05, -1.8728e-05,  1.8013e-04, -1.5522e-05,  7.0390e-05,
        -8.4248e-05,  3.9587e-05,  9.1981e-05,  4.8885e-06, -1.4985e-04,
         1.6050e-04, -7.4274e-05,  8.1865e-05,  7.4575e-05,  7.7640e-05,
        -5.1317e-05,  6.1267e-05,  9.3149e-05, -8.5382e-06, -4.7884e-05,
        -1.3265e-05, -8.9018e-05,  1.1960e-04,  7.3432e-05,  1.2652e-04,
        -1.0055e-04,  3.6844e-05,  5.0968e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2326e-04, -1.5406e-01, -4.4611e-06, -1.4897e-05,  1.5525e-05,
        -2.1006e-05, -1.4262e-05, -2.3669e-05, -2.3590e-06, -9.8751e-06,
        -4.4015e-05, -7.3399e-06,  2.6571e-05, -3.8976e-05,  3.4132e-06,
         2.2158e-05, -2.3220e-05,  1.1550e-05, -3.1957e-05,  1.8739e-05,
        -2.3419e-05,  1.2621e-06, -6.4883e-06,  2.6697e-05,  6.8642e-06,
         3.5584e-05, -8.9719e-06,  1.3163e-05, -1.3768e-05,  2.7048e-05,
         2.6375e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.7073e-05, -1.4744e-01,  3.3877e-06,  1.4877e-05,  1.5947e-05,
         5.7852e-05,  1.1209e-05,  8.9895e-06,  3.3775e-06,  1.3188e-05,
        -8.9456e-06, -3.0434e-06,  3.4319e-06, -3.0339e-05, -8.4370e-06,
         5.0555e-06,  2.9095e-05,  8.0142e-06,  6.2929e-05, -1.8117e-05,
         2.9665e-05,  3.4488e-05,  3.7197e-05,  5.6620e-05,  2.4237e-05,
        -9.5731e-06,  3.3859e-06,  2.0080e-06, -1.5721e-05,  6.5763e-07,
         1.7282e-06, -2.4565e-05,  2.7947e-05,  5.3067e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7165e-06, -1.4277e-01,  1.1832e-05, -4.4928e-05, -1.5821e-05,
        -3.9080e-05, -2.5486e-06,  1.3769e-05, -1.0323e-05,  2.2241e-05,
        -7.2690e-06,  3.5412e-05, -3.1690e-05, -1.1259e-06, -2.3801e-06,
         5.8752e-06, -9.9991e-06, -1.7868e-05, -2.5289e-05, -2.4257e-05,
        -1.6542e-05,  7.4999e-07, -2.1153e-06, -4.3630e-05,  1.8067e-05,
         7.2584e-06, -2.0250e-05,  1.5178e-05, -5.1051e-06,  2.4029e-05,
         3.6787e-05, -1.5227e-05,  3.1279e-05, -2.8822e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2150: [tensor([ 5.3692e-06,  4.4698e-01,  3.3162e-06,  8.5062e-05, -5.7232e-05,
         5.7940e-05, -1.1166e-05, -1.3044e-04,  2.5080e-05, -8.0400e-05,
        -6.7549e-06, -8.5385e-05, -4.9990e-05, -1.0486e-04, -1.8218e-05,
         8.9667e-05, -4.2424e-05, -1.8995e-04, -1.6148e-05,  5.0364e-05,
        -3.8992e-05,  4.5421e-05,  2.1030e-06,  6.0540e-05,  2.0874e-06,
         6.2386e-05,  9.2269e-05, -5.7323e-05,  6.2108e-05,  3.3215e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9472e-04,  3.7784e-01, -1.1377e-04, -1.2822e-04, -5.0899e-05,
        -5.1744e-05, -3.5312e-05, -1.0302e-04, -6.1208e-05,  2.6465e-05,
         1.1465e-05, -1.2615e-04, -2.8346e-05, -5.0343e-05, -5.4950e-05,
        -2.4396e-05,  2.0301e-05, -3.3439e-05,  7.3515e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9697e-04,  6.0268e-01,  1.9579e-04,  1.5161e-06,  2.7171e-05,
         1.0495e-04, -5.9201e-05, -2.7597e-06, -2.1149e-05, -3.8584e-05,
        -1.3240e-04, -2.0151e-04,  1.1101e-04, -1.3125e-04,  2.3379e-04,
        -1.6860e-04,  2.4295e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5496e-04,  3.9255e-01,  1.2872e-04, -1.5898e-04, -3.3959e-04,
        -1.9747e-04, -1.9614e-04, -6.5085e-05, -1.0962e-04, -7.8607e-05,
         9.9884e-05, -1.9425e-05,  4.2747e-05,  1.4458e-04, -2.4343e-05,
         7.6883e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.6400e-06, -1.9184e-02, -1.0717e-05, -8.1489e-06,  1.4185e-06,
        -6.7518e-06, -2.1837e-05, -9.6857e-06, -2.9064e-06, -1.0618e-05,
        -4.0386e-06,  6.7855e-06, -1.9864e-05, -7.3925e-06, -4.7637e-05,
        -7.5625e-06,  1.1747e-05, -7.5014e-06, -1.5544e-06,  3.8240e-08,
        -9.3722e-06, -1.1942e-05,  1.5918e-06, -1.0933e-05,  8.1437e-07,
        -1.9894e-06,  8.1655e-06,  6.9324e-06,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3769e-05,  3.6578e-01, -1.3321e-05,  6.0024e-05,  1.1908e-05,
         5.5214e-06, -1.7798e-05, -4.0034e-05,  2.7656e-05, -3.4658e-05,
         2.5081e-05,  5.2306e-05, -4.7046e-05,  4.8928e-06, -9.5314e-05,
         3.5538e-05, -7.0836e-06,  2.2319e-05, -3.6712e-05,  9.9394e-05,
        -9.9520e-05,  1.3656e-05, -1.3113e-05, -4.4054e-05, -3.9040e-05,
        -1.6612e-05, -1.8457e-05,  2.9177e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1730e-04, -1.1345e-01, -4.4937e-05,  5.5917e-06,  6.9818e-06,
         5.7418e-06,  7.1939e-06,  3.5707e-05, -3.3183e-05,  2.4365e-05,
         2.8680e-05,  4.9427e-06,  5.7374e-06,  2.6788e-05,  9.0031e-06,
        -2.6881e-07, -1.5787e-05, -3.2529e-06, -1.7293e-06, -7.6159e-06,
         4.1555e-06,  3.6436e-05, -8.7882e-06,  3.0158e-06,  1.3612e-05,
         2.2941e-05,  2.6473e-06, -1.6834e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0762e-04,  2.6793e-01,  1.3701e-05,  9.5017e-05, -6.5247e-05,
        -1.6936e-04, -6.6683e-05,  1.7087e-05, -1.2445e-04,  1.0796e-04,
        -1.2899e-05, -3.3864e-05,  2.6500e-05,  1.8022e-04, -4.5443e-05,
        -7.1224e-05, -1.2921e-04,  4.3512e-05, -8.4558e-05, -8.2211e-05,
         8.6297e-06,  3.4195e-05,  2.5231e-05, -7.1129e-05,  8.8306e-05,
        -1.2103e-05, -1.0440e-05,  9.6498e-05, -1.1718e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0568e-04, -1.5185e-01, -4.4815e-05,  2.4544e-05,  7.5526e-06,
         2.5764e-05, -1.0600e-04,  3.7369e-05,  5.7505e-05,  3.7015e-05,
         3.6389e-05,  3.3742e-05, -1.9583e-05, -2.7546e-06,  4.3725e-05,
        -1.1147e-04,  6.2310e-05, -1.9371e-05, -4.9810e-05, -1.5154e-06,
         1.9488e-05, -1.0845e-04,  2.3259e-05, -7.1356e-05, -3.0058e-05,
        -8.6350e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8161e-04, -1.3808e-01,  4.3143e-06, -2.6957e-05, -8.7688e-06,
         1.9673e-05, -9.0656e-05,  1.7729e-05, -6.3599e-05, -2.4626e-05,
        -1.5552e-05, -5.5670e-05, -1.0150e-04, -2.0008e-06,  2.1083e-05,
        -6.0853e-05, -3.1416e-05, -6.9280e-05, -1.1267e-05,  1.2500e-05,
         7.3134e-06, -3.0552e-05, -1.3537e-05,  5.0391e-05, -3.1822e-05,
         2.4873e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2057e-05,  2.3019e-01, -1.0439e-05,  6.2414e-05, -1.6891e-05,
        -6.9806e-05,  8.6683e-05, -1.5636e-05,  3.3270e-05, -7.7134e-05,
         2.8666e-05,  7.3809e-05,  8.1197e-05, -5.5665e-06,  2.4223e-05,
         1.3160e-04, -3.2093e-05, -4.5185e-05, -2.5722e-05, -8.5164e-05,
        -4.0141e-05, -3.6814e-05, -3.5658e-05,  5.8378e-05, -3.8273e-05,
         2.9807e-05, -1.8411e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2629e-04,  1.8679e-01,  2.7156e-06,  6.0396e-05, -5.9356e-05,
         2.5439e-05, -2.0772e-05,  2.2875e-05,  6.5534e-05, -9.4444e-05,
         1.7485e-05, -3.3169e-05,  6.0362e-05, -3.3294e-05, -3.3419e-05,
         1.1662e-05,  3.4311e-05,  4.1985e-06,  2.0179e-05, -6.7363e-05,
        -1.2530e-04, -6.6568e-05, -2.6638e-05, -1.3981e-05, -4.6619e-06,
         6.4707e-06, -1.9963e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #2200: [tensor([-2.1540e-04, -1.8649e-01,  8.4619e-06, -6.7851e-06,  4.6482e-05,
        -2.1737e-05,  1.8738e-05, -1.9504e-05, -4.9311e-05, -4.5434e-05,
         2.2103e-05, -4.4615e-05,  1.2556e-05, -3.7467e-05,  3.3948e-05,
        -8.2980e-06, -1.1393e-05,  8.1365e-06,  5.5298e-06, -8.8933e-06,
        -6.7990e-07,  1.2199e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.2545e-05,  2.6397e-01, -7.7421e-05, -4.5147e-05,  1.9839e-04,
        -1.3373e-04, -5.9625e-05, -6.4923e-05, -2.8391e-05, -5.7315e-06,
        -3.4278e-05,  5.2841e-05, -7.0134e-05,  1.0296e-04,  8.8133e-06,
        -1.3556e-04,  5.2864e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6138e-05, -2.1514e-01, -3.6490e-05, -8.4737e-05,  2.5486e-05,
        -5.4200e-05, -1.6012e-05, -8.7730e-05, -7.2000e-05, -7.7653e-05,
         2.2937e-05, -4.1157e-05, -4.6546e-05, -1.8488e-05,  2.5497e-05,
        -3.1167e-05, -4.3100e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7453e-05, -1.5827e-01, -1.2478e-05, -2.6694e-05,  3.7186e-05,
         4.8923e-05, -1.4349e-05, -2.9391e-05,  3.7203e-05,  1.2088e-05,
        -4.8092e-05, -4.0757e-05, -7.3389e-06,  1.0287e-06,  3.5602e-06,
        -4.2854e-08,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0307e-04,  2.9381e-01, -1.1639e-04,  2.7186e-05,  6.8324e-06,
         1.1986e-05,  1.8807e-05,  2.4028e-05, -3.9006e-05,  1.4274e-05,
        -3.7358e-05,  5.5713e-05, -1.6075e-04, -2.6180e-05,  1.2926e-04,
         6.8196e-06, -2.9650e-05, -4.9300e-05,  5.8341e-05,  4.9377e-05,
         6.1887e-07,  8.7924e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3454e-05, -1.7859e-01, -4.3214e-05, -2.5353e-05, -2.1244e-05,
         3.0991e-05,  4.0044e-05,  3.3062e-05, -1.6247e-05, -3.5888e-05,
        -1.0925e-06, -3.2450e-05, -3.4526e-05,  5.6627e-06, -1.1891e-05,
         6.2082e-06,  7.2075e-05, -6.4212e-05,  3.8253e-05,  5.2652e-05,
        -3.7068e-05, -5.1207e-05,  2.0389e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0204e-04, -1.8356e-01,  3.3040e-05, -1.6449e-05,  1.2084e-06,
         2.1107e-05, -8.7334e-06, -7.4481e-06,  2.3774e-05, -3.0410e-06,
        -2.9613e-05, -3.4626e-05, -4.1733e-06,  1.6522e-05,  2.6264e-06,
        -3.4757e-05,  1.9702e-05, -5.0430e-05, -2.3400e-05, -3.0369e-05,
         6.1763e-07,  1.2495e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.9059e-04,  3.6237e-01, -1.0334e-04,  1.4304e-04,  1.4437e-04,
        -1.0139e-04, -1.0562e-04,  6.3274e-06, -1.9151e-04, -4.1234e-05,
        -5.8781e-05, -1.0637e-05, -7.2207e-05,  2.6592e-04,  2.8198e-04,
         3.0373e-05,  1.4400e-04,  1.2167e-04,  2.6066e-05, -1.2704e-06,
        -1.2078e-04,  1.4277e-04,  2.8130e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0444e-04, -7.3836e-02,  7.9435e-05, -5.5692e-05,  2.9003e-05,
        -1.8547e-06, -4.4578e-05,  4.2460e-05, -3.9605e-05,  1.7194e-05,
         3.8957e-05,  2.5182e-05,  7.7036e-06, -6.7726e-05, -2.9140e-05,
        -4.6014e-05, -2.6026e-05,  5.1469e-05, -1.3902e-05,  7.8236e-06,
        -1.0044e-05,  1.6373e-05,  3.6419e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8794e-06,  2.7750e-01, -1.7420e-05,  3.2447e-04, -3.0954e-04,
        -6.5689e-05, -1.7855e-05, -6.5672e-05,  2.6779e-04,  3.5306e-05,
        -9.4790e-05,  1.8228e-05, -3.2344e-04,  4.0163e-05, -9.8641e-05,
        -5.5977e-05, -1.9526e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0781e-06, -1.2483e-01,  2.0550e-06, -1.8870e-05,  3.3047e-05,
        -3.0853e-05, -5.1140e-05, -3.3508e-05, -2.1122e-05,  1.0027e-05,
         5.1816e-06,  2.7996e-05,  4.2001e-06,  1.4883e-05,  2.2607e-06,
         8.5308e-06,  1.2646e-05,  4.2031e-05,  4.1492e-06, -5.0348e-06,
        -1.2824e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0832e-05, -1.9186e-01, -4.8495e-07, -7.3759e-05, -1.0938e-05,
        -6.4313e-05, -6.9666e-05, -8.0149e-05,  2.5705e-06, -5.9459e-05,
        -1.2368e-05,  6.5498e-05,  1.5371e-06,  1.5228e-05, -2.5908e-05,
         5.8189e-05, -3.7165e-05, -4.5349e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #2250: [tensor([-1.1203e-04,  1.4658e-01, -7.5176e-06, -3.2866e-05, -1.1471e-04,
         4.1748e-05,  6.0638e-05,  5.5569e-06, -6.7054e-05, -5.8008e-05,
        -7.0784e-05, -2.5124e-05, -8.2061e-06, -6.6371e-05, -1.3337e-05,
         4.6230e-05,  8.5141e-05,  2.7056e-06, -5.6325e-05, -7.6266e-05,
         6.8152e-05, -4.1291e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0384e-04,  1.9799e-02, -7.9752e-06,  1.8871e-05, -1.1818e-05,
        -8.8396e-07,  1.0226e-06, -8.9402e-06,  3.5962e-05, -1.8622e-05,
         1.9302e-05, -2.1496e-06,  7.4381e-06,  7.3031e-06, -3.5240e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0326e-04,  2.8228e-02, -5.3838e-05, -4.4779e-05,  1.5681e-05,
         1.9286e-05, -2.9971e-05, -5.6949e-06,  3.0492e-05,  7.4654e-06,
        -7.4906e-06,  4.6303e-05, -2.6177e-06, -2.6055e-05,  3.9941e-06,
         5.3629e-06, -9.0458e-07, -8.5688e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3924e-04,  1.9953e-02, -9.9325e-06, -3.6213e-05, -1.1779e-06,
        -1.1466e-05, -2.1586e-05, -1.8765e-05,  3.9805e-05,  9.2192e-06,
        -2.2465e-05,  1.1741e-05, -3.9743e-06, -1.3282e-05, -3.1333e-06,
        -7.0013e-06,  1.1110e-05,  7.7069e-06, -4.2392e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3496e-04,  3.4646e-01, -1.7383e-04, -3.1248e-04,  6.8827e-05,
         1.6472e-04,  1.9717e-04,  6.4799e-06, -5.2083e-06,  3.6956e-05,
         1.0001e-04,  5.6346e-05,  6.2300e-05,  1.3060e-04, -1.0815e-04,
        -1.0900e-04,  4.1739e-05, -1.9508e-04,  1.4278e-04,  2.7099e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0350e-04, -1.6450e-01,  6.2742e-06, -1.1247e-04, -1.7740e-05,
         2.2561e-05, -1.2116e-04, -6.8624e-05,  2.8941e-05, -2.3062e-05,
        -6.3213e-05,  8.5235e-05,  5.7712e-05, -1.5512e-05, -2.4714e-05,
         1.9945e-05,  2.8924e-06,  1.5378e-06, -3.2484e-05,  1.1334e-05,
        -8.4074e-05, -3.9477e-05,  1.3607e-05,  2.3579e-05,  6.6633e-06,
         3.2659e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7777e-04,  3.4164e-01, -1.2170e-04, -1.7078e-04, -1.2018e-04,
        -2.0952e-04,  3.8934e-06, -5.9195e-05, -1.1246e-04,  5.8033e-05,
        -5.0315e-05, -5.0378e-05, -2.9204e-04,  5.2499e-05,  5.0841e-05,
        -1.1162e-04, -1.2563e-04, -5.0139e-05,  7.4647e-05, -4.3976e-05,
        -7.3409e-06, -5.8894e-05,  3.9031e-05,  7.6926e-05, -5.5226e-05,
        -2.9684e-05, -1.2977e-04, -8.9985e-05, -1.0589e-04, -1.4327e-04,
        -1.3310e-04, -9.0451e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5926e-04,  2.1464e-01,  3.0548e-05, -2.2228e-05,  2.1886e-05,
        -2.1646e-07, -4.2016e-05, -1.4276e-04,  2.5240e-04,  1.0293e-04,
        -2.6347e-05, -8.8071e-05, -6.3224e-05, -1.7297e-04, -1.0859e-05,
         7.1084e-05, -2.5782e-05,  4.2028e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0355e-04,  2.6380e-01, -1.6374e-04,  1.1443e-04,  1.4971e-04,
        -1.4666e-05,  5.2685e-05, -3.0443e-04, -3.3118e-05,  7.1390e-05,
        -2.4217e-05,  6.8874e-06,  2.1612e-04, -1.1112e-04,  7.0981e-05,
         1.0676e-04, -6.4311e-05, -2.4094e-05,  1.2146e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8915e-04,  1.7521e-01, -1.6274e-04, -2.7237e-05, -5.2179e-05,
        -7.9146e-05, -9.5839e-05, -2.9497e-04, -2.3132e-05,  1.1181e-04,
        -7.0510e-05, -3.2324e-05,  5.4968e-05, -9.3452e-05,  1.4926e-04,
         6.7761e-05, -8.9925e-05,  7.6701e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2604e-04, -9.7189e-02,  1.4821e-05, -1.0541e-05,  3.9911e-05,
         8.1147e-06,  1.2283e-05,  2.7850e-05, -1.4346e-05,  3.6418e-05,
         1.5358e-05, -1.3446e-05,  8.5162e-06,  1.7204e-06, -2.5725e-06,
        -1.9312e-05, -8.6648e-06, -3.1557e-06, -1.5309e-05, -1.3432e-05,
        -1.2660e-05, -3.8074e-06,  1.9303e-06,  1.7095e-05,  9.5315e-06,
        -1.8664e-05,  6.7711e-06,  1.3117e-05, -1.8981e-05,  3.1273e-05,
         2.5308e-05,  4.3919e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2933e-05, -1.0481e-01, -3.4906e-05, -9.9519e-06, -5.9569e-07,
        -2.1141e-05,  3.3943e-05, -8.3643e-06,  2.7100e-05,  9.6696e-06,
         1.2493e-06, -6.6808e-06, -6.7598e-06,  3.5066e-05,  1.5392e-05,
        -3.5424e-05,  2.4308e-06,  2.9950e-05,  1.7292e-05,  1.4786e-05,
        -1.8498e-05,  2.0359e-05, -1.9951e-05,  3.4340e-05,  2.2261e-05,
         2.5887e-05,  4.4057e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
