Iter #50: [tensor([-6.2828e-03,  6.5039e-04,  9.0270e-04,  8.2870e-04,  5.8656e-04,
         2.0470e-04,  2.8247e-04, -3.0664e-04,  2.7021e-04, -3.8217e-04,
         6.7979e-04, -1.9480e-04,  3.1415e-04, -1.2573e-04,  9.2438e-04,
         1.2347e-03,  6.6738e-04, -1.9780e-04,  5.2120e-04,  1.0405e-03,
         1.2525e-03,  2.3884e-04,  5.9952e-04,  7.3624e-04, -2.3480e-04,
         1.1042e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0077, -0.0023, -0.0005, -0.0035, -0.0002, -0.0026,  0.0004, -0.0002,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0808e-03,  6.1250e-04, -2.1720e-04,  1.5733e-03,  1.0844e-03,
        -2.8442e-05,  1.9674e-04, -2.1908e-04,  5.2578e-04,  2.0437e-03,
        -1.9240e-04, -1.5447e-04,  2.3728e-04, -2.1821e-04, -1.6884e-04,
         4.1723e-05, -2.0878e-04, -1.0376e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2459e-03,  1.6819e-03,  7.7867e-04,  3.5062e-03,  2.9506e-04,
        -5.9064e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 0.0057,  0.0035, -0.0015,  0.0005,  0.0010,  0.0023, -0.0002, -0.0005,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6147e-05,  1.5372e-03, -2.8327e-04,  2.5641e-04,  5.4375e-04,
         8.9555e-04, -3.5657e-04, -2.1442e-04, -1.2323e-04,  7.9446e-04,
        -1.6921e-04, -1.0685e-04,  3.8051e-05,  1.5741e-04, -7.1297e-05,
         3.2666e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3902e-03, -4.9001e-04, -3.1904e-04, -1.2109e-03, -2.0393e-03,
        -2.9548e-04, -7.8808e-04, -1.9525e-04, -1.2860e-04, -1.2181e-03,
        -2.2462e-03, -2.5983e-04, -4.9403e-05, -4.2077e-04, -1.1202e-03,
        -2.1195e-03, -3.9751e-04, -8.9145e-05, -9.8709e-04, -6.1838e-05,
        -6.7607e-05, -7.9797e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1373e-03,  8.2355e-04, -2.8096e-05,  1.0042e-03,  3.5541e-04,
        -5.2837e-05,  4.4062e-04, -5.5023e-05, -1.5014e-04,  1.1768e-04,
         2.4001e-04, -1.4945e-04, -2.3444e-05, -1.7341e-04,  3.7561e-04,
         5.0764e-05,  8.7685e-04,  3.3668e-04,  2.9334e-04,  3.4244e-04,
         5.3833e-04,  3.5333e-04,  2.9213e-04,  3.2070e-04,  2.0793e-04,
         4.1019e-04,  6.2644e-04,  2.2760e-04,  8.7007e-05, -6.4166e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5423e-03,  4.6723e-03,  7.8318e-05,  7.7908e-04,  1.1487e-03,
         5.0787e-04,  2.3139e-03,  1.4455e-03,  1.2861e-04, -1.3901e-04,
         2.4369e-04, -3.1949e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9266e-03,  1.1670e-03, -4.2061e-05,  1.1271e-03,  2.2451e-03,
        -1.0294e-05, -1.5298e-05,  6.0990e-04,  6.7342e-04,  1.2243e-04,
         7.4669e-05,  8.0055e-04,  7.6003e-04,  2.6594e-04, -1.6524e-04,
         8.1743e-04,  6.1014e-04,  7.9000e-04,  4.0202e-04,  4.8201e-04,
         3.2808e-04, -6.3463e-05,  6.7046e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8227e-03, -1.1069e-03, -1.1526e-04, -1.1937e-03, -4.9854e-05,
        -2.2693e-04, -6.2797e-04, -9.0666e-04,  2.9303e-04, -1.0936e-03,
        -9.3033e-04, -2.1540e-05, -8.4026e-05, -7.2528e-04,  5.8419e-05,
        -7.6177e-04, -8.5537e-05, -1.1355e-03, -1.7891e-04, -1.1652e-03,
        -1.4469e-03, -3.3803e-04, -1.4014e-03, -9.8161e-04, -6.8467e-04,
        -5.2531e-04, -3.3539e-05,  6.3902e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5280e-03,  8.1693e-04,  4.9194e-04,  5.6414e-04,  5.1470e-04,
         9.6214e-05,  8.4445e-06,  2.7603e-03, -1.2966e-06,  1.6268e-03,
         1.1948e-03,  9.6478e-04,  8.2777e-04,  1.1010e-03,  5.7864e-04,
         1.0189e-03,  1.9579e-03,  1.4311e-04, -7.0649e-05, -2.3754e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 4.6415e-05, -6.8357e-04, -4.9598e-04, -3.2539e-03, -3.8868e-04,
        -3.8340e-05,  2.3398e-04, -6.8697e-04, -1.6338e-03, -1.5843e-04,
        -4.0955e-04, -2.6426e-04,  3.5058e-04, -1.5750e-04, -1.5494e-03,
        -8.1870e-04,  3.3581e-04, -1.4146e-05, -6.3141e-04, -1.7160e-03,
        -1.8781e-04, -1.1860e-03,  6.9341e-05,  1.8866e-04, -2.6055e-04,
        -8.1692e-04,  3.4644e-04, -6.7909e-05, -1.3172e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1620e-04,  1.1057e-03,  3.5370e-03,  4.4329e-03,  7.3611e-04,
        -5.4560e-04, -2.6116e-04, -1.1993e-05,  7.0889e-04, -7.2801e-05,
         3.0984e-04,  2.7588e-04,  4.1650e-04,  4.0406e-04,  3.2033e-04,
        -9.0491e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8162e-04, -5.3051e-04, -1.7365e-04, -5.8021e-05, -2.4716e-04,
        -5.5549e-04,  1.7730e-04, -1.6151e-03, -5.6866e-04, -1.0387e-03,
        -2.1750e-05, -1.6965e-03, -2.2003e-04, -4.8507e-03, -7.9616e-04,
         2.9999e-04, -6.3130e-05, -2.5078e-04, -1.0021e-03, -9.4785e-04,
         8.2948e-05, -4.7005e-04, -9.5624e-04,  3.3638e-05, -2.9467e-06,
        -7.7145e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8853e-03, -7.7569e-03, -2.7429e-03, -7.2443e-04, -6.5267e-03,
        -9.9704e-04, -7.3907e-03, -5.2926e-04,  5.8679e-04, -1.2859e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.0781e-04,  4.7141e-05,  1.8663e-03,  1.9641e-04,  2.3609e-04,
         5.5207e-05, -2.3128e-04,  6.7075e-04,  1.5475e-05,  2.5833e-04,
         1.8206e-04,  3.1679e-05,  1.5612e-04,  5.7386e-04,  5.9250e-04,
         7.3325e-05,  4.7587e-04,  3.7840e-04,  8.1708e-05,  2.4297e-04,
         7.6709e-04,  3.9042e-04,  2.6357e-04, -9.0438e-05,  3.3015e-04,
         3.6665e-03, -1.9567e-04,  3.7970e-04,  1.5191e-05, -1.5341e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5426e-03, -3.4038e-04, -9.2042e-04, -2.3891e-04, -1.0910e-03,
        -3.9796e-04, -2.4104e-03, -3.3257e-04, -1.9047e-03,  1.0450e-04,
        -4.3555e-04,  3.6233e-04, -1.0150e-03, -4.8966e-05, -2.8758e-04,
        -2.0473e-03, -4.1673e-04, -5.9824e-04, -7.0117e-05, -4.2867e-05,
         1.4587e-04, -2.6582e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7748e-04,  6.8927e-03,  3.0422e-05,  8.1601e-04,  4.0298e-03,
         1.1629e-03, -2.9706e-04,  5.7192e-04, -1.9251e-05,  5.9409e-05,
         2.0012e-03,  4.2837e-03,  3.1612e-04,  2.5121e-04,  2.5468e-03,
        -1.9930e-04,  2.7764e-04,  3.6771e-05,  2.2510e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3487e-03,  1.5565e-03,  2.6260e-04,  1.1857e-03,  5.3636e-05,
         2.3953e-04,  2.2408e-04, -7.5004e-05,  8.6323e-04, -2.7825e-05,
         1.0594e-03,  3.1345e-04,  4.3213e-04,  1.1665e-04,  1.1706e-03,
        -1.6789e-04,  2.2168e-04,  2.0545e-04,  4.6377e-05,  3.9497e-05,
         2.3242e-05, -7.0779e-05,  2.6032e-04,  5.6540e-04,  3.6166e-04,
         7.7097e-05,  2.6144e-05,  5.9078e-05,  2.4466e-04, -3.3914e-05,
         1.2685e-03,  2.6533e-04,  2.9450e-05,  4.6105e-04,  2.3831e-03,
        -6.2250e-05, -1.8664e-04, -1.0123e-05,  9.4927e-05, -1.3651e-05,
         3.0439e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6045e-03,  5.5719e-04,  1.8504e-05,  1.3857e-03, -7.2674e-05,
        -7.3469e-06, -6.2830e-05,  2.6198e-04,  1.3965e-03, -1.0638e-04,
         3.4841e-04,  6.6260e-04,  1.2093e-04,  3.9164e-04,  4.5524e-04,
        -4.6336e-05,  2.6835e-04,  2.6108e-05,  8.3182e-04,  1.0641e-04,
         8.4523e-04, -2.3026e-05,  2.3178e-04, -3.6199e-05,  7.6684e-07,
         2.8814e-04,  1.7328e-04,  1.9719e-04,  5.9186e-05,  6.6790e-04,
        -9.9956e-05, -5.5410e-05,  3.6056e-04,  1.8301e-03, -1.1424e-04,
         3.0564e-04,  1.2348e-04, -1.5175e-04,  7.8680e-04, -1.0362e-04,
         3.8191e-04,  1.2434e-05, -2.5672e-05,  2.8900e-04,  1.1455e-03,
         9.9763e-05,  3.4740e-04,  1.0651e-03, -1.3495e-04,  1.8954e-04,
        -5.5671e-05, -1.0813e-04], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4811e-03,  6.2819e-04,  2.3078e-03,  3.2413e-04,  8.6762e-05,
         4.3727e-04, -1.7820e-04,  3.1150e-04, -3.8265e-05,  1.4674e-05,
        -3.1331e-05,  8.1248e-04,  2.0855e-03,  1.3392e-04,  3.1719e-04,
         1.8847e-03,  3.4950e-04,  4.1282e-04,  3.6115e-04,  8.2455e-05,
        -2.0335e-07,  1.3811e-03,  4.2123e-04,  1.9320e-05,  1.8917e-04,
         4.3202e-04,  1.6148e-04, -4.0114e-05,  7.3920e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7481e-03, -1.5635e-03, -1.9590e-04, -2.1332e-03, -5.0925e-03,
         4.1376e-04, -6.4358e-04, -3.4392e-04, -3.2010e-04, -2.3313e-04,
        -1.0952e-03, -3.5538e-03,  4.2298e-04, -3.9902e-04, -6.1432e-04,
        -1.5315e-04,  7.0674e-05, -4.2063e-04, -2.5436e-03,  8.2788e-05,
        -1.0629e-03, -9.0532e-05, -1.0809e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4983e-03, -4.2524e-04, -4.6826e-03, -4.0075e-04, -2.3765e-03,
        -9.5614e-04, -1.5082e-05, -4.4928e-04, -4.3695e-04,  2.3115e-04,
        -8.7598e-04, -3.1260e-03, -6.6163e-04, -4.9339e-03, -1.8289e-04,
        -1.3938e-04, -2.3622e-03,  2.3405e-04,  3.2076e-04,  1.6836e-04,
        -5.5125e-04, -3.5091e-05, -3.8164e-04,  5.5037e-05, -1.9041e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 8.1338e-04, -6.4488e-04,  1.6251e-04, -1.5625e-02, -4.4700e-04,
        -7.2364e-03, -1.4473e-03,  1.4931e-04,  4.6441e-05, -1.4983e-03,
        -9.9162e-05, -4.1641e-03, -1.7546e-04, -2.8677e-03, -2.8379e-04,
        -7.9077e-03, -6.0762e-04, -5.1969e-03, -5.0280e-04,  6.2845e-05,
         4.0393e-04,  1.2943e-04, -4.7075e-03,  4.1130e-04, -1.0496e-04,
         9.2214e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8231e-03, -5.4276e-03,  1.6206e-05,  2.3074e-04, -3.9908e-03,
        -8.9335e-04, -1.6050e-03, -2.6878e-05, -8.8989e-04, -3.7610e-04,
        -6.0000e-03, -3.0939e-04, -3.8470e-04, -1.4284e-04, -8.4130e-04,
        -9.1851e-03, -1.9020e-04,  8.5196e-06, -5.2325e-05, -3.5013e-03,
        -6.4810e-03, -7.5941e-04, -1.9480e-04, -3.0551e-03, -5.2897e-04,
        -9.6471e-04,  1.5248e-04, -1.2923e-04, -9.5657e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4815e-03, -3.5907e-04, -7.8699e-04, -5.4377e-05, -1.4328e-04,
         6.2188e-08,  6.8081e-05, -1.4905e-03, -9.1519e-04,  1.3175e-05,
         1.2851e-04, -9.7118e-05,  1.7116e-04,  7.5423e-05, -1.4069e-03,
        -1.0411e-02,  2.7475e-05, -2.1562e-05, -1.0167e-03, -2.2659e-04,
        -1.1562e-04,  1.6659e-04,  5.9138e-05, -6.8556e-05, -3.7273e-03,
        -5.0816e-06, -2.7047e-04,  1.6412e-04,  1.3160e-05, -2.0468e-03,
        -2.7238e-04,  2.7453e-04,  9.9926e-05, -4.8769e-05, -3.0222e-04,
         7.1952e-05,  1.6792e-04, -7.8661e-05, -1.2485e-04, -9.4127e-03,
         2.8451e-05,  2.6186e-04, -1.2738e-04, -4.4825e-04, -2.0742e-04,
        -8.5594e-05, -7.9867e-05,  3.6766e-05,  8.3279e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0008,  0.0371, -0.0001,  0.0007,  0.0060,  0.0063,  0.0005,  0.0004,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5388e-03, -1.8691e-04, -4.2648e-04, -2.0070e-03, -3.2128e-04,
        -3.4390e-03,  2.8997e-04,  2.2281e-04, -1.0689e-04,  5.9473e-05,
         1.2579e-04, -8.4613e-03,  2.0061e-04, -3.5735e-03, -4.1365e-03,
        -8.4382e-06, -1.0452e-03,  3.9808e-04, -3.5682e-04, -5.5463e-03,
        -1.1661e-04, -2.5220e-04, -1.8663e-04, -1.7903e-04, -1.9117e-03,
        -6.1630e-03, -4.5572e-04, -1.4762e-05, -2.3324e-03, -1.9976e-04,
         2.5520e-04,  2.9506e-05, -1.2197e-05,  9.6025e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7326e-04,  9.4309e-04, -1.0379e-04, -2.7168e-04,  9.4224e-04,
         5.5880e-03,  8.3406e-05,  1.4967e-04,  4.0035e-04, -1.2338e-04,
        -5.8758e-05,  8.2957e-03,  4.4930e-04,  2.5021e-04, -3.3034e-05,
        -2.2973e-04,  1.6783e-04,  1.2039e-04,  2.3485e-04,  1.3510e-04,
         2.2802e-04,  4.9290e-05, -1.8181e-04,  4.2462e-04,  5.1884e-05,
         7.2655e-04, -6.4727e-05,  4.2399e-05,  5.4145e-03, -1.8282e-04,
         6.5164e-04,  8.1238e-05, -2.0655e-05,  4.3768e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1881e-03,  8.2295e-04,  3.2242e-05,  8.4634e-03,  2.6977e-04,
        -2.0777e-04,  1.9518e-04,  2.0024e-04, -4.5341e-05,  5.8308e-05,
         2.3524e-04, -8.4884e-05, -1.2736e-04, -2.0552e-04, -4.5108e-05,
         3.7243e-03,  8.1446e-04,  8.3301e-05,  9.2418e-05,  2.1467e-03,
         7.7613e-04,  8.0434e-05,  1.0840e-04,  4.7898e-04,  3.7011e-04,
        -3.4483e-05,  6.6440e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 0.0024,  0.0170,  0.0002,  0.0008,  0.0003, -0.0002,  0.0002, -0.0001,
         0.0038, -0.0001, -0.0001,  0.0017,  0.0028,  0.0263, -0.0003, -0.0003,
        -0.0001, -0.0001,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4991e-03,  5.6131e-04,  5.2354e-04,  6.2031e-04,  1.6782e-03,
         3.9948e-03, -1.1913e-04, -5.2750e-06,  2.4290e-03,  1.1970e-06,
         1.1422e-02, -7.1386e-05,  2.6812e-03,  4.2101e-04,  6.0697e-06,
         1.5832e-04,  1.0425e-04,  1.1694e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9270e-04,  3.6895e-04,  4.1476e-04,  1.7784e-03,  5.7242e-04,
         1.9918e-04,  7.3393e-05,  9.2995e-04,  2.4147e-04,  8.6554e-04,
         1.2523e-03,  1.6495e-04,  1.2627e-03, -1.0327e-04,  2.9037e-05,
        -1.7695e-04,  9.0283e-06,  8.6321e-03,  2.3634e-04, -1.9060e-04,
         2.7221e-04,  5.5833e-04, -1.2675e-04,  4.7151e-04,  5.0655e-05,
         2.1198e-02, -4.9242e-04,  6.2138e-05,  4.1070e-04, -1.7323e-04,
        -8.2540e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.5724e-04,  4.0040e-04,  2.8144e-04,  1.3645e-04, -6.3685e-05,
        -1.5255e-04,  4.7755e-03,  1.5643e-04,  2.9592e-03, -1.0502e-04,
         5.2027e-04, -8.0701e-05,  3.9661e-04, -6.0224e-05, -5.3670e-05,
        -2.1317e-05,  3.8751e-04,  9.6364e-05, -6.4481e-05, -1.4271e-04,
        -2.4575e-04, -7.5626e-05, -7.3219e-05, -5.3701e-07,  2.7071e-04,
         3.6015e-04,  1.9421e-04,  4.9387e-03,  2.6810e-04,  3.7726e-04,
         7.0788e-05,  8.8837e-05, -2.2263e-04,  1.1525e-04, -5.7997e-06,
         2.6447e-04,  5.4827e-04, -2.9476e-05,  5.9767e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5636e-04, -8.2776e-04, -1.0554e-03, -4.8547e-04, -4.1020e-04,
         3.1547e-04, -2.5822e-03, -1.6402e-03, -3.4423e-04, -2.7621e-03,
        -6.8857e-03, -8.1439e-04, -1.3127e-06,  4.1905e-05, -3.7812e-03,
        -1.2300e-04, -3.8661e-04, -3.2586e-04,  1.1108e-04, -1.1251e-02,
         4.6548e-04, -2.4857e-04,  3.4797e-05, -8.5666e-05,  1.9563e-04,
        -2.0310e-03, -5.9761e-03, -1.8635e-04,  8.0618e-05,  1.8560e-04,
        -7.8003e-03, -4.9693e-04,  1.4265e-04,  1.3701e-04,  7.7876e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-6.4282e-04, -2.1250e-04, -2.0140e-04, -4.0298e-04, -4.6781e-03,
        -8.9751e-02, -6.1988e-06,  1.0962e-04, -2.7779e-05,  4.0705e-05,
        -5.3362e-04, -4.3417e-05,  3.0390e-04, -4.3870e-05,  1.3612e-04,
        -2.0107e-02, -1.9906e-05,  2.7792e-04,  3.3853e-04,  1.3498e-04,
        -5.6296e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7371e-04, -1.3673e-04, -3.6445e-03, -8.9525e-04, -1.5068e-04,
        -8.7024e-02, -6.2957e-05,  2.3509e-06, -1.8832e-04, -5.6763e-05,
         1.6800e-05, -5.6726e-06,  3.6643e-05,  2.9284e-04, -3.2170e-02,
        -1.0587e-04, -5.0369e-05, -1.6416e-04, -9.9262e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5879e-04,  1.0194e-04,  2.2890e-02, -4.9367e-05,  2.4670e-04,
         9.9435e-05,  2.2975e-04, -4.3865e-05, -2.1575e-05, -1.9503e-04,
        -1.6448e-04, -9.6654e-05,  3.0118e-04, -1.7088e-04, -2.2569e-04,
         1.8578e-04,  4.5003e-05,  1.2091e-03, -4.9678e-05, -2.2058e-04,
         7.7559e-04,  4.7844e-03,  2.9289e-05,  1.4908e-02,  3.7344e-02,
        -1.6914e-04,  4.3570e-03, -4.5297e-05,  1.9846e-03, -1.1421e-04,
         1.2976e-03, -2.1898e-05, -1.7122e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5911e-03,  5.5313e-05,  4.0517e-03,  9.4376e-05,  1.0164e-03,
         2.8777e-05,  1.0412e-02,  2.2714e-04, -1.9377e-04,  4.0841e-05,
        -1.3175e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5890e-04, -3.5316e-04, -6.0518e-05, -2.3962e-04,  6.4092e-05,
         2.5370e-05,  4.2498e-06, -3.0912e-02,  1.0598e-05, -1.1457e-03,
         6.9380e-05,  2.7605e-05,  3.8220e-05, -8.7371e-04, -1.1786e-04,
        -7.6289e-05, -2.3947e-05, -1.9809e-02, -1.0969e-02,  8.8738e-06,
         1.5448e-05,  1.0720e-04, -4.9972e-04,  5.3817e-05, -1.1724e-04,
        -1.0106e-04,  5.6243e-05, -6.3388e-03, -8.8455e-03,  3.7634e-06,
         5.7152e-05,  5.8409e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7526e-06,  1.0652e-04, -3.8514e-03, -2.1731e-04,  1.1966e-04,
        -6.3651e-05, -6.3016e-04, -2.5269e-05, -5.5924e-05,  3.4636e-05,
         1.1037e-04, -3.2028e-05, -3.3006e-02,  1.0952e-04,  9.5604e-05,
        -4.3259e-02,  3.0896e-06,  1.2203e-04,  1.7935e-05, -9.3424e-03,
         9.2405e-06, -2.6587e-03, -1.7962e-04, -4.2597e-05,  1.5065e-04,
        -3.8667e-04,  8.9086e-05,  1.2730e-04, -1.0471e-04, -4.2986e-04,
         1.8916e-04, -7.3751e-04,  1.4386e-04, -3.5661e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.4783e-04,  1.7812e-04,  2.3595e-04,  3.4488e-04,  1.3611e-04,
        -2.1587e-05, -5.1277e-02, -3.1266e-05, -1.2295e-03, -2.6731e-04,
         7.0956e-05, -2.3675e-02,  4.7271e-05,  1.2931e-04, -7.3060e-03,
        -5.4661e-04, -9.2904e-06,  1.1861e-04,  1.3666e-04, -1.8067e-05,
         7.5875e-05, -1.3499e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7937e-03, -1.8730e-04, -5.0897e-05, -2.0502e-05, -5.8606e-05,
        -3.4336e-03,  3.2795e-04, -2.4441e-04, -3.8416e-05, -1.6462e-04,
        -5.2457e-02, -2.7418e-04, -1.3177e-04, -7.9287e-02, -3.6095e-04,
         6.4316e-05,  1.2919e-06,  6.8568e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1503e-04,  7.1864e-04, -1.0499e-04, -6.0759e-06,  2.8774e-02,
        -1.2109e-04,  5.4669e-04,  2.7873e-03, -1.8676e-04,  8.4346e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2628e-04,  2.9917e-03,  1.0964e-03,  2.3204e-04,  6.8099e-03,
        -1.4735e-04, -1.9065e-04,  5.9800e-04, -5.9790e-05,  1.0197e-05,
         4.0117e-02,  7.8949e-04,  9.2936e-03,  5.0533e-04, -2.9629e-04,
         3.1863e-04,  1.3427e-04, -6.1687e-05,  1.7929e-04,  1.6663e-02,
        -3.0969e-05,  4.0173e-05,  4.0554e-03, -8.4544e-05,  6.5852e-05,
         5.3750e-04,  1.0817e-03,  8.6321e-05,  3.4803e-04,  9.8684e-06,
        -5.9172e-05, -1.5379e-04, -1.6499e-05, -7.6628e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.1391e-04,  3.8117e-05, -2.8917e-04, -1.9185e-02,  2.5798e-05,
         7.3858e-05, -1.0477e-04, -1.5147e-02, -1.6821e-03,  6.2874e-05,
         3.6124e-05,  9.1095e-06, -1.9169e-04, -5.1452e-05,  9.5830e-05,
         4.1433e-05, -3.5345e-05, -1.9923e-02, -1.0534e-04,  3.4399e-05,
        -8.9690e-04,  6.7910e-05, -5.5903e-04, -1.7743e-02, -9.1385e-05,
         1.7881e-04, -2.7202e-06, -1.1404e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.6491e-04,  3.7579e-05,  5.6707e-05, -8.6759e-06, -2.1282e-04,
        -9.4909e-03,  1.3216e-05, -5.3508e-04,  9.2530e-05, -8.4015e-05,
        -4.5809e-05,  2.1003e-05,  6.5698e-05, -1.9737e-05,  4.2047e-05,
        -1.3093e-04, -1.4057e-02, -5.8714e-05, -1.1568e-04, -1.2509e-04,
        -3.7459e-03,  1.8030e-05,  4.3589e-05,  2.4295e-05, -1.2467e-05,
        -4.5045e-03,  6.2272e-05, -5.9605e-03,  2.8091e-05, -7.0676e-05,
        -1.5470e-02,  6.1582e-06,  7.9237e-05, -4.8370e-04, -6.5708e-03,
         4.9489e-05, -3.8303e-06, -4.7258e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 3.7729e-04,  9.1204e-04,  2.2395e-06, -2.5964e-05,  1.3186e-05,
         2.7679e-05,  1.6236e-04, -6.3656e-06,  5.8509e-05,  5.6151e-04,
         2.2460e-02,  1.5044e-04,  4.7132e-06,  4.9393e-02,  2.3575e-04,
         2.0243e-05,  8.2480e-03,  1.1860e-04,  1.1469e-04, -3.8687e-05,
         1.4384e-04,  2.1966e-03,  1.5516e-04, -1.6029e-06,  2.0328e-05,
        -2.4776e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5595e-04,  1.7645e-04,  1.5697e-05, -1.3224e-01,  7.4257e-05,
        -6.7205e-05, -1.1524e-05, -3.3811e-04,  1.6277e-04, -7.0868e-03,
        -1.0687e-04, -1.3452e-04, -1.7162e-04, -1.4833e-05,  4.9985e-05,
         8.1565e-05,  6.5256e-05,  1.4322e-04, -5.2892e-05, -3.2157e-04,
         1.2099e-04, -1.1106e-04, -8.3399e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9511e-03,  1.6606e-04, -4.1173e-05, -5.3458e-06,  2.3888e-04,
         1.5817e-04,  7.1720e-05, -1.2367e-04, -1.4934e-04,  1.0136e-04,
         5.6760e-03,  1.3060e-01,  1.1028e-05, -2.4388e-04, -1.1199e-04,
         2.6347e-05,  1.2431e-04,  1.1289e-04, -6.4391e-05,  6.7480e-05,
         8.0088e-03, -2.8304e-04,  1.6589e-04,  1.2609e-04, -2.4356e-04,
        -6.8539e-05, -6.2177e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.4534e-04, -1.2952e-04,  5.5134e-05, -1.2166e-05, -1.7465e-05,
         6.9006e-05, -3.8400e-02,  1.0235e-04,  2.4881e-07,  2.5638e-05,
        -7.7603e-05,  9.0482e-05,  2.4996e-05, -5.4243e-05, -1.7314e-04,
        -4.8959e-02,  7.3195e-05, -1.8318e-05,  2.1193e-04, -6.3145e-05,
         4.2025e-05, -2.6005e-05, -6.8669e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.5009e-04, -2.4563e-02,  5.9487e-05, -1.9298e-03,  6.5920e-05,
         6.1379e-05, -2.2534e-02,  1.0603e-04,  3.3104e-05, -8.9712e-05,
        -1.0733e-02,  1.4462e-04,  5.4824e-05, -1.7502e-05,  9.2213e-05,
         1.0881e-06, -4.6985e-02,  1.5871e-04,  1.7531e-05, -1.0858e-05,
        -4.4620e-04, -8.4485e-05, -1.8438e-04,  3.7807e-06,  1.9138e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4389e-05,  4.5358e-04, -8.6103e-04, -2.3280e-04, -5.7014e-04,
        -7.0885e-02,  2.2174e-04, -9.4570e-03, -1.1432e-01, -2.4620e-04,
        -2.6727e-04, -3.1377e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0081e-04,  8.7285e-06,  5.5882e-06,  7.6365e-05, -4.6096e-05,
         5.2730e-04,  2.5172e-04, -1.2147e-04, -6.3748e-05, -1.0456e-04,
         7.1085e-05,  1.9366e-04,  8.0593e-05, -2.2133e-04, -7.4979e-05,
        -1.9710e-05,  8.2209e-02, -8.0860e-05, -1.5385e-04,  3.4009e-05,
         5.7883e-03,  3.4654e-05,  3.7613e-05,  9.8449e-05, -1.5644e-04,
        -1.1386e-04,  5.2871e-02, -8.6342e-05,  4.0755e-05,  1.1207e-05,
         4.8700e-05, -1.3197e-04, -2.5704e-04, -4.0763e-05,  3.0341e-04,
         7.2019e-05, -3.1350e-05, -2.7312e-05,  3.0058e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3322e-04, -2.7656e-04, -1.8967e-04, -1.1847e-01, -1.4299e-04,
        -2.3619e-04, -1.2951e-04,  7.9221e-05,  3.3366e-05, -8.2722e-05,
        -1.1687e-04,  6.8288e-05, -2.5616e-02, -2.4388e-04, -5.9369e-05,
         1.1015e-04,  9.7887e-05,  4.2071e-05, -1.8648e-04,  1.1714e-04,
        -8.2816e-05, -1.2659e-02, -2.2123e-04, -1.4349e-04, -2.1043e-04,
        -1.4841e-04,  1.1774e-04,  1.3455e-04,  1.4443e-04, -6.2151e-05,
        -7.1462e-05, -5.3385e-05,  6.9266e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0325e-03, -5.3824e-05,  7.3667e-05, -1.2270e-03, -8.6621e-05,
         3.0253e-05, -3.9726e-03, -1.1015e-01, -2.5570e-04,  1.1099e-04,
        -1.9033e-04, -8.1067e-04, -7.8479e-04,  5.6476e-05, -2.6594e-02,
         1.0602e-04, -2.2284e-04, -2.9732e-04, -2.9548e-04, -9.3368e-05,
        -1.2211e-04,  1.3865e-04, -8.6970e-05,  2.5122e-05, -3.6544e-05,
        -2.9448e-03, -3.7653e-05,  9.1290e-05,  7.5993e-05, -1.0324e-04,
        -3.2985e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0291e-04, -5.7579e-02,  6.7597e-05, -8.1731e-04,  2.2061e-05,
         1.1315e-04, -3.2601e-04,  1.1893e-04, -4.9405e-03, -3.1700e-05,
        -5.3889e-02, -9.9304e-05, -3.6068e-03, -1.3313e-03,  1.3390e-04,
        -1.7313e-04, -8.8974e-05,  1.7262e-05,  2.0619e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0693e-03,  6.1465e-05, -1.7913e-05, -6.7484e-05, -5.7702e-05,
         5.7734e-06,  7.6570e-07, -1.3948e-04,  1.4861e-04, -6.7412e-05,
        -8.8363e-06, -1.3029e-04,  3.4770e-05,  1.2717e-01, -2.8522e-07,
         4.9887e-05,  3.1084e-04,  4.5404e-05, -1.3164e-05, -6.4137e-05,
        -1.4753e-05,  6.6998e-05, -4.5140e-05, -4.8455e-05,  2.2198e-04,
         1.5033e-04,  5.4663e-06, -6.9086e-05,  1.1014e-05,  1.3760e-04,
         1.3189e-05,  2.6600e-04, -2.6723e-05,  9.5094e-05,  2.8703e-05,
        -5.1005e-05,  4.3207e-06, -2.0404e-05, -2.2167e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1109e-03,  1.4411e-01,  1.1909e-04, -1.6608e-04, -8.0878e-05,
         6.3104e-04,  1.9448e-04,  1.3868e-04,  8.2909e-03,  2.9555e-04,
         1.6867e-04,  9.6614e-05, -4.9284e-04, -1.4414e-04, -3.4955e-05,
        -3.0183e-05,  9.6983e-05,  3.2035e-04,  1.9297e-04,  1.1863e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 5.6008e-04,  5.4606e-05,  1.0048e-03,  6.3209e-04, -3.3622e-04,
         4.1521e-02,  1.1771e-04, -3.9079e-05, -4.7063e-05,  7.6996e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3562e-04,  4.5145e-05, -1.1018e-02,  8.7894e-05, -3.4190e-02,
         2.3732e-05,  1.9670e-05, -4.8346e-05, -3.7067e-02,  2.6077e-05,
         5.1758e-05,  6.2239e-05, -1.7945e-04, -2.9004e-03, -3.8351e-05,
        -3.5842e-05,  8.1081e-05, -5.9738e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8628e-04, -1.8549e-05, -3.2322e-05, -2.0228e-05,  4.4878e-03,
         2.4465e-02,  1.1071e-04,  5.6620e-05,  6.0503e-05, -6.7442e-06,
         5.2292e-05,  9.7556e-05,  1.1960e-03,  7.5539e-05, -5.4912e-05,
        -8.0057e-07, -3.7211e-05,  3.2892e-03,  5.0932e-02,  1.9531e-04,
         1.9635e-02, -8.1802e-05,  1.6731e-05, -1.0519e-05,  3.1731e-05,
         1.1302e-05,  2.7127e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5689e-04,  2.4367e-04,  6.3896e-05, -3.0128e-04, -8.8451e-02,
         1.4276e-04, -6.3703e-03,  2.1511e-04,  2.4606e-05,  1.2163e-04,
         8.0319e-05,  4.8944e-05,  4.3591e-05,  4.2255e-05, -5.0779e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6323e-05, -9.8202e-05,  4.9563e-05,  2.3121e-05, -1.9640e-05,
        -1.0629e-04, -5.9930e-05,  1.1591e-04,  3.8118e-05,  2.4770e-04,
         4.2269e-05,  5.3148e-04, -5.4772e-05,  1.1330e-04,  9.5992e-05,
        -5.3545e-05, -1.8986e-05,  4.1251e-05,  6.7487e-02, -7.9959e-05,
         5.4848e-06,  6.1338e-05,  1.2913e-05, -7.8314e-05,  9.7117e-02,
        -3.3889e-05,  8.7533e-05,  3.7394e-05, -2.7279e-05, -1.9287e-05,
        -3.7861e-05,  1.1214e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2448e-04, -3.7939e-05,  5.5080e-05, -6.5987e-02,  6.0802e-05,
        -4.0179e-05, -9.3996e-05, -2.0479e-04, -1.5502e-04,  1.8895e-04,
         7.0417e-05, -1.4180e-03,  7.2723e-05, -5.5860e-06, -5.1871e-05,
         4.3298e-05,  9.3233e-06, -4.8267e-05,  1.8547e-05, -2.2735e-02,
         5.3694e-05, -2.7279e-04, -4.8076e-04, -3.1907e-02,  7.8196e-05,
         1.0175e-05, -4.8447e-02, -1.2874e-05, -2.5164e-04, -1.1900e-05,
         1.0112e-04,  6.1022e-05, -1.6305e-03, -4.8850e-03,  1.8902e-04,
         7.9855e-05,  1.7804e-04, -2.0454e-05, -4.5403e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7369e-04, -2.0347e-04,  4.4321e-05, -1.0632e-04, -2.7676e-05,
        -4.3450e-05, -2.1099e-04,  9.2839e-02,  2.3321e-05,  1.2507e-04,
        -5.9097e-05,  3.4140e-03, -1.1020e-04,  6.7783e-05,  9.7136e-05,
         3.5485e-05, -1.5059e-04,  3.2786e-05,  3.8259e-02, -4.4901e-05,
         5.6990e-02, -7.5040e-05, -1.2262e-04, -9.7304e-05, -4.0421e-05,
         1.9744e-06,  2.0095e-04, -1.3159e-04, -4.6679e-07,  1.7613e-03,
         1.3481e-04,  3.6967e-07,  6.2713e-06, -8.6100e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8863e-05,  3.7913e-04,  2.6742e-05, -5.7134e-06,  7.2812e-03,
         3.4251e-04,  1.0841e-03,  4.9750e-04, -1.5521e-05, -7.5704e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4859e-04, -8.7246e-05, -2.2826e-05, -8.8517e-06, -3.6201e-05,
         1.3375e-04, -7.8465e-07, -3.5041e-02,  5.2741e-06, -3.8704e-05,
        -1.0424e-04, -3.5807e-05,  2.0162e-05,  1.4711e-04, -2.8488e-05,
        -1.0506e-05, -5.2496e-02,  6.4120e-06,  1.9751e-05, -2.8549e-04,
         3.8606e-05, -6.9808e-05, -3.0104e-02, -1.4564e-05, -1.0426e-05,
         2.9099e-06,  5.4616e-05, -1.7876e-03, -2.0558e-05, -1.0870e-04,
        -5.9780e-05, -9.2504e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.3017e-04,  3.0162e-07,  2.2058e-05, -9.7143e-05, -4.1542e-05,
        -1.7168e-03,  8.0741e-05,  1.8453e-05,  1.1942e-04, -1.4905e-04,
         1.9857e-04,  7.2515e-05, -5.8021e-03, -1.2393e-05,  5.3518e-07,
        -9.3738e-06, -1.5796e-02, -5.9881e-02,  2.9882e-05,  1.0964e-04,
         8.2568e-05, -1.9997e-04, -5.8558e-02,  1.5166e-04,  9.9982e-05,
         6.9206e-05, -1.7751e-02, -3.2004e-05,  5.2457e-05,  4.2820e-05,
         1.5276e-05,  1.7283e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4266e-04,  1.1873e-01,  1.7239e-04,  2.0204e-04,  3.7857e-05,
        -3.0075e-04,  5.8718e-05, -3.2179e-06, -3.1070e-05,  3.6088e-04,
         4.2400e-04, -1.0144e-04, -3.1077e-04,  1.5503e-04, -7.7545e-05,
        -1.0269e-04,  3.0356e-05,  8.9669e-05,  3.1964e-02,  4.1980e-05,
         8.1829e-05,  6.7915e-07, -1.5712e-04,  8.0199e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2506e-04, -3.7940e-04, -1.9548e-05, -1.6146e-05,  1.3444e-04,
        -2.1203e-04, -1.4125e-05, -1.4850e-04,  5.9103e-05, -1.6055e-04,
         1.3301e-04, -1.9884e-04, -4.9263e-05,  1.3942e-04, -5.3745e-06,
        -1.1404e-01,  1.1706e-04, -8.3003e-05,  1.6363e-05, -4.0066e-05,
        -1.9718e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 4.8114e-04,  1.0025e-05, -4.8242e-05,  3.7184e-05, -2.2115e-05,
         1.5688e-04, -1.3532e-04, -8.4854e-05,  4.6424e-05,  1.1839e-07,
         7.8950e-05,  1.0024e-04, -9.3759e-05, -2.6454e-04, -6.5529e-05,
        -1.8003e-04, -4.8329e-05, -4.8489e-06, -3.9273e-06,  7.2974e-05,
        -2.6287e-05, -6.7465e-05, -1.0238e-04,  3.7897e-05, -6.7164e-02,
         1.9256e-04,  3.3838e-05, -8.9578e-05, -1.2662e-04, -5.0370e-05,
        -1.8103e-01, -1.2708e-04, -6.8413e-05, -3.0962e-05,  7.2204e-05,
         1.0045e-04, -6.8957e-03,  1.5251e-05,  5.0126e-06, -3.1606e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4743e-04,  7.5141e-05, -7.7911e-05,  2.3483e-04, -8.1377e-05,
         1.4791e-01,  1.0264e-04, -5.5438e-05,  8.6847e-05, -1.6764e-05,
         4.7577e-05, -4.8000e-06,  2.1705e-04,  2.0070e-02,  1.2784e-04,
        -2.1312e-05, -3.4743e-06,  5.5480e-04,  1.2429e-05, -5.4668e-06,
         2.4286e-04,  5.5568e-02,  1.9560e-04,  1.6840e-04, -6.9909e-05,
         1.0406e-03,  3.8349e-04,  8.2211e-05,  2.3521e-06,  1.5474e-05,
         7.4643e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2683e-04, -3.9857e-02,  1.1057e-05, -3.2865e-02,  5.8876e-05,
        -5.4290e-06,  4.0635e-05, -6.2071e-05,  1.5214e-05,  2.2098e-05,
        -6.1117e-05,  3.7377e-05, -6.0041e-02,  2.1127e-05, -2.7609e-05,
        -1.2761e-04,  9.0378e-06,  6.8678e-05, -2.0102e-04, -4.2562e-05,
        -1.8346e-04, -2.0126e-04, -9.6143e-06, -1.8199e-04, -8.5705e-05,
        -4.2386e-05, -7.3658e-04,  1.4082e-05, -3.7861e-02, -1.2126e-05,
         2.7928e-05, -3.9463e-05,  2.2752e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6049e-04, -6.1784e-04,  5.3001e-05, -2.1126e-04,  6.2219e-05,
        -9.0992e-05, -5.5715e-05, -2.5044e-04, -1.4637e-03,  5.8128e-05,
        -8.0903e-03, -6.3249e-02,  4.8766e-05, -6.6053e-02,  8.9644e-05,
        -5.0319e-05, -2.4634e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8676e-06, -9.9920e-02, -4.6345e-05, -4.6679e-03, -1.3190e-04,
        -3.9530e-05, -7.1880e-05,  1.6307e-04, -7.2774e-04,  2.8189e-05,
        -1.2467e-02, -2.0892e-05,  1.5442e-04, -1.0936e-04, -9.5909e-06,
        -2.6875e-05, -4.5304e-05,  3.2366e-05, -3.0258e-02,  5.6130e-05,
        -2.4236e-05, -3.6557e-04, -1.6704e-05,  1.7559e-06, -5.0201e-05,
        -1.8910e-02, -2.1369e-04,  3.1603e-05, -3.7380e-02, -8.1914e-05,
        -6.1101e-06, -3.9358e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4156e-04,  1.6232e-04,  1.0420e-02,  6.9181e-04,  2.0457e-05,
         3.1150e-05, -1.2710e-05,  6.3834e-05, -5.6153e-05, -1.0190e-05,
        -7.2912e-06,  3.1741e-04,  7.9592e-04, -7.1551e-05,  3.3843e-03,
        -1.5050e-05,  2.7898e-04, -2.6097e-05, -6.5992e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3850e-04, -3.3354e-02,  5.3981e-05, -6.9772e-06, -1.6088e-05,
         8.8164e-06,  7.3414e-05,  4.2577e-05, -3.2015e-05,  2.1113e-05,
        -5.0520e-05,  9.2900e-06, -5.5358e-02,  1.5867e-05,  5.7449e-05,
         7.9344e-05,  2.0616e-06, -3.4499e-05, -1.3178e-02,  9.2170e-05,
        -4.1781e-04,  2.7431e-05,  1.1570e-05,  6.0771e-06,  9.1689e-05,
        -3.3417e-05, -1.4955e-02,  1.8225e-05, -2.0055e-06,  2.0187e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8405e-04,  3.7636e-04,  1.4611e-04, -9.0509e-05, -1.9218e-04,
        -6.9995e-05,  1.5092e-04, -7.5370e-05,  5.9806e-05,  1.4760e-04,
         9.1588e-05, -1.5221e-04,  1.3722e-01, -3.9762e-04,  9.2543e-05,
        -6.3531e-05, -1.5427e-05,  1.8209e-03,  2.0962e-02,  1.3835e-04,
         4.7604e-05,  1.0873e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8893e-04, -4.7617e-02,  2.2975e-05,  3.2676e-05,  2.4466e-06,
        -6.5302e-07,  5.7344e-06, -1.7945e-03,  8.6521e-05,  1.9415e-05,
        -4.7066e-05, -3.9775e-02, -3.1209e-06,  2.6327e-05, -3.5035e-05,
        -8.6048e-05, -2.3553e-04, -5.1351e-05, -4.4536e-05, -2.1556e-05,
         8.8490e-06, -3.0670e-05, -4.8440e-05, -2.0070e-04, -2.6451e-02,
         8.0412e-05, -7.5489e-05, -4.0998e-05,  5.0483e-05,  6.2816e-05,
        -1.7397e-02, -2.7133e-05,  5.6205e-06, -9.0991e-06, -8.8467e-03,
        -5.4443e-06,  1.3052e-05, -3.4037e-05, -1.2728e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1519e-04, -1.0553e-04,  1.7330e-05,  4.0537e-04, -1.3975e-05,
         1.5006e-01,  9.9954e-05,  1.6550e-04, -1.3651e-04, -3.8164e-05,
        -1.7847e-05,  3.6868e-04, -1.0227e-04,  4.5076e-04,  9.9652e-05,
         7.4866e-05, -8.3540e-05, -2.6399e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9522e-04, -1.0672e-04,  4.5553e-05,  3.2988e-03, -8.7829e-05,
         7.3900e-05, -8.3283e-06, -3.2823e-05,  2.3131e-05,  3.2605e-05,
        -1.8341e-05, -8.1127e-06, -1.1132e-04,  9.3235e-06,  1.2762e-02,
         8.2467e-05, -5.7627e-05, -1.8042e-06,  1.9373e-04,  2.3897e-04,
        -2.2790e-05, -3.6298e-06,  7.0989e-02,  4.2983e-05,  1.5169e-02,
         5.9557e-05,  4.3989e-05,  5.0598e-05,  1.2741e-05,  1.1795e-04,
        -4.6094e-05, -9.3087e-06,  1.4227e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0242e-05,  1.1287e-04, -6.7832e-04, -8.7924e-05,  1.7254e-04,
         4.6893e-05,  4.1637e-05,  1.3183e-05, -2.0226e-05, -1.5387e-02,
        -5.9813e-05, -7.2284e-05,  5.6760e-05,  3.7054e-05, -2.4714e-02,
        -1.0127e-01,  1.8184e-05,  7.1788e-06,  9.4987e-06,  5.4521e-05,
         3.7677e-05,  9.3240e-06, -1.1577e-04, -1.1370e-05,  1.5958e-06,
         8.3536e-05,  4.7287e-05,  4.7350e-05, -4.2907e-04, -2.9514e-02,
        -9.5939e-05, -6.5647e-05, -2.6272e-06, -4.7441e-05, -5.3569e-05,
         5.9141e-05, -2.7738e-05, -5.5080e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 5.8527e-04,  5.5369e-02, -2.9854e-05,  6.8847e-03,  1.3042e-04,
         3.5767e-04,  1.7310e-05,  6.1194e-05,  1.0276e-06,  3.0157e-04,
         3.5473e-04,  4.5701e-05, -1.0470e-04,  4.2757e-05,  9.7284e-06,
        -6.2772e-05,  8.0358e-05,  1.9799e-05,  3.5552e-05,  1.5084e-04,
         4.8094e-05,  2.0597e-05, -1.1356e-05,  5.0037e-06,  3.3619e-02,
         3.4593e-05, -1.0208e-05, -3.6551e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6811e-04,  3.6163e-07, -7.2012e-02, -1.4921e-05,  4.6152e-05,
        -1.4887e-02, -4.2885e-05, -5.8127e-03,  7.8571e-05, -8.9989e-06,
        -1.2129e-04, -8.4295e-04,  6.5793e-05,  3.6114e-05,  4.5818e-05,
        -5.3512e-05, -5.9599e-05, -2.5026e-02,  1.6488e-05, -1.8620e-04,
        -8.9193e-05,  1.6069e-05,  1.8892e-05, -1.9765e-03,  2.5425e-05,
        -1.8016e-04, -2.5336e-05, -4.1071e-03,  1.4655e-05,  7.6822e-06,
        -1.2914e-02, -5.0282e-05,  4.3985e-06, -7.8967e-05,  8.7607e-06,
         8.2910e-05, -5.7075e-05,  1.5411e-05, -4.2752e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6255e-05,  1.2533e-04,  2.3589e-04,  2.5748e-04, -2.8282e-05,
         2.7073e-04, -7.5516e-05,  4.3971e-04,  3.7495e-05, -2.8689e-05,
        -3.9821e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9638e-04, -8.6602e-05, -6.2027e-02,  1.7481e-05, -5.9118e-03,
        -5.4585e-03, -2.7310e-05,  2.1011e-04,  2.4316e-06,  3.8996e-05,
        -3.1839e-03,  2.9917e-05, -7.8511e-05, -6.7438e-05,  2.5568e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0758e-04,  5.0684e-05, -5.0272e-02, -6.2862e-05,  4.6241e-05,
        -2.2565e-05, -1.3163e-05, -5.8315e-06,  1.5359e-05, -1.1131e-04,
        -1.4727e-03, -1.8544e-04,  4.1190e-05, -2.0594e-05, -3.9039e-05,
         9.2604e-08,  2.7347e-06, -6.9133e-03, -3.2649e-02, -8.7245e-05,
         6.1733e-06, -4.0763e-06, -2.1704e-03, -7.9168e-05, -3.0562e-05,
         1.6563e-05, -1.0364e-03, -5.6803e-03, -3.3501e-05, -2.8774e-03,
        -6.2576e-05,  4.4757e-05, -1.9483e-02,  4.8394e-05, -2.8158e-05,
         1.9077e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2911e-03,  3.4591e-01,  3.6415e-04, -1.0432e-04,  4.3127e-04,
         6.5245e-04, -1.2104e-04,  5.1179e-05,  1.5734e-04,  1.2558e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6283e-03,  1.1213e-04, -1.1153e-01, -8.1402e-05, -2.8237e-04,
        -1.7567e-05,  2.1909e-04,  5.6562e-05, -2.9547e-04, -8.4680e-05,
         1.2734e-04, -3.7113e-02, -3.7374e-06, -6.7348e-04,  1.0159e-04,
        -1.9071e-04,  1.1326e-04,  7.9219e-06, -8.9651e-05,  1.3654e-04,
        -3.0254e-04, -6.4417e-02,  1.2837e-04, -6.9198e-06, -2.8739e-05,
         2.1337e-04,  6.7643e-05, -3.9022e-05, -8.6099e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.1875e-06, -1.3217e-02, -9.9048e-02,  5.2889e-05, -9.7418e-05,
         8.0185e-05,  1.8578e-05, -1.3725e-05,  7.0622e-05,  5.6176e-05,
         1.6134e-05, -8.0775e-05,  1.9360e-05, -5.5206e-05,  2.1837e-05,
        -1.6129e-05, -6.1058e-04, -1.5696e-05,  8.3858e-05, -2.9362e-02,
         7.0010e-05,  1.1486e-05, -9.8836e-05, -3.1246e-05, -6.1628e-05,
         2.5903e-05,  2.9559e-05, -4.0791e-02, -6.1541e-02,  4.8494e-05,
        -2.0220e-06, -1.2222e-04,  1.9678e-05,  1.7468e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1257e-04,  6.3162e-05, -1.5123e-02,  4.4456e-06, -1.9858e-05,
        -4.7161e-05, -1.5030e-05, -3.3773e-05, -1.8938e-04, -1.2188e-05,
        -6.5602e-05, -1.2580e-05, -1.2090e-02,  8.5731e-05, -6.2237e-06,
        -2.9159e-05, -1.9277e-02,  7.0083e-05, -1.2654e-05,  2.4559e-05,
        -1.1213e-02, -2.5875e-05, -5.8755e-06, -2.2072e-03, -3.4816e-05,
         2.2735e-05, -3.6015e-06, -3.3049e-05,  1.1573e-07, -4.9157e-03,
        -4.8648e-05, -1.6474e-02,  5.2933e-05, -1.9407e-02, -3.1558e-05,
         6.3336e-05, -4.8877e-05, -5.3060e-02, -3.0729e-06, -7.7116e-03,
        -2.1699e-05,  4.4476e-05,  5.2699e-05,  4.1196e-05, -2.4412e-06,
         4.5995e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4640e-04,  9.8604e-05, -5.4415e-03, -2.7819e-02, -9.4365e-06,
         8.5160e-05, -4.8657e-05, -4.6536e-06, -2.4566e-03,  8.8322e-05,
        -2.5009e-02, -4.8844e-06,  1.8120e-05, -3.1235e-02,  7.2153e-06,
        -7.5620e-02,  4.4670e-05, -2.2650e-05, -3.7822e-05, -5.8638e-05,
        -1.0120e-04, -1.3619e-06, -4.6516e-05,  2.7416e-06, -7.1975e-05,
         1.2364e-04, -2.2094e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8074e-04, -4.1301e-05,  3.9815e-05,  1.0032e-05,  2.2452e-04,
        -1.4043e-05,  1.5364e-04,  2.4858e-04,  4.4717e-05,  2.0761e-03,
         1.1041e-04,  5.3533e-05, -3.2562e-05,  1.1430e-04,  2.7445e-05,
         1.3214e-05,  2.3418e-04, -3.2029e-05,  2.9182e-05,  6.1834e-05,
         1.1715e-05,  5.9534e-05,  3.1765e-04,  3.1790e-05,  8.1414e-02,
        -2.3257e-05,  2.8001e-05, -6.6227e-05,  4.3826e-06,  4.7499e-05,
         4.3066e-05, -2.8839e-06,  1.8996e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9876e-04,  6.5751e-05, -7.9318e-05, -2.1105e-05, -1.8461e-05,
        -1.1447e-04, -2.5001e-05, -6.8144e-04,  1.8977e-05,  5.9861e-05,
        -3.1856e-05,  1.5826e-05, -4.5479e-02,  3.0508e-05, -3.6555e-06,
         3.3702e-06, -2.0115e-05, -4.4346e-05, -7.6523e-05,  2.8952e-06,
        -4.0449e-05,  2.8135e-06, -2.5976e-02, -8.7469e-05, -2.6834e-05,
        -2.5310e-05, -6.9113e-02,  2.0179e-05, -7.5649e-05, -1.4108e-05,
        -3.0956e-03, -1.2324e-05,  1.0171e-05,  3.2088e-06,  6.9595e-05,
         1.0216e-05, -1.9170e-05,  1.9372e-05, -3.4889e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
