Iter #50: [tensor([ 6.0278e-03,  2.5652e-03,  7.6338e-05, -1.8934e-05, -8.6649e-05,
         1.2246e-04,  1.1691e-04, -8.2370e-05, -1.1210e-04,  1.0484e-04,
         1.3728e-04,  7.5147e-05,  7.3971e-05, -1.3148e-04,  7.3740e-05,
         7.7585e-05,  1.3719e-04,  2.7955e-05,  1.0749e-04,  1.0997e-04,
         1.4964e-04, -2.0758e-05, -1.0683e-04,  2.3926e-04, -7.3018e-05,
        -2.1441e-04, -1.6943e-04, -1.0184e-04,  1.2028e-04,  1.1799e-04,
        -1.3144e-04,  1.2802e-04,  6.9093e-05, -1.7621e-05,  2.1300e-04,
         3.8694e-05,  1.6033e-04,  1.1411e-04, -5.8689e-05,  9.1897e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9776e-03, -1.2230e-03,  1.2165e-05,  8.8107e-05,  8.6164e-05,
         1.5082e-04,  6.4057e-05, -7.8385e-05, -1.4857e-04, -6.9516e-05,
         5.2874e-05, -1.7025e-04,  6.5369e-05,  2.3447e-05, -1.1002e-04,
        -6.7526e-06, -9.3968e-05, -2.2202e-06,  4.1486e-05,  1.3374e-05,
        -1.4906e-04, -4.4979e-05, -1.2704e-04, -6.8492e-05,  5.1206e-05,
        -1.0384e-04,  9.2352e-05, -1.8681e-05, -1.3521e-04,  6.1483e-05,
        -5.7339e-05, -1.9829e-05, -7.7016e-05,  1.2396e-05, -3.8441e-05,
        -1.5724e-04,  3.3715e-05, -6.1875e-06, -1.9122e-05, -1.3130e-04,
        -1.7524e-04, -2.1056e-05, -1.1603e-04, -2.7782e-05,  1.0938e-04,
        -3.5704e-05, -2.0459e-05,  7.1213e-05, -7.0631e-05, -5.7819e-05,
         5.2127e-06, -1.3510e-04, -1.4976e-04, -3.6064e-05, -4.3054e-05,
         1.3471e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4033e-03, -2.5003e-03,  1.3585e-04,  2.9682e-05, -8.9151e-05,
        -2.5951e-04, -1.2065e-04, -8.6608e-05, -1.3244e-04, -9.9826e-05,
        -2.3404e-04,  2.1878e-04,  1.9126e-04, -1.4991e-05, -4.7786e-05,
        -2.1579e-05,  4.8666e-05, -2.8694e-06, -3.7067e-05, -2.1635e-04,
        -1.1572e-04,  8.6249e-05,  5.8628e-05, -2.2305e-04,  2.3745e-06,
         8.4015e-05, -1.8536e-04,  6.8040e-05, -7.8533e-05,  5.7345e-05,
         1.1998e-04, -2.0948e-04,  9.1021e-05,  9.8611e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2216e-03, -4.0841e-03, -1.0339e-04, -7.4872e-04,  1.3452e-04,
         1.5051e-04,  6.2464e-04,  2.8000e-04,  4.6042e-04,  7.7652e-04,
        -2.5136e-04,  6.5784e-05,  3.8801e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5653e-03, -2.1571e-03,  2.5234e-04, -2.4755e-05,  9.8618e-06,
         1.0242e-04, -2.7619e-04, -8.1762e-05, -1.0904e-04,  1.2994e-04,
        -4.6286e-05,  1.8116e-05,  3.8168e-05, -3.0811e-04, -6.9150e-05,
         2.6943e-04,  1.2778e-05, -1.2192e-04, -2.3125e-04, -1.2720e-04,
        -3.2866e-05, -2.0429e-05,  9.9944e-05, -1.1756e-04,  2.8119e-04,
        -3.6467e-04,  1.0973e-04, -2.4425e-04, -1.3423e-04,  1.5463e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4822e-03, -2.3365e-03, -5.9071e-04,  1.7549e-04,  4.9664e-05,
         3.0820e-05,  2.7504e-05,  2.7560e-04, -8.2353e-05, -7.8526e-05,
         2.5050e-05,  9.7325e-05,  7.6870e-05, -2.0143e-04, -4.9980e-04,
        -2.2859e-06,  2.5417e-05,  2.1532e-04, -3.4006e-05,  7.9288e-05,
        -8.0294e-05, -1.6592e-04,  2.7734e-05, -4.5804e-04, -2.8598e-04,
         2.4144e-04, -1.8921e-04,  1.5577e-04, -1.2062e-04, -2.0266e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9563e-03,  2.1032e-03, -1.0762e-04, -8.1243e-05,  1.4535e-04,
        -3.9262e-05,  2.8718e-04,  4.7163e-06, -1.6571e-04, -3.4485e-05,
        -6.3617e-05, -8.9754e-05, -2.0612e-04, -1.0178e-04,  1.4913e-05,
         5.1535e-05, -9.6613e-05,  6.7662e-06,  1.1201e-04, -4.7383e-05,
         7.6925e-05, -2.1336e-05, -2.4341e-04,  2.8943e-04,  1.8720e-05,
         1.2036e-04, -4.3309e-05, -3.5466e-05,  1.6103e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8532e-04, -6.5851e-03,  7.9914e-05,  3.8655e-04, -5.3019e-04,
        -6.1199e-04,  6.5508e-04,  5.5515e-04,  6.8775e-04,  5.5931e-04,
        -4.8130e-04,  4.2031e-04,  2.0241e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1954e-03, -3.4275e-03,  9.0275e-05, -2.9107e-06,  4.2791e-05,
        -2.0207e-04,  9.5701e-05,  3.2537e-05, -1.6670e-04, -4.5818e-05,
         1.1268e-04,  5.0478e-05,  1.9900e-05, -6.0944e-05, -1.1141e-04,
         2.2310e-05,  1.2230e-04, -6.2001e-05, -3.4220e-04,  6.8968e-05,
         1.5973e-05,  1.8226e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8887e-03, -1.9823e-03, -8.3831e-05,  1.4398e-04, -7.0729e-05,
        -6.9234e-05,  2.7113e-05,  5.2928e-05, -1.0819e-04, -9.4436e-05,
        -1.6370e-04, -4.5501e-05,  3.2511e-05, -1.3243e-04, -6.4520e-06,
        -2.2702e-05,  1.6417e-04, -1.7729e-05, -6.0123e-05, -1.6594e-04,
        -1.2436e-04, -1.1583e-04, -5.4261e-05, -1.2204e-04,  1.6845e-04,
         7.9353e-05, -5.6629e-06,  9.5857e-05, -7.4936e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0664e-03, -3.2012e-03,  2.8562e-05,  9.8056e-05,  2.0404e-04,
        -1.9481e-05,  6.4950e-05, -2.0124e-04,  6.4967e-05,  5.5695e-06,
         1.5032e-04, -2.4110e-04,  3.4878e-04,  1.8297e-04,  1.1624e-04,
        -1.6659e-04,  1.4991e-05, -2.0636e-04, -4.3941e-05, -2.9281e-04,
         2.4496e-04,  1.0128e-04,  1.4735e-04, -2.3319e-04,  1.0940e-04,
        -2.7495e-05,  1.6710e-04,  6.6909e-05, -2.8126e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3893e-03, -3.3489e-03, -4.8812e-04, -2.3025e-04, -3.7385e-04,
         3.6299e-05, -2.2434e-04,  2.9121e-04, -1.4746e-04, -1.3434e-04,
         2.0105e-04,  5.3338e-05, -4.2334e-04, -2.8738e-04, -2.6331e-04,
        -7.7745e-05, -4.6425e-04,  2.1923e-04, -3.4439e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.9753e-04, -1.2341e-02,  2.4397e-05,  1.1435e-04,  2.6946e-05,
         6.9247e-05,  2.9170e-05,  5.8628e-05,  1.0572e-04,  4.0735e-05,
        -5.7767e-07,  1.2749e-04,  6.8540e-05,  1.0610e-04,  2.7183e-05,
         1.2327e-04, -5.4757e-05,  8.4515e-05,  1.1156e-05,  2.1698e-05,
         5.7699e-05, -6.0526e-05,  2.9956e-05, -5.2271e-05, -2.4211e-05,
        -2.5363e-05, -7.8907e-06,  7.6058e-05,  1.6838e-04, -1.9892e-05,
        -2.5111e-05, -8.8618e-05,  5.3503e-05,  1.9785e-05,  5.5534e-05,
        -7.7336e-05, -2.6601e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0045e-04, -7.4504e-03, -2.0384e-05, -2.4651e-05, -2.5285e-05,
        -1.8127e-05, -6.9940e-05, -4.4500e-05,  1.8328e-04,  2.5130e-05,
        -6.8706e-05, -1.0033e-05, -5.5413e-06,  8.4960e-05, -7.2324e-05,
         3.4996e-05,  6.6078e-05,  5.6381e-05,  6.1561e-06, -2.9398e-05,
        -1.9137e-05, -3.1465e-05,  6.6813e-05, -6.5303e-05, -6.9416e-05,
         1.2224e-04,  5.6616e-05, -2.5367e-05, -6.5414e-05,  5.3689e-05,
         1.2541e-05, -4.4787e-05, -2.0025e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3573e-04, -1.0816e-02, -1.6805e-04, -1.1943e-04, -1.0227e-04,
        -6.4492e-05,  5.9202e-05, -1.1940e-05,  2.4602e-04,  1.8735e-05,
         2.6332e-04,  3.0945e-05, -4.3771e-05,  1.2590e-04,  7.5423e-05,
         2.1769e-04,  2.1817e-06, -7.5836e-05, -1.5233e-04,  3.2385e-06,
         1.0482e-04,  4.9613e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2800e-03, -9.0699e-03, -2.6715e-05,  7.4679e-05, -6.3185e-05,
         4.2979e-05,  4.5483e-05,  1.9371e-04,  1.1000e-04, -5.1086e-05,
        -5.6301e-05,  2.2503e-06, -7.7624e-05, -3.1845e-06, -7.3465e-05,
         7.0685e-05, -6.5196e-05,  7.0524e-05,  1.0666e-06, -4.5038e-05,
        -3.1838e-05,  3.2914e-05,  3.3403e-05,  1.5990e-04, -7.0497e-05,
         2.3471e-05,  1.2090e-05,  2.8826e-05, -3.8079e-05, -6.6163e-05,
        -4.3953e-05, -5.2866e-05,  4.6427e-05, -8.1845e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1249e-03,  2.7929e-02, -5.3845e-05,  4.1142e-05,  1.1079e-06,
         8.3028e-05,  8.0491e-05,  4.8073e-05, -2.9634e-04, -1.3648e-04,
         8.0356e-06,  1.8209e-04, -3.6367e-04,  6.8737e-05,  1.2752e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4782e-04, -1.9838e-02,  5.0744e-05,  1.3921e-04,  1.0340e-05,
        -7.1100e-05, -5.9449e-05, -2.1054e-06, -1.2437e-05, -3.0313e-05,
        -1.7973e-04,  1.0739e-04, -2.4866e-05,  3.8815e-05, -4.9916e-05,
         8.2192e-05,  1.1336e-04, -1.0925e-05, -2.1058e-04,  3.5428e-05,
        -6.9716e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9783e-04, -9.6276e-03, -9.5704e-05, -6.9247e-05, -4.4991e-06,
        -3.9748e-05,  1.0835e-04, -3.8047e-06,  7.9822e-05,  1.2793e-05,
        -2.6607e-05, -6.9778e-05,  1.6633e-04,  5.4636e-05,  6.3402e-05,
         3.4797e-04, -8.2974e-05,  6.8709e-05,  6.1256e-05, -8.9622e-05,
        -8.0116e-06, -1.0323e-04,  2.0008e-04,  9.5781e-05, -1.3025e-05,
        -1.2326e-05,  5.0315e-05, -1.4107e-05, -2.9570e-05, -3.6563e-05,
        -1.3078e-05, -6.4038e-05,  1.5981e-04,  3.1857e-05, -4.4070e-05,
         1.9429e-05, -2.6232e-06,  1.0776e-05, -5.5686e-05, -3.7260e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4095e-03, -1.6192e-02,  2.9529e-04,  2.9976e-05,  6.0070e-05,
         1.2634e-04, -1.1687e-04, -3.1188e-05,  1.6586e-04, -5.7977e-05,
        -2.3889e-04,  2.1572e-04,  9.6434e-05, -2.2420e-04,  1.8113e-04,
         1.6211e-04, -7.6051e-05, -6.6842e-06,  6.4410e-05,  3.3236e-05,
         6.9089e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4777e-04,  2.4405e-02, -2.4213e-04, -1.7467e-04, -1.4374e-04,
         1.1631e-05,  1.1271e-04, -1.3473e-04,  8.8147e-05, -5.3447e-05,
        -3.6091e-05,  1.5962e-04, -5.4780e-04, -8.0060e-05, -1.3617e-06,
        -8.3339e-05, -1.4080e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7667e-04,  1.1560e-02,  3.8487e-05, -1.0604e-05,  2.5306e-04,
        -3.1507e-04, -1.4402e-04, -1.0908e-04,  4.8889e-05,  1.5422e-05,
         7.9663e-05, -2.0168e-04,  1.9354e-04, -6.8318e-05,  5.0935e-05,
        -1.4706e-04,  4.0096e-05, -6.4380e-05,  2.0278e-05, -5.0048e-05,
         2.1403e-05,  1.4665e-04,  3.2998e-05, -7.4610e-05,  6.1969e-05,
         3.9008e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9794e-03,  2.3175e-02, -6.4060e-04,  6.0548e-05, -1.2712e-04,
        -6.1794e-04, -2.9861e-04,  8.3933e-05, -1.7565e-04, -3.9377e-05,
         3.7430e-04,  3.7865e-04,  6.5907e-05, -1.2646e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1186e-05, -1.3807e-02,  2.0584e-04, -1.2435e-04,  2.8319e-05,
         6.1656e-05,  1.3283e-04, -3.3040e-05,  2.5757e-04,  4.9302e-06,
        -2.5945e-05, -2.1681e-05, -7.4322e-05,  2.4896e-04,  7.5317e-04,
         7.3581e-05,  3.5384e-05, -8.6151e-05, -7.4826e-05,  5.3557e-05,
        -2.0858e-05,  9.4347e-05,  5.0861e-05,  1.1799e-04,  8.0055e-05,
         2.5506e-05,  1.4533e-04,  7.1167e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 7.2635e-04,  6.2696e-02, -4.6563e-04, -1.2393e-04, -1.1823e-04,
        -1.4891e-04, -1.3572e-05,  2.2325e-05,  1.2177e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4719e-04, -4.9290e-02,  2.1805e-05, -1.7719e-04,  2.3752e-05,
        -9.2182e-05,  1.6954e-05,  4.2621e-05, -8.1154e-05, -8.2423e-05,
         2.1195e-04, -2.2862e-05,  5.4539e-05, -5.4633e-05, -5.4629e-05,
        -2.4933e-05,  1.9128e-04,  1.0896e-04, -1.8789e-05, -1.4424e-04,
        -6.2710e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5263e-04,  2.5529e-02,  8.0329e-05,  3.0362e-05,  1.6088e-06,
         1.0296e-04, -4.4867e-06,  6.9697e-05,  1.0106e-04,  1.1420e-04,
         6.5002e-05,  2.4164e-05, -4.2217e-05, -2.1797e-05,  3.4350e-05,
        -4.9932e-05,  8.6131e-06,  3.2789e-05, -3.5374e-05, -2.5943e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0429e-03,  4.1985e-02, -1.5449e-04, -4.1456e-05,  6.3604e-05,
        -2.4619e-05, -2.4520e-05,  3.1648e-05,  9.7530e-05, -2.1805e-05,
         4.8011e-05, -1.3605e-04, -2.8291e-05,  2.3891e-05,  3.0396e-05,
         1.1441e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8826e-03,  5.6949e-02, -3.5914e-04, -3.7467e-04, -2.2307e-05,
        -3.2507e-04,  8.0682e-05, -5.4558e-05, -5.4411e-05, -2.0026e-04,
         2.5801e-04, -1.7293e-04, -2.0683e-04, -2.0436e-04, -2.7637e-05,
        -7.5655e-05, -2.5383e-04, -1.2494e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6360e-04,  3.2282e-02,  3.6564e-05,  2.3996e-05, -6.4242e-06,
         3.9556e-05,  4.5599e-05,  3.7493e-05,  1.0251e-04,  5.5764e-06,
         1.0622e-04,  6.4564e-05, -3.0068e-05,  6.3415e-05,  1.5338e-05,
         4.3574e-05,  5.7577e-05,  4.4336e-05, -1.7238e-05,  3.7641e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0371e-03, -4.4996e-02,  5.9627e-05, -9.9945e-05, -6.4565e-05,
        -9.5228e-05, -1.1789e-05,  1.0701e-04, -7.0724e-05, -8.7440e-06,
         1.0724e-04,  8.0853e-06,  9.0941e-05, -1.2844e-04, -2.9218e-05,
        -6.9674e-05, -1.9097e-05,  1.2165e-04, -1.0823e-05, -3.3855e-05,
         8.3107e-05, -3.4207e-05,  7.5782e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4802e-04, -3.7092e-02,  3.2491e-05, -5.2217e-05,  3.1434e-05,
         7.5385e-05,  1.4341e-04,  6.6420e-05,  9.6983e-05,  5.7369e-05,
        -6.1900e-05,  9.1856e-05,  4.3803e-05,  3.6623e-05,  2.0287e-05,
        -3.5194e-05,  1.3309e-04,  5.1689e-05,  2.8486e-04,  1.5472e-04,
         7.2248e-05,  1.2134e-04,  8.2913e-05,  3.2963e-05,  1.2335e-04,
         1.2628e-04,  5.6946e-05,  7.3573e-05,  2.7878e-05,  1.7014e-05,
         3.3681e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1057e-04, -4.1910e-02,  7.9878e-05,  1.0819e-04, -9.6131e-06,
         2.6165e-05, -8.8947e-06,  6.2342e-05,  5.6316e-05, -1.0739e-05,
         1.1008e-04,  1.7930e-04,  9.5483e-05,  1.0969e-04, -1.6632e-06,
        -3.3403e-05,  3.9344e-05,  6.6948e-05,  4.3586e-05, -4.6318e-05,
        -8.9720e-06,  1.3089e-04, -4.4092e-05, -5.7812e-05,  3.0409e-05,
         9.9998e-05, -1.8197e-06, -1.4255e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9080e-04,  3.6723e-02,  1.1325e-06,  2.2420e-04,  1.3069e-04,
        -1.1450e-04, -8.9183e-05,  3.9274e-06, -1.9175e-04,  5.9506e-05,
         2.5010e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1105e-04, -4.8405e-02,  3.3920e-05, -1.0984e-04, -1.5952e-04,
         1.5496e-04, -4.2424e-05,  4.4203e-05,  1.6712e-04,  5.0372e-05,
        -1.7844e-04,  3.3416e-05, -1.4840e-04, -8.1899e-05, -6.9404e-05,
        -4.7899e-05, -1.6380e-04,  6.0881e-06, -5.5399e-05, -4.1720e-05,
        -6.6225e-05, -1.9937e-05,  9.6805e-05, -1.2438e-05, -6.7855e-05,
        -1.8899e-05,  2.1245e-05,  2.7208e-05, -1.4202e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9707e-04, -4.1312e-02, -1.1726e-04, -2.4270e-04, -6.8961e-05,
        -1.3520e-04,  1.2173e-05,  5.1830e-05, -4.8805e-05,  1.5604e-05,
        -1.6043e-04,  1.1471e-04, -7.0514e-05, -1.5980e-05,  1.6904e-05,
         6.6607e-05, -3.7896e-06, -1.4543e-04,  9.3943e-05,  4.1335e-06,
        -4.0875e-05, -8.5952e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-2.0045e-03,  4.9765e-02,  5.3352e-05,  4.1358e-06, -1.9188e-04,
        -1.8341e-04, -3.4020e-05, -1.2524e-04, -3.4949e-06, -1.2829e-04,
        -2.1057e-05, -2.3434e-04, -6.1668e-06, -3.7462e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4225e-04, -7.0247e-02,  7.7061e-05,  9.0301e-05,  4.0953e-05,
        -6.7342e-05,  1.9440e-05,  5.7429e-05,  8.8840e-05,  1.3571e-04,
        -1.1155e-04,  9.0135e-06,  3.6160e-05,  6.5201e-05,  9.8879e-05,
         1.7252e-04,  1.6471e-04,  5.9102e-06,  1.5617e-04,  5.4469e-05,
         7.3349e-05, -1.5263e-04,  2.5671e-05,  7.2246e-06, -6.2868e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0741e-03,  6.2958e-02, -1.1173e-04,  4.0123e-06,  1.5728e-04,
        -1.6914e-04,  1.9291e-05,  6.4539e-05,  6.3160e-05, -1.4775e-04,
        -6.7007e-05,  5.0917e-05,  3.1359e-05, -9.9818e-06, -3.3505e-05,
        -4.7038e-05, -1.2993e-04,  6.4557e-05, -6.9306e-06, -2.0589e-04,
        -9.9904e-05, -9.4309e-05,  8.1912e-05, -3.8777e-05, -5.6933e-05,
        -1.0616e-04, -4.7158e-05, -3.2051e-05, -3.5374e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0435e-04,  3.6339e-02, -4.4874e-05,  7.4964e-06, -3.0238e-05,
         1.5857e-04, -1.0696e-05, -2.8941e-05, -3.1038e-05,  1.4328e-04,
         1.1006e-05,  7.5876e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2985e-03,  2.7734e-02,  5.1450e-05,  2.1008e-05,  4.8277e-05,
         6.3445e-05,  9.2369e-06,  1.3264e-04,  7.6363e-05,  1.4380e-04,
         7.1350e-05,  2.8210e-05,  2.6372e-05,  4.3786e-05, -7.4652e-05,
         6.6510e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0207e-03,  4.3916e-02, -7.7597e-05,  5.5455e-05,  9.4418e-05,
        -1.7441e-05, -1.7526e-05,  2.6287e-05,  7.4548e-05,  3.2819e-05,
         6.4225e-05,  2.6108e-05,  5.5885e-05, -1.7826e-05, -6.2058e-05,
         2.2706e-06, -9.4528e-06,  5.7872e-05, -7.8831e-05,  2.6721e-05,
        -4.0503e-05, -7.4045e-05,  9.9425e-05, -1.0731e-04,  3.3541e-05,
         4.2850e-05,  6.4129e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.4265e-04, -4.2691e-02,  6.7701e-05,  7.6762e-05, -2.1006e-05,
        -1.8362e-05,  1.4362e-05,  4.5989e-06, -1.4048e-05,  2.3748e-06,
        -4.4194e-05, -6.9849e-05,  7.2556e-05, -1.4129e-06,  2.0641e-05,
         1.0802e-04,  3.8947e-05, -2.0164e-05,  1.1961e-04,  4.9473e-05,
         1.5477e-04,  2.6159e-05, -7.9193e-05, -1.2742e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.7762e-04, -4.6085e-02,  5.8861e-05,  4.0058e-05, -5.3672e-05,
        -8.6288e-05,  5.1968e-05,  3.6712e-05, -6.1969e-05, -2.6912e-05,
         5.3377e-06, -3.1465e-05, -7.9682e-06,  1.6992e-05,  2.8957e-05,
         4.9393e-05, -1.6537e-05,  9.9568e-05, -1.7686e-05, -7.5058e-05,
         5.5795e-05, -7.6363e-05,  1.6912e-06,  8.5598e-08,  4.3028e-05,
         6.3884e-05, -4.0950e-05,  2.5495e-05,  6.3308e-05,  5.0473e-05,
         5.7549e-05,  2.5593e-05, -2.0432e-05, -1.7634e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9004e-04, -5.4040e-02,  5.8518e-05, -3.0502e-06,  4.1307e-05,
         2.3152e-05,  7.4446e-05,  2.9144e-05, -2.6109e-05,  1.2877e-04,
         3.8996e-05, -9.5804e-06,  3.1977e-05,  4.1403e-05,  2.5663e-05,
         4.6115e-05,  9.9388e-05,  2.1551e-05,  2.2487e-05,  5.1337e-05,
         8.3588e-05, -2.1031e-05,  3.9718e-05,  3.1681e-05, -5.9801e-05,
         1.0496e-04,  1.4116e-04, -1.4804e-06,  2.8809e-05,  8.6365e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0549e-03,  7.9837e-02, -5.6419e-05, -2.2106e-04, -1.1434e-04,
        -1.4118e-04, -2.8745e-05, -2.7474e-04,  4.7844e-05, -1.3492e-04,
        -9.6836e-05, -1.0292e-04, -1.0936e-04, -2.2627e-04, -1.5719e-04,
        -2.0797e-04, -3.2746e-04, -3.4815e-04, -8.5286e-05, -5.3132e-05,
        -1.2482e-04,  2.0186e-05, -1.0598e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8153e-04, -4.6812e-02, -9.1250e-05,  8.1167e-05,  3.1518e-06,
         5.6776e-05,  2.1308e-05, -9.7635e-06,  9.1508e-05,  1.0999e-04,
         4.1502e-05,  5.9253e-05,  8.7581e-06,  2.0019e-05,  1.6405e-05,
         1.7888e-05, -1.8205e-05, -2.6139e-05,  5.4980e-05,  2.7781e-05,
        -6.3176e-05,  1.8292e-05,  3.5719e-05,  2.0564e-04,  1.7986e-04,
         7.6215e-05,  4.0306e-05,  2.3194e-04,  5.4965e-05,  1.0768e-05,
         2.2479e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6024e-04, -5.9445e-02,  4.1279e-05,  2.4371e-05, -3.6034e-05,
        -7.3826e-05,  3.5067e-05,  1.9502e-05, -4.3460e-05, -2.9092e-05,
        -1.6099e-06,  9.5634e-05,  3.6565e-06, -4.2804e-05,  5.3245e-06,
        -2.8218e-05,  3.3052e-06, -3.2875e-05,  8.6067e-05,  3.6391e-06,
        -5.7108e-05, -3.9662e-06,  1.4081e-05,  2.3880e-05,  4.8638e-06,
        -2.7465e-05,  4.1449e-05,  1.8371e-05, -2.5483e-05,  2.2678e-05,
        -5.5073e-05,  7.4942e-05,  5.4787e-05, -1.6393e-06,  4.8950e-06,
        -4.7961e-05, -7.4258e-05, -2.0445e-05, -2.5967e-05, -6.9864e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-4.1015e-04,  9.3528e-02, -4.3500e-05,  1.3706e-04,  1.4054e-04,
        -1.1586e-04,  4.2394e-05,  9.4818e-05, -1.9902e-04, -4.7729e-05,
         4.5001e-05, -1.3282e-04, -1.6433e-05,  1.1767e-04,  1.7965e-05,
         1.6681e-04, -8.6456e-05, -1.4045e-04, -6.4232e-05, -1.1244e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1014e-03,  6.4969e-02, -1.5910e-04,  5.1445e-05, -1.7501e-04,
        -3.2257e-04, -2.4829e-04, -2.3637e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.1679e-04,  5.2908e-02,  9.6553e-05,  1.4608e-06,  2.0694e-05,
         1.0719e-04, -7.3394e-05, -8.0836e-05,  1.2572e-04, -4.8162e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0698e-04, -5.5644e-02, -8.7707e-05, -1.6168e-05,  3.9292e-05,
        -2.1260e-05, -1.0098e-05, -7.0355e-05,  1.3567e-05, -5.9159e-05,
         2.5252e-05,  3.4249e-06, -3.7706e-05,  6.2439e-05, -4.5666e-05,
         3.1879e-05, -1.2541e-05, -8.1403e-05, -5.8292e-05, -5.3747e-05,
        -4.5994e-05,  1.6736e-05,  7.9800e-05,  3.7738e-05,  3.4935e-05,
         5.5994e-05,  1.0611e-05,  1.8001e-06,  4.0217e-05, -8.3850e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2701e-05,  5.8730e-02,  4.1967e-05,  2.1870e-05,  3.4284e-05,
         6.6363e-05,  1.7870e-05,  4.9318e-05, -8.9321e-05,  1.1841e-07,
         7.7604e-05, -2.4081e-05,  5.2125e-05,  6.8708e-05,  3.1782e-05,
         1.6851e-05, -5.0676e-05, -2.1418e-06,  7.4151e-05, -3.9998e-06,
        -9.3227e-06,  1.2633e-04,  1.7842e-05,  1.2357e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4018e-04,  9.1706e-02, -1.4694e-05,  7.3854e-05, -5.3662e-05,
        -2.7717e-05, -1.0229e-04, -1.7345e-04,  8.6779e-05, -1.3700e-05,
        -7.5874e-05, -2.4868e-05, -2.8351e-04,  6.4745e-05, -7.3166e-05,
         4.3937e-05, -6.7364e-05, -3.4762e-05, -1.9061e-04,  5.2159e-05,
        -2.2548e-05, -8.2967e-05, -1.6215e-04, -6.5130e-05, -1.2046e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3706e-03,  6.9786e-02, -4.9044e-05, -4.2158e-05,  6.2391e-05,
        -3.5736e-05, -6.2493e-05,  2.3715e-05, -1.7520e-05, -1.1896e-05,
         1.0627e-05, -9.0632e-05, -3.3246e-06, -2.8647e-05,  6.6997e-05,
        -1.2467e-05, -9.4031e-05,  2.3718e-05, -7.6260e-05,  8.5373e-06,
        -2.4445e-05, -7.9738e-05,  2.2392e-05,  2.2111e-05, -2.2758e-06,
        -1.4964e-04, -5.6388e-05, -5.9126e-05, -2.9021e-05,  1.1548e-05,
        -1.2907e-04, -3.7418e-05, -3.7099e-05,  4.1231e-05, -8.4900e-05,
         3.8263e-05, -1.2373e-04, -1.7030e-05, -2.6714e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.4854e-04,  5.2659e-02,  2.9296e-06, -3.2074e-05, -1.0595e-04,
        -1.4798e-05, -4.3958e-05,  1.1909e-04, -1.4275e-05, -2.1087e-05,
        -2.4159e-06,  6.0435e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4592e-04,  9.7792e-02,  4.9357e-05,  9.1614e-07, -1.1789e-04,
        -4.7931e-06, -1.9560e-04, -4.7561e-04, -2.3414e-04, -3.7189e-05,
         1.8059e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.3000e-04,  6.0895e-02, -6.6591e-05,  1.2378e-05,  1.1821e-04,
        -9.2297e-05, -5.5591e-05, -6.6542e-05,  7.0179e-05,  2.1770e-05,
         7.9670e-05, -1.9701e-04, -1.0120e-04, -1.2670e-04, -1.0929e-04,
        -1.7577e-04, -1.2794e-05,  1.3603e-05, -1.7785e-05,  4.9755e-05,
        -1.2851e-04, -8.8860e-05, -2.8992e-05, -1.5726e-05,  2.3475e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3973e-04, -8.5176e-02,  9.4265e-05, -1.6800e-04, -2.1541e-05,
        -3.3121e-05, -2.5814e-05, -1.5502e-04,  1.8599e-04, -9.4362e-05,
         4.7522e-05, -5.1986e-05,  1.0870e-04, -9.7832e-05,  9.0120e-05,
        -3.8247e-05,  1.9014e-05,  1.2757e-05,  2.2407e-04,  1.1149e-04,
         9.8222e-05, -1.0264e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3807e-04,  4.5109e-02,  8.4044e-05,  8.2801e-05,  6.6329e-05,
         8.3204e-06,  5.4687e-05,  4.1607e-05, -1.4493e-06,  2.3264e-05,
         8.7866e-05, -7.6233e-06,  1.2057e-05,  5.5801e-05, -1.3181e-05,
        -1.6896e-05,  7.4590e-05, -3.9389e-06,  3.8281e-05,  4.2862e-05,
         1.2530e-04, -1.4534e-05, -6.4297e-07,  2.1732e-05,  2.2887e-05,
         8.2053e-06,  7.7897e-05,  3.4509e-05,  5.7820e-05, -4.6532e-06,
        -4.0409e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 1.5064e-04, -4.6949e-02, -1.0277e-05, -9.4844e-06, -1.7094e-05,
         1.0644e-05, -1.7225e-05,  1.0488e-05, -1.1814e-05, -1.4434e-05,
        -5.6711e-06, -2.1389e-05, -1.7891e-05,  2.1902e-06,  1.1270e-05,
        -2.2506e-05, -1.5623e-05,  3.8607e-05,  2.1462e-05,  3.7302e-05,
         3.6110e-07,  2.2119e-05, -1.8138e-06,  1.6807e-05,  2.2620e-05,
         3.8614e-05, -1.1100e-06, -8.2569e-06,  1.3931e-05, -2.0223e-05,
         1.8893e-05, -2.1401e-05,  1.3246e-05,  4.0062e-05, -7.4852e-07,
         8.0385e-06, -1.4135e-05, -1.6022e-05,  8.9874e-06, -5.0621e-06,
         5.7518e-06,  6.1991e-07, -2.3649e-05, -9.7489e-06, -8.2156e-06,
         1.9411e-05,  9.4319e-06,  8.4295e-06, -1.8813e-05, -3.0559e-05,
         2.8837e-05, -1.4007e-05, -3.2712e-05,  2.9836e-06,  9.3953e-06,
        -2.0143e-05, -3.6813e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5077e-03,  4.9555e-02,  7.6060e-05,  2.4467e-04,  1.6814e-04,
         7.3598e-05,  1.5916e-04,  2.7917e-04, -2.7828e-06, -6.0953e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8734e-04,  4.1451e-02,  5.6477e-06,  7.9578e-05, -4.1147e-06,
        -9.2752e-05,  2.2751e-05,  2.9994e-05,  1.0787e-04,  1.5729e-04,
         4.8196e-05,  2.0782e-05,  8.1493e-05,  1.0471e-06,  2.7610e-05,
         4.1746e-05,  3.7785e-05,  1.6899e-05,  1.0691e-04,  1.3732e-04,
         4.8935e-05,  5.3282e-05,  7.8785e-05,  1.3296e-04, -4.8102e-06,
         3.7361e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6810e-05,  6.5799e-02, -1.1355e-05, -1.5681e-04, -1.7531e-04,
        -2.7039e-04, -8.2826e-05, -1.7392e-04, -3.0816e-04, -3.1141e-05,
        -2.9189e-04,  3.9396e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3609e-04,  5.3707e-02,  3.1749e-05, -5.3290e-05,  5.6717e-05,
        -8.5986e-05,  4.2670e-05, -4.6437e-05,  1.6917e-04,  1.0291e-05,
        -6.9174e-05,  7.9077e-05, -1.2080e-04,  2.3786e-06,  3.7051e-05,
         1.0525e-04,  1.3805e-05,  1.9225e-06, -5.5056e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3590e-04,  7.9490e-02,  2.3576e-05,  1.1215e-04, -6.1891e-06,
        -1.0555e-04,  1.0477e-04, -1.0441e-04, -3.1460e-05,  6.4938e-05,
         1.8106e-04, -1.5466e-05, -2.4351e-05,  9.1931e-05, -7.7749e-05,
         1.8405e-04, -5.2100e-05,  6.1740e-08, -6.6750e-05, -3.7288e-05,
        -4.2967e-05, -1.5869e-05, -1.4981e-04,  3.7951e-05, -4.3016e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1082e-04,  8.3758e-02,  5.5281e-05,  4.8221e-05, -2.0585e-04,
        -1.5260e-05,  9.2195e-05, -6.1775e-05, -1.9428e-04,  2.5597e-05,
         8.6055e-05, -2.6018e-04, -3.1068e-04, -6.9978e-05,  5.8113e-05,
        -1.3670e-04, -1.0573e-04, -1.2784e-04, -7.7953e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3468e-04, -3.4023e-02, -4.0213e-05, -1.2754e-05, -4.3691e-06,
         3.2217e-05, -1.6909e-05,  2.6285e-06, -1.9275e-05, -3.4128e-05,
         3.7145e-05, -2.6789e-05,  2.1884e-05, -7.0285e-06,  3.4794e-05,
         3.1352e-05, -1.1659e-06,  2.4394e-05,  3.0814e-05, -1.6398e-05,
        -1.6931e-05, -1.8046e-05,  4.1835e-05,  4.2995e-05,  1.3726e-05,
         1.5971e-05, -1.8402e-05,  2.4992e-06, -5.4504e-06,  4.0509e-05,
         5.6210e-05, -5.4404e-07,  2.0512e-05,  3.3527e-05,  3.6169e-05,
         4.0089e-06,  1.1247e-05, -2.8512e-06,  8.0738e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.1847e-04, -1.3006e-01, -8.3620e-05,  1.3568e-04,  3.2591e-05,
         3.4986e-04,  3.3196e-04, -1.5083e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0682e-04, -9.8939e-02,  5.2299e-05,  2.0280e-04,  1.3917e-04,
         1.2436e-04, -1.3529e-04,  1.6588e-04,  4.6744e-05,  1.5823e-04,
        -1.6462e-04,  1.3614e-04,  8.6151e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1647e-04, -4.6459e-02,  1.8217e-06,  7.9978e-05,  7.7131e-06,
         2.7719e-05, -7.1194e-05, -3.0059e-05, -1.7792e-06,  8.8320e-06,
        -4.2716e-06,  4.3761e-05, -1.5197e-05, -6.2769e-06,  2.1884e-06,
         2.8416e-05,  5.4068e-05, -3.9844e-05,  6.9225e-05,  8.6583e-05,
        -6.9554e-05,  1.1168e-04, -6.1343e-05,  2.2525e-05,  6.4194e-05,
         2.4703e-06,  3.0025e-05,  9.7736e-05, -1.9992e-05, -7.2865e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1797e-04,  6.5845e-02, -2.4376e-05, -4.3625e-06, -2.1405e-05,
         1.5364e-05,  8.6216e-05,  1.3505e-04, -1.3529e-04, -1.6424e-05,
         4.2855e-05,  5.5895e-05, -2.1686e-05, -9.3355e-06,  6.1674e-05,
         5.6997e-05,  5.9182e-05, -1.1129e-04,  4.6342e-05, -1.6372e-05,
         6.7406e-05, -9.6151e-05,  1.3316e-05,  4.3725e-05,  9.3040e-06,
         1.0198e-05,  1.4436e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-4.2187e-04, -1.0825e-01, -3.0019e-06,  7.2866e-05,  9.8579e-05,
         4.6629e-05, -3.4683e-05,  4.6931e-05, -1.1878e-04,  2.5721e-05,
         7.0488e-05, -4.4906e-05, -1.1098e-05, -1.0446e-04,  3.2054e-05,
        -1.1656e-05,  5.2560e-06,  1.4479e-04,  2.3648e-04, -3.9093e-05,
        -8.7476e-05,  2.1844e-05, -6.9314e-05, -1.0169e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.0586e-05, -1.1014e-01,  2.4302e-05,  3.1717e-05, -1.9120e-05,
        -7.6787e-05, -8.9844e-05, -8.0305e-06, -5.7807e-05, -2.5707e-05,
        -4.8102e-05,  3.5244e-05, -2.0891e-05,  1.0464e-04,  6.7810e-05,
         8.8180e-05, -1.9210e-05, -1.7290e-04,  3.1757e-05,  8.1365e-06,
         1.1602e-04,  9.6657e-05,  6.9892e-05,  7.2049e-05,  7.9118e-05,
         8.4487e-05,  1.0232e-04,  5.9625e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.8018e-04, -1.1166e-01, -1.6859e-04, -1.3755e-04, -7.8488e-05,
        -2.1839e-04, -3.8563e-05, -9.5554e-05, -1.3093e-04, -7.5676e-05,
        -1.1600e-04, -1.3941e-04,  2.7662e-05,  9.2583e-06, -4.1253e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3415e-05,  6.0634e-02,  1.0604e-04, -1.5043e-05,  3.8571e-08,
        -1.5852e-05, -4.3364e-05,  2.1978e-05,  4.3733e-05, -3.2232e-05,
        -1.6483e-05, -6.3494e-05,  2.6127e-05,  5.3850e-05, -9.4383e-06,
         3.1864e-05,  4.5922e-05, -1.8014e-05, -2.5884e-05,  2.2068e-05,
        -1.6436e-05, -3.3261e-05, -9.6841e-06,  6.2024e-06,  3.0184e-05,
         6.6764e-05,  2.5267e-05, -8.7556e-05,  8.8512e-05,  2.0025e-05,
         3.2395e-05, -9.5163e-06,  6.8571e-05,  4.8201e-05,  1.8930e-05,
        -4.8438e-05,  5.1986e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5554e-05,  1.2919e-01, -5.3893e-05, -1.0550e-04, -7.5306e-05,
         2.5356e-04, -8.0574e-05, -1.3553e-04, -5.7997e-05, -2.1492e-04,
        -2.0249e-04, -1.0930e-04, -1.4873e-05, -1.3479e-04, -1.8152e-04,
         9.4134e-05, -3.9803e-05, -1.3968e-04, -1.7342e-04,  2.2392e-05,
         1.2070e-04,  8.2550e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1265e-05,  6.7113e-02,  8.3769e-05,  1.7653e-06, -3.7219e-05,
         1.4465e-04, -4.2824e-05,  3.7380e-05, -2.9225e-04,  3.3255e-05,
        -1.6122e-04,  5.0291e-05, -1.8939e-05, -4.9335e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.8967e-05,  3.7599e-02,  7.2659e-05, -3.1510e-05,  1.1382e-05,
        -1.1423e-04,  3.4454e-05,  4.8183e-05,  4.7584e-05,  6.5101e-05,
         1.1294e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2256e-03,  8.5748e-02,  7.7375e-05,  1.1870e-04, -1.0581e-05,
         2.1193e-04,  8.2563e-05,  1.1761e-04, -4.7095e-05,  1.3593e-04,
         1.2790e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0494e-04,  7.8083e-02, -6.2778e-05,  7.7062e-05,  3.8666e-05,
         1.1070e-04,  9.1717e-05,  1.1911e-05, -6.1676e-05,  1.1444e-05,
         2.7304e-05, -1.2653e-06, -1.8346e-04,  2.5283e-05, -4.6231e-05,
         3.2492e-05,  1.6801e-05, -2.8575e-05, -1.9809e-05,  1.9946e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5018e-04, -7.8848e-02, -5.6638e-05,  8.2351e-05, -1.0317e-05,
         3.6934e-05,  4.8121e-05, -3.0237e-05, -1.4683e-05,  2.3433e-05,
         2.9469e-05,  7.5405e-05,  2.2564e-05,  1.4408e-04,  3.0973e-05,
        -3.4707e-06,  8.3746e-05,  3.6520e-05,  1.0494e-04, -4.0280e-06,
         2.7307e-05,  2.1951e-06,  3.4367e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5843e-04, -8.2671e-02, -5.8113e-06,  1.3623e-04,  1.0118e-05,
         2.1661e-05, -8.5597e-05,  2.6172e-05,  1.7935e-05,  7.8457e-07,
         3.5416e-06,  5.2726e-05,  1.1785e-06,  1.3778e-04,  1.1980e-05,
         1.3628e-06,  5.1918e-06, -5.5513e-05,  2.1662e-05,  2.4027e-05,
         1.4199e-05,  2.0926e-05, -3.3048e-05,  1.1459e-05, -3.8934e-05,
         6.9856e-06,  7.9779e-05,  9.8265e-06,  7.8533e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1011e-04, -3.3314e-02, -8.5326e-06, -2.5353e-05, -2.9937e-05,
         2.9098e-05, -1.3498e-05,  4.1778e-06,  2.5327e-06,  2.5634e-05,
         6.5495e-07, -4.2928e-05,  1.1091e-05,  6.8603e-07,  2.5576e-06,
         4.0827e-08,  1.6087e-05, -4.3141e-05,  1.1132e-05,  2.6094e-05,
         9.0542e-06,  1.9973e-05,  2.4452e-05,  3.5926e-05,  7.4615e-06,
        -1.0198e-05, -1.8163e-05,  1.6017e-05,  7.4231e-06,  3.6220e-06,
         4.2210e-05,  1.7259e-05, -2.1726e-05,  2.6108e-05,  3.1024e-05,
         2.9958e-05,  2.3831e-05,  2.0764e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 9.3929e-05,  1.1651e-01, -8.1146e-05,  2.1404e-05,  4.5956e-05,
         2.1914e-05,  6.9230e-05,  1.7295e-04, -1.2134e-04, -5.7739e-05,
         3.2952e-05,  3.3142e-05, -2.9794e-05, -7.9202e-05,  6.2094e-05,
         2.3898e-05,  6.1532e-05,  1.8744e-05,  9.5143e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7340e-04,  4.0735e-02,  1.5836e-04,  4.8984e-05,  8.8328e-05,
         3.8909e-06, -9.1676e-06,  1.4793e-05, -1.3085e-05, -8.8722e-06,
         5.1450e-05,  5.1923e-05,  1.3826e-05,  7.3662e-05,  8.4311e-06,
         2.2829e-06,  3.5447e-05,  1.4535e-04, -4.8715e-05, -2.7014e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3726e-03,  6.2303e-02,  8.8157e-05, -1.3581e-04,  3.8779e-06,
         3.7696e-05, -4.7088e-05, -4.9451e-05,  1.0414e-04,  4.0442e-05,
        -7.8419e-07,  1.1125e-04, -2.3737e-05, -1.6058e-05, -1.3552e-04,
        -2.0800e-05,  4.9622e-05, -9.1186e-05, -1.0735e-04, -6.2057e-05,
        -2.3181e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.6368e-04,  7.4811e-02, -6.7318e-05, -4.9157e-05, -4.3159e-05,
        -4.2199e-05,  9.2326e-05, -7.8039e-05, -7.1200e-06,  1.9922e-05,
         2.3241e-05, -1.4941e-06, -1.9650e-04,  5.8468e-05, -7.0640e-06,
        -1.4763e-04, -6.5676e-05,  1.5838e-04,  5.8205e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2208e-04,  7.8952e-02, -4.7831e-05, -6.8350e-06, -2.7627e-05,
         3.0694e-05, -3.8970e-05,  1.1952e-05, -1.4037e-06,  3.5315e-06,
        -8.6710e-05, -3.9134e-05, -2.6203e-05,  3.4425e-05, -1.8675e-05,
        -1.2897e-04,  4.9971e-05,  3.0010e-05, -1.2687e-04, -1.3925e-05,
        -7.3181e-06,  9.9536e-06,  4.3122e-05,  4.0853e-05, -3.6581e-05,
         2.2447e-05, -1.0153e-05, -8.6378e-05, -9.5430e-05, -9.1093e-06,
         1.8060e-05, -4.7796e-06, -5.4383e-05,  5.8186e-06,  4.3833e-05,
        -4.5676e-05,  1.3333e-05, -5.9352e-05, -2.0200e-05, -2.0463e-05,
        -4.3766e-05, -4.0988e-05, -1.0314e-05, -3.3201e-05,  2.5064e-05,
        -2.2744e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4941e-04,  1.3152e-01, -4.7571e-05,  4.9118e-05, -7.4808e-05,
         7.5893e-05, -1.0210e-04, -5.9706e-05,  1.0539e-04, -1.8748e-05,
         8.4468e-05,  1.3705e-04, -4.8760e-05, -2.0664e-04,  6.2970e-05,
         8.5569e-05,  6.3655e-07, -1.2288e-04, -8.7057e-05,  4.8419e-05,
        -4.5191e-05,  8.9053e-06, -1.2572e-04,  2.0141e-04,  5.4114e-06,
        -1.1920e-04, -8.2254e-06,  2.1810e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1000e-05, -8.6186e-02, -7.4375e-05, -6.4940e-06,  1.6835e-05,
         1.6301e-05, -1.1058e-05,  8.7752e-05,  3.0357e-05,  8.1583e-05,
         6.2023e-05,  8.5652e-05, -3.4766e-05,  1.1774e-04,  9.5058e-05,
         3.1444e-05,  9.4130e-05,  3.4888e-05,  6.0626e-05,  1.0311e-04,
         1.4083e-05,  1.0420e-04,  2.9288e-05,  4.1590e-05,  1.1819e-04,
         5.7590e-05,  1.5498e-04, -2.6686e-05, -6.0898e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6557e-04,  6.4178e-02, -3.2272e-05,  7.3906e-06, -2.4836e-05,
        -3.6141e-05,  8.0308e-05,  5.0460e-05,  6.4932e-05,  2.2168e-05,
        -4.7263e-05, -1.6524e-05,  1.6762e-05, -1.4697e-05, -6.9733e-06,
        -2.3907e-05, -8.0040e-06,  6.5726e-06,  4.7127e-06, -4.9832e-05,
         6.4605e-05,  2.3594e-05,  3.3612e-05, -5.4658e-05, -2.8116e-05,
         8.5912e-05, -1.3933e-05,  1.3161e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8741e-04, -7.7300e-02, -2.6378e-05,  4.1790e-05, -1.4004e-05,
        -1.3229e-05,  2.3688e-05, -1.5411e-06,  1.0862e-05,  1.2803e-05,
        -4.0950e-05, -1.9442e-05,  2.0487e-05,  2.4707e-05,  4.9367e-09,
         7.6827e-06, -7.4114e-06,  3.8358e-06,  3.1390e-05, -6.5234e-06,
        -4.4527e-05, -3.1651e-05,  1.7937e-05,  1.9147e-05,  1.2927e-05,
         2.8523e-05,  5.0208e-05, -3.2176e-06,  3.9482e-06, -3.8099e-05,
         1.9527e-05,  1.1872e-05, -1.0646e-05, -3.7071e-05, -2.1019e-05,
         2.0657e-05,  4.2004e-07,  6.4018e-06, -1.3004e-05,  1.0777e-05,
         2.8834e-05, -1.8245e-05,  4.6270e-07,  3.5950e-05, -5.0102e-05,
        -2.9216e-05, -1.4094e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7054e-04, -7.9067e-02, -5.7117e-05, -4.0131e-05, -1.3505e-05,
        -3.2356e-05, -3.1615e-05, -3.2093e-05, -3.2199e-05, -2.4961e-05,
        -4.2406e-05, -5.7426e-05, -1.9554e-05, -1.3449e-06, -1.7902e-05,
         6.1857e-06, -3.4500e-05, -2.3331e-05, -1.1446e-05, -6.6785e-05,
         1.3572e-06, -2.6001e-05, -8.4233e-06,  7.6055e-06, -7.6886e-05,
        -3.8254e-05, -5.0083e-05, -7.0780e-06, -7.7258e-05, -1.0687e-05,
         1.0898e-06, -3.6847e-05, -2.2118e-06, -4.1648e-05,  2.4496e-05,
        -1.0483e-05,  3.9743e-05,  1.8803e-05, -1.9670e-05, -4.0636e-06,
        -7.7136e-05, -8.8398e-05,  1.8963e-05, -7.1570e-05, -8.3025e-05,
        -2.7035e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0413e-04, -6.6474e-02,  2.8767e-05, -6.0072e-06, -4.3351e-06,
        -1.1132e-05,  3.6950e-05, -4.1128e-05, -1.7694e-05,  5.2754e-06,
         8.8240e-05,  2.1372e-06, -3.5613e-05,  5.9322e-05,  9.4938e-06,
        -2.1263e-05, -2.4570e-05,  1.7397e-05, -6.9774e-05, -1.3040e-04,
        -2.9414e-05, -4.6566e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2934e-04,  1.0651e-01,  3.9383e-05,  5.3027e-06, -1.3704e-05,
        -8.8998e-05,  1.0106e-04, -9.3375e-05, -3.8413e-05, -1.6557e-05,
        -9.3688e-05, -1.6874e-05, -9.8262e-05, -3.3226e-05,  4.9100e-05,
        -8.3829e-05, -4.9150e-05,  1.5644e-06, -2.9766e-05,  3.9830e-05,
        -7.9305e-05,  3.3373e-05,  2.3579e-05,  8.4139e-05, -8.4402e-05,
        -3.2265e-05, -9.9602e-05, -8.2975e-05,  1.4030e-04,  9.7474e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([-7.1877e-04, -9.2417e-02,  1.2168e-04, -1.0639e-04, -1.1583e-05,
        -9.3956e-05,  5.2549e-05,  6.8450e-05,  4.9245e-05,  1.5951e-04,
        -1.4186e-04,  4.0907e-05,  1.8328e-04, -5.1642e-05, -1.1450e-04,
         8.3954e-05, -2.4928e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8401e-04, -1.2140e-01,  2.0898e-05,  1.9149e-04, -3.5587e-05,
        -8.9683e-06,  1.3643e-04,  3.3349e-04,  3.6479e-04,  6.5694e-05,
         1.4125e-04,  1.6542e-04,  2.0279e-04,  2.1422e-04,  8.8011e-05,
         1.4497e-04, -6.2400e-05,  1.4129e-04,  3.6443e-04,  1.2009e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2281e-04,  8.4446e-02, -7.7817e-05, -4.7625e-05,  7.4476e-05,
         1.0890e-04, -6.2388e-05,  7.1025e-05,  7.8091e-05,  1.5849e-04,
         1.7157e-04,  2.1308e-04, -5.9827e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6664e-04,  1.2193e-01, -2.0827e-04,  3.3683e-05,  1.5209e-04,
        -1.6911e-04,  8.4078e-07,  1.8864e-04,  3.7943e-05, -1.4765e-04,
        -3.6327e-05, -1.0851e-04, -1.5509e-04, -3.6351e-05,  3.8534e-05,
        -2.1278e-04,  2.3483e-05,  2.7959e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2605e-05,  1.1819e-01, -1.3741e-06, -2.0801e-06, -1.2261e-04,
         3.7635e-05, -6.0149e-06, -3.7089e-05,  1.1676e-04,  5.9112e-05,
        -2.8535e-05,  2.5684e-05, -7.4855e-05,  9.0077e-05, -8.7771e-06,
         7.3241e-05, -1.4260e-04,  5.3172e-05,  2.4433e-05,  1.5400e-05,
        -6.3684e-05, -7.2388e-05,  1.6721e-05,  3.2532e-05, -1.6899e-05,
        -5.2271e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9329e-04,  1.0445e-01, -3.8201e-05,  2.5764e-05,  8.0718e-05,
         2.5896e-04,  8.8486e-05, -2.7945e-04,  1.2518e-04,  1.4143e-04,
         3.7325e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2629e-05,  8.9524e-02, -3.9442e-04,  5.7277e-05, -1.4810e-04,
        -5.4156e-05, -5.1501e-05, -7.9680e-07, -2.5548e-05, -1.2640e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.5810e-04, -3.9063e-02,  1.2011e-05, -1.4797e-05, -2.6038e-05,
         1.4451e-05,  9.6027e-06,  6.8814e-06,  1.4309e-05, -7.1481e-06,
        -2.1045e-05,  3.4654e-06, -2.9334e-06, -2.2386e-07, -3.0989e-05,
         5.0798e-06, -2.1537e-05, -2.1862e-06, -1.3477e-06,  4.3383e-05,
         4.5128e-06, -1.0269e-06, -2.0945e-05,  1.5373e-06, -1.8530e-06,
         1.5120e-06,  5.6276e-05,  2.3565e-05,  8.7148e-06, -5.2998e-07,
        -1.8484e-05,  1.5668e-05,  2.2300e-05, -3.3448e-05,  2.6797e-05,
         4.2849e-05,  2.7258e-05, -8.9199e-06,  1.9148e-05, -8.2827e-06,
         1.6328e-05, -1.8561e-05, -1.2364e-05,  8.5023e-06,  5.9731e-06,
        -5.9919e-07, -3.4130e-06, -1.1003e-05, -1.9825e-05, -1.0513e-05,
        -6.9981e-06,  2.7010e-06, -1.4881e-05,  5.3993e-05, -2.1423e-05,
        -2.9887e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2598e-04,  1.1733e-01, -9.9819e-06, -1.0173e-04, -9.8811e-05,
        -2.1478e-04, -1.1928e-05, -9.2030e-05,  1.1826e-04,  3.2757e-05,
        -5.5843e-05, -1.5056e-04, -3.4631e-05, -1.6902e-04,  1.0233e-04,
        -7.8958e-05,  5.2267e-05, -9.4652e-05, -8.8773e-05, -7.8136e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2106e-04,  7.3610e-02,  1.2455e-04, -5.3140e-06,  2.4258e-05,
         2.1485e-05,  4.9961e-06, -2.6496e-05,  3.7808e-05, -2.1761e-06,
         1.3372e-05, -6.8223e-05, -3.2047e-05,  1.3329e-05, -3.2951e-05,
        -1.3831e-05,  6.0096e-05,  6.2565e-05, -2.3680e-05,  4.0021e-05,
         1.1029e-05,  8.5914e-05,  1.3399e-05, -3.0079e-05, -1.6102e-05,
        -1.3319e-05, -5.8235e-05,  6.4562e-05, -2.5336e-05,  6.5123e-06,
         1.0101e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7812e-04,  3.2522e-02, -1.4248e-04, -2.1646e-05, -1.0797e-04,
         6.1781e-05, -6.7765e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8037e-04,  2.8544e-02, -6.6829e-05,  1.0517e-04, -8.0160e-05,
        -2.2119e-05, -5.6204e-05, -5.1465e-05, -6.4793e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([-9.9549e-04, -1.0077e-01, -1.1787e-04, -1.0516e-05, -9.1519e-05,
         1.6260e-05,  2.0778e-05, -1.8407e-04,  3.9192e-06, -2.7082e-05,
         3.0817e-05, -2.8937e-05, -8.5040e-05, -1.8326e-05, -1.5157e-05,
        -2.0905e-05,  7.9965e-07,  2.1444e-05,  1.2542e-04, -3.9733e-05,
         3.6593e-05,  2.0468e-05, -2.9219e-05, -1.1470e-05, -8.1811e-05,
        -1.0329e-04,  2.2277e-05,  1.5968e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5050e-04, -4.8865e-02, -5.6472e-06,  2.8375e-05, -2.2068e-05,
        -4.0716e-05,  3.9121e-05,  2.1374e-05, -1.1464e-05, -1.0139e-06,
         3.5914e-05, -4.0054e-05,  8.5050e-06, -1.6004e-05, -2.9151e-05,
        -1.3500e-05,  2.9789e-05, -9.1642e-05,  2.5791e-05,  4.4193e-05,
        -2.6430e-05, -2.9804e-05,  5.3031e-06, -2.0298e-05, -5.9211e-05,
         1.0340e-05,  5.4261e-05, -5.3077e-05, -3.3723e-05, -1.4497e-05,
         5.6776e-05,  6.4672e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1860e-05,  1.6787e-01,  3.8115e-05, -3.1864e-05, -2.1097e-05,
        -1.0545e-04, -9.8889e-05, -5.7254e-05,  1.2186e-04, -2.2857e-04,
        -2.2904e-05,  1.6138e-05,  1.6749e-04, -8.8093e-05, -6.2191e-05,
        -6.3150e-05, -1.4764e-04, -4.2345e-05,  4.5734e-05,  9.9586e-05,
        -6.0598e-05,  1.0090e-05, -7.9589e-05,  3.1723e-05,  6.7678e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4252e-04,  7.9438e-02,  1.1718e-04,  2.0534e-05, -7.7125e-09,
        -8.3466e-05,  1.5368e-05,  9.2833e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5714e-07,  3.2914e-02,  1.2319e-04, -3.7122e-05, -4.8866e-08,
         8.1874e-05, -6.2052e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3875e-04, -1.6332e-01,  1.4038e-04,  1.2781e-04, -6.2219e-05,
        -1.4905e-04, -8.1642e-05, -8.1329e-05, -1.1326e-04, -2.2124e-05,
        -1.2876e-05, -4.1975e-05, -1.9394e-04,  3.9405e-05, -3.3307e-04,
         4.5073e-05,  1.2670e-04, -1.0518e-04,  3.9322e-05,  1.1455e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4520e-04,  9.9934e-02,  7.9672e-05, -1.0805e-04, -1.3080e-04,
         8.0314e-06, -2.1136e-04,  1.2918e-04, -1.6767e-05,  2.0413e-05,
         4.3497e-05, -1.7584e-04, -1.0153e-04, -2.1286e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9567e-04,  2.2920e-02,  1.3886e-04,  1.7015e-04,  5.8367e-05,
         2.1691e-04,  1.1493e-04,  2.2447e-04,  1.5223e-05, -3.5597e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8272e-06,  9.1313e-02,  9.8960e-05,  2.4687e-05,  6.4690e-06,
        -5.8532e-05, -4.6610e-05,  8.0222e-05,  1.9494e-06,  3.5869e-05,
         9.9315e-05,  3.1576e-05,  1.6124e-04, -7.2080e-05,  1.0913e-04,
         2.0571e-04, -4.3581e-05,  6.1955e-05,  7.4815e-05, -4.8629e-05,
         1.0392e-07, -3.2804e-06,  6.9562e-06,  1.3221e-04,  2.1065e-05,
         2.8068e-05,  1.2379e-04,  1.1001e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.2833e-04,  1.4561e-01, -2.8437e-05,  1.0452e-05,  1.3604e-04,
        -4.1097e-05, -1.8336e-04, -2.6084e-05, -1.3194e-04,  2.5212e-05,
        -1.1371e-04, -1.0880e-04,  2.6478e-05, -1.8420e-04,  3.9847e-06,
         1.3795e-04, -5.7072e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2077e-04,  6.6395e-02, -4.2261e-05, -2.2999e-05,  2.4408e-05,
         5.3158e-05, -4.1523e-07, -6.3454e-05, -3.3260e-05, -1.5307e-04,
        -2.6359e-05,  8.5138e-05, -3.1369e-05, -3.5472e-06, -8.2654e-05,
        -7.1691e-05,  2.5286e-06, -3.2694e-05, -6.1211e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0193e-04,  7.4788e-02,  1.2726e-04,  3.0300e-05, -2.5923e-05,
         7.2640e-05,  3.8613e-05,  5.3643e-05, -1.7734e-05, -4.2167e-05,
        -9.4241e-05, -4.2730e-05, -1.7508e-05, -2.2017e-05,  3.3435e-05,
         8.0042e-06, -4.7687e-05, -4.4955e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-2.0633e-05, -7.1161e-02, -1.0637e-04, -1.9827e-05, -4.6900e-05,
        -1.3647e-05, -5.7248e-05, -1.6853e-05, -6.3764e-05, -4.4968e-05,
        -9.5829e-05, -5.3448e-05, -4.3757e-05, -8.7512e-05,  6.8251e-05,
        -1.5551e-05,  4.1191e-06, -5.3857e-05, -3.2748e-05,  4.9037e-05,
        -1.0119e-04,  1.9275e-06, -1.1155e-05, -6.0503e-05,  2.9936e-05,
         1.7586e-05,  3.6112e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8828e-05,  9.4661e-02, -1.1521e-05, -9.0021e-05, -5.9898e-05,
        -4.1861e-05,  4.6046e-05,  1.9712e-05, -3.1966e-05, -1.0343e-05,
        -3.8644e-05, -3.8457e-06, -7.3847e-05, -2.9029e-05, -4.6917e-05,
        -2.7928e-05,  6.3080e-05, -9.4036e-06, -6.7671e-05,  9.8069e-06,
        -9.1393e-06,  5.5921e-06, -2.3937e-05, -5.7553e-05,  3.7558e-06,
        -2.8864e-05,  4.3754e-05, -5.8847e-05,  1.2167e-05, -2.0544e-05,
        -1.5789e-05, -3.3244e-05, -5.9269e-05, -6.1260e-05, -1.4570e-05,
        -8.8890e-06,  5.3671e-05, -5.6212e-05, -9.3514e-07,  4.0024e-05,
        -9.3805e-05, -3.4431e-05, -4.0413e-05, -6.9295e-05, -1.5411e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8520e-04,  1.3091e-01, -8.7651e-05,  4.8557e-05, -3.7490e-05,
         9.3844e-07, -1.1169e-04,  5.5495e-05, -4.5807e-06, -7.0932e-05,
        -6.5474e-05,  5.1271e-05,  9.6111e-05, -1.9414e-04, -4.0345e-05,
         3.2540e-06, -4.4977e-05, -1.2589e-04, -7.6133e-06, -9.7887e-05,
        -5.5905e-05, -4.8459e-05, -1.2235e-04, -1.2429e-04, -1.1402e-04,
        -2.0367e-05, -4.3799e-05, -9.5547e-05, -3.6990e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6184e-05, -1.6135e-01,  2.9220e-06,  1.9644e-04, -1.8424e-04,
         1.9098e-04,  1.7274e-04, -1.6044e-04, -9.2343e-05,  2.1713e-04,
         1.1218e-05,  6.2588e-06, -2.1394e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0221e-04, -6.2038e-02, -3.0913e-06,  1.8411e-05, -1.9573e-05,
         9.1486e-06, -4.1259e-05,  2.5952e-05, -1.1258e-06,  9.4880e-06,
         9.5852e-07,  2.0311e-05,  6.5432e-07,  1.3372e-05,  2.0127e-05,
        -4.1935e-05,  9.0185e-05, -1.2412e-05,  2.4003e-05,  1.8413e-05,
         7.1245e-06,  1.8999e-05, -3.5970e-05, -2.4881e-05, -5.6888e-06,
         2.3193e-05,  3.7815e-05,  2.8376e-05, -1.7307e-05,  1.6100e-05,
         5.0641e-05, -2.5977e-05,  4.5158e-05, -3.7463e-05, -2.3716e-05,
         2.2170e-05, -8.7805e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1551e-04, -6.5521e-02, -1.0171e-05,  1.2173e-05,  2.7931e-05,
        -6.7485e-05,  1.1736e-05, -1.9406e-05, -1.5579e-05,  3.5188e-05,
        -1.0122e-05,  1.4714e-05,  2.7060e-06,  5.0128e-05,  4.1064e-05,
         7.3901e-05,  2.3775e-05,  3.8411e-05, -3.7672e-05,  3.1531e-05,
        -2.1590e-05, -1.0960e-05,  4.0768e-05,  6.8170e-05,  5.3132e-06,
         2.2939e-05,  7.6594e-05,  1.5547e-05, -3.9591e-06, -5.1227e-05,
         3.6847e-05,  3.6339e-05, -3.7324e-05,  1.9858e-05, -2.6732e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6646e-04,  3.6575e-02,  4.6189e-05,  1.2156e-05,  4.1872e-05,
         7.4334e-05,  3.3672e-06,  5.6100e-05,  3.8398e-05,  3.3793e-05,
        -2.8936e-05, -1.1911e-05,  9.0686e-06,  5.0147e-05,  9.1629e-06,
        -2.6436e-05, -1.2138e-05,  1.5494e-04, -1.3128e-05, -2.2152e-05,
         1.2612e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2069e-04, -1.1430e-01, -9.4251e-05, -3.1571e-05, -2.2069e-05,
         2.5808e-05,  1.6632e-05, -1.4359e-05, -2.8008e-05,  1.4659e-05,
        -1.0200e-04, -9.5702e-05,  1.6773e-05, -1.0860e-04, -3.0066e-08,
        -1.2269e-04,  9.0359e-06, -9.1589e-05, -7.9030e-05, -1.8102e-05,
         1.2889e-04,  3.3889e-06,  3.4678e-05,  4.6710e-05, -2.9622e-06,
        -9.9188e-05,  4.5004e-06,  2.4735e-05, -3.9859e-05,  7.0807e-05,
        -3.2151e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2968e-05, -5.2538e-02, -1.7038e-05,  1.7258e-06,  2.0630e-05,
        -1.0347e-05, -1.5918e-05, -8.7324e-06, -3.4417e-05, -4.2724e-05,
        -6.4895e-06, -2.8301e-05, -2.8689e-05, -7.2688e-06, -3.1079e-05,
         4.6152e-05,  2.0352e-05,  1.1082e-05, -1.1068e-05, -8.0593e-06,
        -9.3082e-06, -1.0401e-05, -2.6412e-05,  1.2200e-05, -3.2970e-05,
        -2.6853e-06,  9.5639e-06, -1.7590e-05, -9.0417e-06,  1.6386e-05,
        -1.0182e-05, -1.8569e-05, -8.0975e-06, -7.2993e-06, -1.3110e-05,
        -1.8382e-05,  1.5698e-05, -3.0112e-05, -1.4502e-05, -2.4044e-05,
        -3.2457e-05, -6.3122e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5285e-05,  5.7071e-02,  4.0539e-05,  2.2253e-05,  3.3457e-05,
        -8.3618e-05,  8.3780e-06,  2.8030e-05, -1.3980e-05, -1.1203e-04,
        -6.0046e-05, -7.1820e-05, -1.0805e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4688e-04, -8.3262e-02, -4.1310e-05,  1.6581e-05,  2.1406e-05,
         5.1802e-05,  1.1478e-06, -1.7113e-05, -1.4073e-05, -2.1045e-05,
        -2.6311e-05, -4.9534e-06, -9.8979e-06, -1.0976e-05,  2.6975e-05,
        -6.6083e-06,  3.9718e-06, -8.2557e-05,  2.9376e-05,  5.1321e-05,
        -5.3167e-05,  2.1754e-05, -7.0667e-05,  2.4618e-05, -1.8245e-05,
         5.8868e-05, -3.2670e-05, -1.4439e-05, -3.7210e-05,  3.0780e-06,
        -2.0355e-05, -2.4411e-05, -6.9010e-05,  1.3721e-05, -1.1498e-05,
         7.8096e-05, -1.9077e-05,  2.8350e-05,  5.6971e-06, -7.8434e-05,
         8.7923e-06,  4.8643e-05,  4.0572e-05,  6.4404e-06,  5.1739e-05,
        -4.1812e-05, -4.5336e-05,  6.5887e-07,  2.3471e-05,  9.0890e-05,
         3.8247e-05,  2.6515e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5056e-04,  5.9574e-02, -4.3304e-05, -1.8907e-05, -7.6883e-05,
        -6.1340e-05,  2.9351e-05, -4.9924e-05, -5.0683e-05, -1.0322e-05,
        -9.2789e-05, -4.8141e-05, -4.8064e-05, -1.1187e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([-2.9335e-04, -8.7770e-02,  3.7406e-06, -4.3353e-05,  1.2872e-05,
        -4.6766e-05,  8.1256e-06, -3.4951e-05,  3.9678e-05, -3.5285e-06,
        -7.2885e-05, -4.8673e-06, -2.7587e-05, -1.3357e-05,  3.0776e-05,
        -3.9981e-05,  2.5994e-05, -3.7527e-05,  1.1824e-05,  2.8950e-05,
        -5.4099e-06, -1.9325e-05, -2.0096e-05,  6.6434e-06, -2.8933e-05,
        -2.7735e-05,  3.5477e-06,  2.8851e-05, -1.6827e-05, -4.7590e-05,
        -1.0291e-04,  2.5363e-05, -5.1208e-05, -3.4079e-05, -2.4235e-05,
        -6.7055e-06,  4.0107e-06, -2.6762e-05, -4.0067e-05, -1.4655e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2070e-05, -5.0297e-02,  1.5445e-06,  7.8105e-06, -5.5497e-06,
         4.9012e-05,  9.2735e-06, -2.6569e-05,  2.0666e-06,  1.7606e-05,
        -9.9593e-06, -6.4360e-06,  6.1758e-06,  3.5869e-06,  7.8798e-06,
         2.8686e-05, -3.3368e-05,  1.3961e-05,  6.6412e-06,  1.5881e-05,
        -2.3302e-05, -1.3013e-05,  1.3649e-06, -2.0261e-05, -2.3508e-06,
         9.0289e-06, -7.7735e-06, -5.9315e-08, -2.4592e-05, -8.0346e-06,
         2.9909e-05, -4.9004e-06,  2.3109e-06,  1.5001e-05, -4.5396e-06,
        -1.4657e-05, -7.1844e-06, -4.5918e-06, -3.1776e-06,  1.1415e-05,
        -5.0037e-06, -1.8389e-05, -4.8741e-07, -1.9150e-05,  7.6013e-06,
        -3.2861e-05,  4.0136e-06,  7.0544e-06, -8.9220e-06,  7.6732e-07,
         4.5260e-07, -9.4169e-06, -1.0234e-05,  1.9537e-05, -3.8091e-05,
         1.5257e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0897e-05, -8.8819e-02,  5.6942e-05,  5.2172e-07,  2.9158e-05,
         1.8688e-05,  1.9561e-06,  5.3021e-05, -3.9040e-05,  2.4224e-05,
        -4.5731e-06, -1.6240e-05,  3.0106e-07,  8.7806e-06, -2.2985e-05,
        -2.9428e-05,  1.8397e-05,  2.1203e-06,  2.7588e-05, -7.6653e-05,
        -3.4960e-05,  5.9677e-05, -2.2087e-06, -2.8595e-05,  4.4928e-05,
        -2.5374e-05, -4.0054e-05,  4.1996e-06,  2.8248e-05,  6.7599e-06,
         1.2097e-05,  4.1130e-06, -1.4571e-05,  2.1536e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6447e-04,  1.1608e-01,  4.6318e-05, -2.0890e-04,  5.5172e-05,
        -1.0045e-04, -2.0989e-05,  7.7976e-05,  3.2569e-05, -3.6275e-04,
         4.3717e-05,  6.2267e-06,  1.1092e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9393e-04,  7.5771e-02, -2.5789e-05,  3.3675e-08, -7.6469e-05,
         1.3246e-05,  3.5761e-05, -1.2899e-05, -9.4729e-05, -1.9697e-05,
         4.1956e-06,  1.0174e-04,  4.5416e-05, -2.5853e-05, -1.0255e-05,
        -1.0873e-05, -6.7445e-06,  7.2638e-05, -1.9108e-05,  4.8260e-05,
         6.4447e-05,  5.4171e-05, -2.0998e-05, -5.1429e-05,  8.8375e-05,
        -1.1152e-04,  9.0977e-06, -8.4030e-05, -3.0400e-05, -3.2387e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0284e-04, -1.1324e-01, -5.5731e-05, -4.2680e-05,  8.7844e-05,
         3.3642e-05,  6.3593e-05, -8.2407e-06, -1.7721e-04, -8.9644e-06,
        -6.0527e-05,  6.7680e-06, -9.9169e-05, -1.1942e-04, -6.5027e-05,
        -1.9138e-05,  1.0533e-04, -6.9603e-05,  2.4595e-05,  1.7798e-04,
         2.5344e-05,  1.5513e-05, -1.0578e-04, -1.1371e-04, -1.0571e-04,
        -5.3025e-05, -1.4365e-04, -1.3359e-04, -1.7631e-05, -1.6939e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0278e-04,  6.3939e-02, -2.6306e-05,  6.5769e-06,  3.9838e-05,
        -2.3395e-05, -3.1571e-06,  2.9721e-05, -1.6751e-06,  4.5699e-05,
        -4.8367e-05,  6.1614e-05,  5.5961e-05,  3.3362e-05,  3.4659e-05,
        -6.8612e-06,  6.0932e-05, -3.7910e-05, -7.2600e-07, -4.5612e-05,
         5.3664e-05, -4.9886e-05, -2.3915e-06, -6.0701e-06,  6.1891e-06,
        -7.6285e-06,  2.6856e-05, -3.3721e-05, -5.3226e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4424e-04, -1.6802e-01,  1.0814e-04, -2.8591e-05,  3.1691e-04,
         7.8862e-05,  3.4069e-05, -2.5063e-05, -2.0021e-04, -5.5230e-04,
         1.7034e-04,  1.0054e-04,  1.6670e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1181e-04, -1.6530e-01, -1.2038e-04,  5.6188e-06, -1.0002e-04,
         9.4294e-05, -3.7665e-06,  1.5983e-04,  6.3005e-05, -1.3221e-04,
        -1.0121e-04,  1.5504e-04, -7.5902e-06,  8.8163e-05,  6.1100e-05,
         1.4980e-04, -1.5038e-04,  1.2004e-04, -1.5291e-05, -9.5528e-06,
         2.0728e-04,  1.7026e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5577e-04, -1.6468e-01,  7.4476e-05,  1.8516e-05,  1.0001e-05,
        -2.0404e-05, -3.9003e-05, -5.2617e-05,  6.9597e-05,  2.3390e-06,
        -8.2273e-05,  4.8262e-05,  8.5725e-05,  9.1176e-05, -4.2961e-05,
         7.3487e-05,  5.3982e-05,  2.2023e-05, -2.9788e-05,  8.7429e-05,
        -4.1889e-05,  1.2601e-04, -8.5727e-05,  2.2851e-05,  1.0835e-04,
         4.6257e-06,  2.9890e-05, -2.0653e-06,  2.7907e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4806e-04, -1.3494e-01,  1.0472e-04,  5.7436e-05, -3.0469e-05,
         4.4392e-05, -2.0967e-05,  4.4019e-05,  4.1166e-05,  7.5329e-05,
         6.7620e-05,  9.1138e-05,  4.8880e-05,  4.5079e-05,  6.3574e-05,
         1.9803e-04, -2.9743e-05, -6.4177e-05,  6.0163e-05,  1.0552e-04,
         1.0368e-04,  3.7321e-05, -3.3991e-05, -1.2829e-05, -1.3744e-05,
         6.3802e-05, -3.0055e-05,  8.6903e-05,  5.9306e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6761e-04,  4.1331e-02, -9.3349e-05, -6.9013e-06,  4.0906e-05,
         4.3899e-05,  3.1022e-05, -1.5136e-05, -9.4665e-05, -1.8506e-05,
         4.5612e-06,  9.1960e-06,  3.7783e-05, -2.3018e-06,  5.9295e-07,
         2.0202e-05,  1.2955e-04,  3.0581e-06,  5.4654e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 1.0131e-04, -1.3899e-01,  5.4186e-05,  5.7538e-05,  4.1044e-05,
        -2.9530e-05,  6.2512e-05,  4.6866e-06,  1.0194e-05,  5.7903e-05,
         2.8375e-05,  1.4086e-05, -2.3707e-05,  5.2475e-05, -5.7067e-05,
         4.0694e-05,  7.4228e-05,  3.0267e-05,  1.2584e-05,  4.0144e-05,
        -3.7453e-05,  1.1409e-06,  5.9403e-05, -1.7645e-05, -4.7357e-05,
         8.3714e-05, -6.5184e-05,  1.0188e-05,  4.8676e-05,  4.5562e-06,
         3.8427e-05,  2.4012e-05, -6.1458e-06, -1.0081e-05,  1.7489e-05,
         9.6631e-05,  1.1225e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.2348e-04, -8.2835e-02, -3.6084e-05,  1.6547e-05,  5.7985e-06,
         2.5621e-05, -1.3450e-05, -2.6390e-05, -1.0906e-04, -2.8087e-05,
        -1.0928e-04,  7.1311e-05,  5.3933e-06, -4.5681e-05,  1.8518e-05,
        -7.3205e-05, -5.6342e-05, -9.1213e-05,  1.4926e-04, -7.5128e-05,
        -5.5803e-06,  6.1239e-05, -5.6057e-05,  3.1249e-05,  2.1143e-05,
         5.5187e-05, -7.2189e-05,  3.2424e-05, -1.1471e-04, -5.7274e-05,
        -9.2696e-05, -4.8184e-05, -2.2324e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4130e-05, -1.5820e-01,  3.3728e-05,  4.5259e-05,  1.3727e-04,
         1.9060e-04, -8.6245e-05, -9.5180e-05,  1.1637e-04,  3.2230e-04,
        -2.9776e-05,  1.9606e-04,  3.5498e-05, -2.8004e-05,  8.3284e-05,
         1.2051e-04,  3.6797e-05,  5.8873e-06,  1.2251e-04, -4.6452e-05,
         1.8244e-04,  1.1103e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2369e-04, -1.1922e-01, -2.9233e-05, -2.7128e-05,  3.2138e-06,
        -1.9422e-05,  1.0777e-04,  3.1896e-06, -3.2614e-05, -1.9156e-05,
         9.8005e-06,  9.5285e-05,  1.8905e-06,  8.8269e-05,  4.0945e-05,
         1.1913e-04, -3.0505e-05, -2.8987e-05, -8.2910e-05, -3.2685e-05,
         2.5863e-05, -2.8354e-05,  2.6171e-05,  9.7114e-06, -7.0679e-05,
         4.2964e-05, -2.0119e-05,  1.3053e-04,  9.2719e-05,  6.1104e-05,
         1.2134e-04,  4.4127e-05,  1.5796e-05,  1.6499e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9752e-05,  1.2377e-01,  2.5216e-05, -1.4156e-05, -3.3061e-05,
         1.4289e-04,  7.3287e-05, -1.8716e-04, -1.7422e-04, -7.2413e-05,
         1.3329e-04, -6.2725e-05, -4.7049e-05,  1.1543e-04, -1.7304e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9103e-04,  9.8414e-02,  6.7833e-05,  3.1724e-06,  1.3820e-04,
         5.4191e-05,  5.8767e-05, -2.2632e-05, -1.7294e-04, -2.3194e-05,
         1.1122e-05,  9.4147e-05, -1.1360e-04,  9.5211e-05,  6.6439e-05,
         9.9272e-05, -1.2463e-04, -8.9599e-05, -3.6509e-05,  5.3929e-05,
         5.4201e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5454e-04, -8.7456e-02, -3.1003e-05,  4.2588e-06,  1.3627e-05,
         1.8500e-05, -2.0541e-05, -1.0385e-05, -1.7691e-05,  1.6694e-05,
        -2.4828e-05, -3.5529e-05,  9.2913e-05, -5.1319e-06, -1.2702e-05,
         1.2072e-04, -3.6038e-05,  2.6370e-05, -4.0492e-05,  2.7418e-05,
        -5.3805e-05, -2.6883e-05,  5.3949e-05, -4.2845e-05, -4.7090e-05,
         2.1370e-05,  4.5463e-05,  2.8712e-06,  5.1424e-05, -3.5503e-05,
        -2.8996e-05, -5.4433e-05,  6.2513e-05,  7.2319e-06,  4.1727e-06,
        -4.1690e-05, -9.2138e-06,  3.0934e-05, -3.3644e-05, -1.3262e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9589e-04, -1.3724e-01, -1.8316e-04, -1.3174e-05,  2.3060e-05,
        -9.3199e-05, -6.8669e-05,  4.6817e-05,  9.9200e-06,  8.9143e-05,
         5.1518e-05, -2.0637e-04,  1.0301e-05, -4.1964e-05, -5.4083e-05,
         1.2197e-04,  1.5388e-04,  1.8533e-04, -8.5391e-06, -2.7426e-05,
         1.3210e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5543e-04, -1.9617e-01,  3.8087e-05,  2.0143e-04, -1.2875e-04,
        -1.4233e-04,  6.2902e-05, -1.6533e-04,  1.6163e-05, -1.2532e-06,
         1.0343e-04,  2.1684e-04,  1.3759e-04, -8.3601e-05,  9.6010e-05,
        -1.2646e-04, -3.4282e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4157e-04,  9.6288e-02,  9.6244e-05,  8.4617e-06, -1.5821e-05,
        -6.3923e-05,  3.7447e-05,  1.8646e-05,  7.8621e-05, -2.4355e-05,
        -4.7313e-05, -3.1462e-05,  1.6035e-05, -1.2848e-04, -2.5715e-05,
        -3.4781e-05, -7.4800e-05,  7.9584e-05, -6.9445e-05, -1.4563e-05,
        -2.7061e-05,  1.4786e-05, -8.6113e-05,  4.7184e-05, -4.7531e-06,
         3.2191e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1248e-04,  6.3497e-02, -6.4825e-05,  1.0070e-05, -5.1772e-05,
        -2.7407e-04,  1.1549e-04,  1.0555e-05, -6.7395e-05, -1.2976e-04,
        -2.4313e-05, -9.4134e-05, -6.7229e-05, -6.9753e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4538e-04, -1.0877e-01,  4.4286e-05,  6.4039e-05, -7.5920e-05,
        -7.6258e-05, -1.1715e-04, -7.6877e-05, -4.8729e-05,  1.9216e-05,
         8.7332e-06, -4.0551e-05, -1.1103e-05,  4.3753e-05, -1.1859e-05,
         2.5734e-05,  2.7208e-05, -3.5286e-05,  4.1016e-05, -2.4368e-05,
         2.8615e-06, -9.6002e-05, -1.6948e-05,  6.5140e-06,  1.9503e-05,
         2.2921e-05, -4.4620e-05, -6.3483e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.1121e-04,  1.0855e-01,  1.9411e-04, -4.5606e-05, -3.7893e-05,
         4.7796e-05, -6.7330e-05,  6.8125e-05,  7.9653e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3402e-04,  2.0412e-01,  9.8536e-05, -5.3451e-05,  3.1685e-05,
         3.3770e-05,  7.8313e-05, -1.4765e-04,  1.7685e-04,  1.9535e-05,
        -2.5968e-04,  1.1667e-04,  5.9219e-06,  1.4596e-05,  2.3114e-05,
         2.1699e-05, -6.7072e-05,  6.5098e-07,  7.6836e-05,  1.3617e-04,
        -1.1628e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4694e-04,  1.4882e-01,  8.3165e-05,  1.2476e-04, -1.0978e-04,
        -1.1017e-05,  1.3775e-04,  5.7115e-05, -2.8318e-07, -8.4746e-05,
        -1.1963e-04,  3.0065e-05, -2.0475e-04, -3.3652e-04,  2.3149e-05,
        -2.6249e-04, -7.0484e-05, -2.3394e-05, -6.8746e-05, -6.3301e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2301e-04, -1.7272e-01,  1.1781e-04, -2.3730e-06, -1.4321e-04,
         2.3315e-04, -1.6609e-04, -6.2316e-05, -3.8495e-04,  3.6264e-05,
         2.2729e-04, -1.1387e-04,  1.5063e-04, -2.4930e-04,  1.0619e-04,
        -2.7065e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9732e-06,  1.4227e-01,  1.1486e-04, -1.7099e-04, -5.5563e-05,
         5.6145e-05, -1.1733e-04, -5.7609e-05, -3.2907e-04, -1.5049e-04,
         1.5621e-04, -1.7485e-04, -1.6328e-04, -9.8922e-05, -1.1088e-04,
         1.2457e-04, -9.1325e-06,  7.7558e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4039e-05,  4.0618e-02,  2.7483e-05, -5.6346e-05, -2.2690e-05,
         1.6768e-06,  1.6483e-05,  3.8405e-05,  3.0896e-05, -1.5826e-05,
        -1.3265e-05, -1.4826e-05, -3.5437e-05,  6.0362e-05,  3.1649e-05,
        -4.5693e-05, -1.8256e-05, -3.1728e-05, -6.2176e-05, -1.0085e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0966e-04,  7.1069e-02,  8.6993e-06,  6.8530e-06, -3.1596e-06,
        -4.8979e-06, -5.0696e-06, -4.2998e-06,  4.8850e-05, -6.9933e-05,
        -2.6391e-05,  6.1088e-05,  2.1917e-05,  8.4427e-05,  4.4738e-05,
         8.3770e-06, -3.8619e-05, -4.3406e-05, -2.7348e-05, -1.0010e-04,
        -8.5503e-05,  2.1437e-04, -5.2942e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3295e-05,  7.7748e-02, -3.3828e-05,  4.4470e-05,  3.7738e-05,
        -2.7463e-05, -5.3525e-05, -2.1766e-05,  1.5553e-05, -2.3768e-05,
        -7.4784e-05, -7.6921e-05,  5.2841e-05, -4.5147e-05, -1.5169e-05,
        -7.5580e-05, -5.0637e-05, -6.3161e-05, -3.4477e-05, -6.2858e-05,
        -3.0853e-05, -4.1293e-05, -5.7956e-05, -3.7679e-05, -3.2576e-05,
        -4.3815e-05,  5.8106e-05, -4.7032e-05, -9.0816e-05, -3.9762e-05,
        -2.4659e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0201e-04, -1.6142e-01,  8.1411e-05, -1.1260e-04,  3.0585e-05,
         3.3196e-05,  7.3935e-05, -3.2568e-05,  3.8519e-05, -2.2670e-05,
        -6.3535e-06, -9.4528e-05,  1.0218e-05, -1.1509e-04, -5.4256e-05,
        -1.0349e-04,  2.2555e-05,  6.2412e-05,  5.8270e-05, -3.3260e-05,
         2.0405e-05, -3.4954e-05, -7.6572e-05,  4.5984e-05,  2.0306e-05,
        -7.2071e-06, -1.6877e-05, -7.2273e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4882e-04,  1.0865e-01,  1.3522e-04, -1.4845e-04, -1.3349e-05,
         7.4486e-05, -6.1920e-05, -6.7419e-05, -7.3182e-05, -2.5626e-05,
         3.3963e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1216e-04, -9.5618e-02, -5.4379e-05,  5.3328e-06, -1.4457e-04,
         1.7954e-05, -7.8391e-05,  2.0144e-05, -1.0491e-04,  5.2593e-05,
        -5.2282e-05,  5.5309e-05, -1.1784e-04, -8.1252e-05, -2.5530e-05,
        -1.1664e-04, -1.7452e-04,  9.0608e-05,  3.2448e-05,  2.5927e-05,
        -9.4041e-05,  3.8619e-06, -7.2307e-05, -3.0960e-05, -1.1450e-04,
         4.1480e-05, -1.2740e-05,  1.1387e-05,  3.7884e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7977e-04,  1.8011e-01, -7.8038e-05,  9.6353e-05, -5.3970e-05,
         1.6980e-05, -1.7725e-04,  6.1798e-05, -8.4605e-06,  1.2305e-04,
         9.9728e-05,  1.4740e-04,  6.6844e-05, -1.7431e-04, -5.3591e-05,
        -1.2679e-04,  1.5181e-04,  9.2897e-05,  3.1077e-05, -6.6260e-05,
         2.4975e-06, -4.6607e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 3.7491e-05,  1.1264e-01, -1.6325e-04,  3.2802e-05, -2.8834e-04,
        -2.1378e-05,  1.3028e-05,  1.9191e-05, -3.2795e-05, -4.8201e-05,
        -1.1488e-04, -2.1732e-04, -2.1685e-04, -9.8492e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4049e-05, -1.1837e-01, -6.8067e-05, -2.5686e-05, -1.2139e-05,
        -1.2815e-04,  5.8123e-05,  6.5148e-05,  8.5026e-05, -6.9488e-05,
         5.2681e-05,  4.7989e-05,  3.0125e-05, -3.5610e-06,  1.9314e-05,
         1.5155e-05,  3.3211e-05, -9.9202e-05, -4.7177e-05,  4.5325e-05,
         3.0812e-05,  5.7949e-05,  2.9920e-06, -2.1952e-05, -4.9361e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.7514e-05,  7.4574e-02,  8.7175e-05, -7.9409e-05,  1.2212e-06,
        -5.9464e-06,  1.8237e-05,  6.6446e-05,  5.3981e-05,  2.1734e-05,
        -3.7203e-05, -1.3101e-05,  1.1234e-05, -1.5588e-06, -4.0647e-05,
         8.2591e-06, -7.1948e-05, -1.9048e-05, -8.3532e-07, -2.4361e-05,
        -2.5692e-05, -1.8234e-05,  1.8531e-05, -3.2593e-05, -7.5435e-05,
        -3.9725e-05, -1.8780e-05,  1.6219e-07,  5.8727e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8267e-05,  5.4014e-02,  8.8125e-05,  2.5749e-05, -4.0932e-05,
         1.5354e-04,  1.2702e-04, -7.8479e-05,  1.6139e-04,  6.4338e-05,
         5.9957e-05,  2.2317e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.7016e-05,  4.1234e-02, -5.7487e-05, -1.0183e-04,  5.4720e-05,
        -3.5650e-05,  1.4096e-05,  2.6541e-06, -3.2406e-05,  4.4145e-05,
         5.3267e-05, -4.8950e-06, -3.3831e-05, -1.8468e-05, -2.4741e-05,
         7.3877e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7558e-04,  5.6149e-02, -2.5394e-05,  3.9759e-05,  5.1533e-05,
         3.4384e-05,  1.5985e-05,  2.6921e-05,  6.9435e-06,  2.6774e-05,
         7.3589e-05,  2.2000e-05,  3.6371e-05, -1.8556e-05,  2.8018e-06,
         1.1403e-05,  1.2853e-04, -6.4682e-05, -3.0249e-05, -3.1616e-05,
        -3.7119e-05, -3.8267e-05,  2.8317e-05,  5.1852e-05,  3.5789e-05,
         3.3314e-05,  6.7160e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4764e-04, -1.7380e-01,  1.2525e-04,  5.0061e-05, -1.6831e-04,
         1.7439e-05,  3.2122e-05, -1.8361e-04, -2.3904e-05, -1.6415e-04,
        -3.8776e-05, -1.7192e-04, -6.8052e-05,  1.5683e-04, -1.4685e-04,
         8.0007e-05, -1.1875e-05,  1.1118e-05,  1.5562e-05,  3.1982e-05,
         7.5580e-05,  7.8101e-05, -1.6253e-04,  5.8583e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.4760e-04, -1.4135e-01, -3.4626e-05, -7.2928e-05, -2.8524e-05,
        -4.2319e-05,  2.2997e-06,  1.6806e-05, -3.4372e-05,  1.3079e-05,
        -1.3794e-06, -3.8191e-05,  6.7271e-05,  8.6091e-05, -2.2736e-05,
        -5.2582e-05, -5.7738e-05,  6.2595e-05,  1.5700e-05, -7.8750e-05,
         2.1993e-05,  8.4852e-06, -2.4067e-05, -2.7552e-05, -8.1612e-05,
        -3.2178e-05,  2.9036e-05,  2.0985e-07, -1.6767e-05, -5.4831e-05,
        -7.9970e-05,  5.9482e-05, -3.5241e-06,  3.6734e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2354e-06,  1.9487e-01, -1.3210e-04,  1.1343e-04,  1.3722e-06,
         9.3514e-05,  7.7158e-05, -2.2537e-05,  9.9455e-05,  3.8210e-05,
         1.7314e-04,  1.3639e-04,  7.7338e-05,  8.2426e-05,  1.9840e-04,
        -3.2220e-05,  2.3142e-04,  1.0168e-04, -4.6539e-06,  1.9880e-05,
        -1.4844e-04,  1.5574e-04,  8.4887e-05,  8.1763e-05,  1.1160e-04,
        -7.7286e-05, -3.3964e-05,  5.9385e-05, -4.0382e-05,  2.5027e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1522e-04,  1.5853e-01,  6.5020e-05, -1.1182e-04, -4.2264e-05,
         1.7245e-04, -2.9519e-05, -1.4978e-04,  4.4876e-05, -4.5797e-06,
        -1.5410e-04,  1.7834e-05, -9.5577e-05, -1.5346e-04, -1.6182e-04,
        -7.2420e-05, -1.4249e-04, -1.3313e-04, -3.7938e-05, -3.9610e-05,
         6.3062e-05,  6.2482e-05, -1.2653e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.3628e-04, -1.4941e-01, -1.1074e-04, -1.0680e-05, -9.1751e-05,
        -7.0844e-05, -1.5460e-06, -7.5630e-06,  2.0913e-06, -5.0500e-06,
        -4.4471e-05,  2.2681e-05, -8.7855e-05, -3.8214e-07, -5.0034e-05,
         3.1957e-06, -8.1719e-05, -5.4494e-05,  1.4992e-04, -4.8572e-05,
        -6.5169e-05, -2.2323e-05, -1.7066e-05,  1.8977e-06, -1.3304e-05,
        -9.6553e-05,  3.7002e-05, -3.3256e-05, -7.2203e-05, -1.2361e-04,
        -4.9079e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0208e-04, -1.0972e-01, -3.2324e-05,  4.7080e-05, -4.5862e-05,
        -5.3130e-05, -3.8712e-05, -7.6774e-06,  3.5692e-05,  1.1543e-05,
         5.5071e-06,  6.4920e-06, -4.5398e-05, -4.4355e-05,  2.0392e-05,
         5.4341e-05, -1.0262e-05, -1.0750e-05, -1.8054e-05,  2.7441e-05,
        -5.8906e-06, -3.1593e-05, -6.7325e-05,  5.3740e-06, -2.8954e-05,
        -7.8912e-06, -1.9368e-05, -1.6715e-05,  4.6293e-05, -8.4119e-06,
        -6.0684e-06, -8.3798e-06, -2.8773e-05, -6.7024e-05, -6.5127e-05,
        -7.3304e-05, -2.9037e-05,  5.1030e-05, -5.1799e-05,  1.6772e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.4844e-04,  1.9451e-01, -1.4825e-04,  2.4115e-04,  9.3352e-05,
        -2.5542e-04, -5.0814e-05, -1.6001e-04, -1.4851e-04, -2.0967e-04,
         1.4103e-04, -6.0318e-05, -1.4417e-04, -5.9544e-05, -1.0971e-05,
        -1.9351e-04,  2.4680e-05,  8.8917e-05, -3.3746e-05, -5.1477e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2061e-04, -1.2415e-01,  2.5064e-05, -8.5509e-05,  4.0037e-04,
         1.7762e-04, -1.0542e-04,  2.8220e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1887e-04,  1.5013e-01,  1.9551e-04,  1.3464e-04,  1.1889e-04,
        -9.8714e-05,  9.5619e-05, -3.5298e-04, -1.4834e-04,  1.8768e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9230e-04, -1.4079e-01, -1.2066e-04, -7.6432e-05, -5.7092e-05,
         1.2455e-05, -2.5628e-05, -8.5194e-06, -1.3503e-05, -1.0112e-04,
        -7.2130e-05, -1.8364e-06, -3.2738e-05, -1.1656e-04,  1.4067e-05,
        -4.9244e-05, -1.4442e-04,  4.2773e-05,  7.3904e-05,  7.1574e-05,
        -1.0070e-04,  4.5417e-05, -1.7489e-05, -2.9351e-05,  1.2150e-04,
        -7.8086e-05,  2.9969e-05,  8.2747e-05, -5.7912e-05, -1.0155e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6937e-04,  1.1363e-01, -5.9525e-06, -4.6084e-05, -8.0757e-05,
        -5.3834e-05, -2.9638e-05,  3.3007e-06, -8.3528e-05,  5.9377e-05,
         1.6828e-05, -2.3667e-06, -5.7090e-05, -9.8439e-05, -7.2411e-05,
        -9.6757e-05, -9.6629e-05,  2.6545e-05, -2.9366e-05, -1.1631e-04,
        -7.5408e-05,  4.2097e-05,  1.1559e-05,  1.0766e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0373e-04, -1.0541e-01, -1.0617e-05, -3.1805e-05, -3.6571e-05,
         2.8721e-05,  5.1713e-05, -5.2473e-05, -6.6155e-05, -7.4186e-05,
        -2.5369e-05, -5.0463e-05,  3.9296e-05, -2.0267e-05,  3.6577e-05,
         1.1393e-05, -8.5437e-05, -5.2448e-06, -8.3331e-06, -6.9220e-05,
         1.0365e-04,  1.2561e-04,  3.6411e-05, -1.0714e-05,  8.1858e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.9398e-04,  1.4465e-01, -3.7091e-05, -4.7369e-05,  2.5236e-05,
         3.4434e-05, -2.7839e-05, -8.5282e-05,  2.2037e-05, -5.2270e-05,
         3.9091e-05, -1.7391e-05, -1.1488e-04,  5.7969e-05,  6.9354e-05,
         1.7944e-05, -5.2790e-06,  2.3817e-05, -3.8241e-05,  6.6732e-05,
         5.9114e-05,  7.2973e-05,  2.0236e-05, -8.7009e-05, -4.6734e-05,
        -7.1084e-05, -4.4966e-05,  7.3342e-05, -1.8640e-05, -1.1265e-04,
        -1.9318e-04, -2.8356e-05, -6.5047e-05,  1.1610e-05, -8.6585e-05,
         2.7486e-05, -5.0667e-06, -1.0795e-06, -1.5149e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0777e-04,  6.5901e-02,  1.1497e-04,  8.5972e-05,  1.2014e-04,
        -7.5855e-05, -3.7924e-05,  1.1459e-04,  2.7822e-05,  5.2971e-05,
         1.0995e-04,  9.5779e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.5002e-04,  1.7775e-01, -8.4747e-05,  3.0229e-04,  2.1802e-04,
        -1.4275e-04,  4.2546e-04,  9.7626e-06,  2.2957e-05, -1.2145e-04,
        -5.3483e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.8684e-04,  1.1173e-01, -8.2325e-05, -4.1124e-05,  1.0538e-04,
        -2.1395e-05, -5.0872e-05, -6.7774e-05, -9.9773e-05,  4.9798e-05,
         4.2957e-05, -8.2752e-05,  1.2810e-05, -4.9770e-05, -8.5853e-06,
        -1.2465e-04, -7.5917e-05, -6.1361e-05, -1.0876e-04, -1.1380e-05,
        -9.8012e-05, -1.3241e-04,  6.8670e-05,  6.2825e-05, -5.0771e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9344e-05, -1.7322e-01,  5.8355e-05, -1.1363e-05,  5.4667e-06,
         9.0872e-05, -4.1741e-06,  1.5137e-04,  1.1207e-04, -1.0203e-05,
         7.4510e-05,  3.6306e-05, -1.5164e-05, -3.3445e-05, -4.7636e-05,
        -1.7686e-05,  1.8835e-04,  5.1626e-05, -8.9678e-06,  2.3276e-04,
         1.4268e-04,  1.1897e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4444e-04,  8.8873e-02,  4.7011e-06,  2.3873e-05, -7.4967e-05,
        -6.7761e-05,  1.4612e-04, -4.3822e-05,  1.1437e-04, -3.1496e-05,
        -3.6902e-05, -4.3132e-05, -2.6917e-05,  3.8309e-05, -7.5068e-05,
        -7.0057e-05, -5.0505e-05, -4.0166e-05,  8.8215e-06,  4.1200e-05,
        -4.7333e-05, -6.9734e-05,  2.0680e-05, -2.5951e-05, -6.9222e-05,
         4.2614e-06,  1.9301e-05,  1.4935e-05,  2.2028e-05, -1.2885e-04,
        -3.7070e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-2.6100e-05, -7.2221e-02, -1.3464e-05, -1.9145e-05, -2.3699e-06,
        -2.0771e-05, -1.9114e-05, -2.8683e-05,  1.3929e-05, -9.1343e-06,
        -1.3403e-05, -2.2062e-06, -2.6076e-05, -6.3954e-06, -2.9823e-05,
        -1.6403e-05, -1.9088e-05, -1.1658e-05, -2.9578e-05, -1.4918e-05,
        -3.0511e-05,  2.3689e-06, -1.4428e-05, -1.1570e-05,  1.2479e-05,
        -1.8962e-05, -1.5787e-05, -1.2430e-06, -1.8749e-05, -1.7038e-05,
         9.1869e-06, -2.6897e-05,  3.5741e-06,  3.6208e-06, -6.2388e-06,
         1.3053e-05, -1.8704e-06,  1.9978e-05,  1.9985e-06, -2.0905e-05,
        -1.0333e-05, -1.9948e-05, -1.5856e-05, -3.5887e-06, -1.0662e-05,
         1.8942e-05,  1.8559e-05, -8.1994e-07, -1.8449e-05, -2.2033e-05,
         1.3810e-05, -6.6817e-06, -2.0302e-05, -2.2953e-05, -2.3846e-05,
         3.6863e-06, -1.9878e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0301e-04,  1.0885e-01, -4.8440e-04, -2.0524e-04, -6.3468e-04,
        -8.3966e-05, -1.0093e-04, -1.0199e-04, -5.4781e-04,  2.6018e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8356e-04,  7.3684e-02,  3.0985e-05, -5.3886e-05, -2.2772e-05,
        -3.9307e-05,  5.7418e-05,  4.8432e-05,  3.7777e-05,  8.9237e-05,
         2.0108e-05,  6.4587e-05,  7.3167e-05, -1.8601e-05, -4.1461e-05,
         1.1555e-05, -8.4340e-05, -6.1289e-05,  5.9011e-05,  2.7672e-05,
         4.5789e-05,  9.8641e-05,  8.7906e-05, -5.1058e-05, -3.9781e-05,
        -8.7507e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0411e-04,  7.2830e-02,  1.0241e-04, -3.5941e-05,  5.5664e-05,
        -9.3157e-05,  8.3213e-05, -3.4883e-05, -4.2918e-06,  3.3518e-05,
        -1.3846e-04,  6.4827e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9651e-04, -7.3030e-02,  2.0382e-05, -4.4763e-06,  6.9459e-06,
        -2.4291e-06, -4.4479e-05, -3.9714e-05, -9.9087e-05, -1.5026e-05,
        -4.1874e-05, -3.1092e-05,  7.3951e-05,  1.5159e-05, -6.4372e-05,
        -2.7318e-05, -7.0786e-05, -2.7398e-05,  9.1724e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2146e-04, -1.6114e-01, -5.7779e-05, -3.0506e-06,  5.8299e-07,
        -5.1716e-05, -2.1360e-05,  1.7393e-05,  3.3597e-05, -1.4135e-04,
        -2.6395e-06, -1.0757e-04,  7.6114e-05, -1.0866e-05,  2.2259e-05,
        -2.1673e-05,  1.0494e-04, -7.1208e-07,  8.0003e-05, -1.1491e-04,
         3.1806e-05,  8.7928e-05,  7.5525e-05, -7.6113e-05,  4.4568e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9012e-06, -9.8933e-02,  6.8213e-05, -7.4427e-05, -8.4019e-06,
         2.5530e-05, -1.0141e-04, -4.9228e-07, -4.1059e-05, -1.5526e-04,
        -1.2096e-04,  1.7515e-05,  9.5895e-05, -2.2777e-05, -1.1979e-04,
        -2.0390e-05,  3.1919e-05, -2.3736e-05, -4.5199e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5822e-04, -7.7785e-02, -2.2411e-05,  1.4967e-05,  1.8883e-05,
        -9.8886e-06,  1.4641e-06,  4.7568e-05,  7.1576e-06,  3.8414e-06,
        -1.2911e-05,  2.6926e-06,  2.8248e-05, -2.2642e-05,  5.1062e-06,
         8.0745e-06,  4.1998e-06,  3.4792e-06, -8.9356e-06, -7.3025e-06,
        -5.2811e-06,  1.6892e-05, -4.4962e-06,  4.6944e-05,  1.5154e-05,
         5.4475e-05, -1.9365e-05, -6.8028e-06, -6.2729e-06,  2.1165e-05,
         2.0487e-05, -2.6771e-05, -8.9306e-06,  1.4761e-05,  3.1781e-05,
        -3.3156e-05, -1.8621e-06,  2.2452e-05,  6.6692e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0109e-04,  8.8785e-02, -2.6168e-05,  7.9414e-05, -4.8536e-05,
        -9.8099e-05,  2.0673e-04, -2.1908e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7494e-04, -2.0918e-01,  6.2268e-05, -3.4059e-04,  7.6070e-05,
         1.7434e-04, -4.8117e-05,  2.7694e-04, -2.1220e-04, -3.2116e-05,
         9.5251e-05,  2.7394e-04, -7.9775e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0181e-04,  1.1295e-01,  6.6453e-05, -4.8260e-05,  3.2421e-05,
        -1.2848e-04, -2.7228e-05, -2.8359e-05, -4.2863e-05, -2.6397e-05,
         4.3332e-05, -1.0392e-04,  4.3821e-05, -1.3982e-04, -4.5855e-05,
        -1.9245e-05,  8.6445e-05,  4.2529e-05, -1.7608e-05, -3.6228e-05,
        -4.8476e-05, -9.3499e-05,  1.5442e-05, -1.4600e-05, -1.5998e-05,
        -1.1647e-04,  9.3089e-05, -7.1703e-05, -7.9144e-06,  3.1122e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.0118e-05,  4.3578e-02,  4.2097e-05, -2.9639e-05,  2.0972e-05,
         1.7549e-05,  3.5155e-05,  4.8945e-05, -1.5201e-06, -1.0020e-05,
         3.2358e-05,  4.2164e-06,  5.5224e-05,  2.9741e-05,  4.0158e-05,
         3.6152e-05, -5.2622e-05,  3.5606e-05,  4.8998e-05, -1.5648e-05,
         1.9992e-05, -5.9944e-06, -1.7009e-05,  1.4549e-05,  7.8214e-05,
        -9.7460e-06,  1.5337e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.0056e-04, -6.6211e-02,  8.9358e-05, -1.3255e-07,  1.3242e-05,
        -1.3665e-05, -1.7762e-05,  4.3411e-06, -6.3081e-05,  7.3522e-05,
        -1.5031e-05,  9.2935e-05, -5.2044e-05,  4.8385e-05, -1.8438e-05,
        -2.7200e-05, -2.3218e-05,  4.0436e-05,  1.2901e-05,  7.1919e-05,
        -1.3328e-05,  1.0952e-05, -4.7305e-05, -1.7195e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4408e-05, -1.3251e-01,  4.8475e-05,  3.1921e-05,  2.1787e-05,
         4.2171e-05, -3.9334e-05,  5.2411e-05,  9.5770e-06,  5.3148e-05,
         1.5805e-05,  5.8053e-05, -9.5340e-05, -2.8799e-05,  3.8020e-05,
         1.0568e-04, -5.8914e-05, -1.9513e-05, -2.6960e-06,  3.6708e-05,
         3.2902e-06, -2.2357e-05, -2.3068e-05,  3.0542e-05, -5.8385e-05,
        -1.8735e-05, -3.1977e-05,  3.0701e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3367e-05,  1.3393e-01, -9.4894e-06, -5.9399e-05, -1.7561e-04,
         1.7326e-04,  1.2315e-05,  2.7200e-05,  2.1865e-05, -4.7954e-05,
        -3.2473e-06, -1.5056e-04, -2.2079e-04, -1.9261e-04, -2.0411e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2520e-04,  7.4550e-02,  4.4456e-05, -3.4257e-05,  2.9273e-05,
        -1.1067e-05,  1.8940e-05,  4.2467e-05, -1.6482e-05, -3.8899e-05,
         6.8750e-06, -4.0994e-05,  3.9086e-05, -2.0579e-05,  4.5237e-05,
         5.6428e-05, -4.0461e-05, -1.2991e-05, -2.3267e-05, -3.0838e-05,
        -2.1506e-05, -2.9740e-05,  8.7685e-07,  7.0789e-05,  4.7509e-05,
         6.8897e-06,  3.8266e-05, -1.7495e-05,  9.2804e-05,  9.8198e-06,
         3.1480e-05, -2.0062e-05,  8.9462e-06,  4.7511e-05,  1.7567e-05,
         5.6269e-06, -1.5770e-06,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0097e-04, -1.5809e-01, -2.0895e-05, -7.4689e-05,  9.9038e-06,
        -8.0615e-05,  1.0212e-04, -3.6169e-05, -2.2162e-05, -9.4792e-07,
        -7.3825e-06,  1.3790e-04,  6.3474e-05, -3.8090e-05, -9.5008e-05,
        -7.2490e-05, -1.1359e-04, -4.0257e-05, -1.2791e-04, -9.6379e-05,
        -7.1689e-06, -9.6993e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.1410e-05,  1.2925e-01, -9.0958e-05,  2.8246e-05,  6.1101e-05,
         2.0017e-04, -1.2466e-04,  5.8733e-05,  3.8755e-05,  3.8513e-05,
        -3.2368e-05, -2.6878e-05, -2.1653e-04, -3.0117e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.5247e-04,  9.0197e-02,  2.4498e-04,  1.2186e-04,  2.7018e-05,
        -1.7046e-04, -1.4964e-04, -2.6795e-05,  3.8280e-05, -4.6931e-05,
         2.4924e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5179e-04,  1.7996e-01, -1.6467e-05, -7.6222e-05,  1.6494e-04,
         4.9833e-05,  9.5004e-05,  2.2357e-04,  4.0714e-05,  1.8642e-04,
         4.7526e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.7726e-05,  1.4847e-01,  1.1658e-04, -6.3135e-05,  3.1312e-05,
        -4.1176e-05,  7.0983e-05,  8.1114e-05,  1.8983e-05,  1.1160e-04,
         4.5425e-05,  2.0075e-05, -6.5284e-05,  3.3557e-06,  3.6845e-05,
        -2.3978e-04, -3.0350e-05, -9.9884e-05, -3.1719e-05,  1.3941e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.3854e-05, -7.3885e-02,  6.1222e-06, -4.9780e-05, -3.0446e-05,
         3.1880e-05,  3.6258e-05, -5.7213e-05, -2.1217e-05, -4.1575e-05,
        -2.1116e-05, -2.4196e-05, -7.7824e-05, -1.3147e-05, -1.1793e-05,
         1.0373e-05, -5.6314e-05, -6.1812e-05, -4.4585e-05, -4.9043e-05,
        -4.0037e-05, -2.3100e-05,  5.7732e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7421e-04, -1.7534e-01,  2.9075e-05, -1.6090e-05,  3.9597e-05,
         8.2171e-05,  1.0385e-04,  5.7920e-05, -4.7006e-05,  1.9766e-05,
         1.2059e-04,  1.1867e-04, -3.1186e-05,  5.8089e-06,  8.8146e-05,
        -2.4321e-05,  1.4126e-04, -7.9097e-05,  1.2183e-04, -1.5847e-06,
         7.5454e-05, -5.2538e-05, -5.8772e-05,  6.1994e-05,  4.7626e-05,
        -1.5043e-04, -6.0322e-05, -1.3046e-04,  1.0721e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0307e-04, -1.4089e-01,  9.1844e-06, -8.0992e-05, -5.2738e-05,
        -9.9274e-05,  2.5505e-05, -1.3225e-05, -9.0422e-06,  1.0409e-04,
         3.0891e-05, -6.1271e-05,  7.3521e-05, -9.2000e-06,  5.5132e-05,
        -9.7085e-05, -3.9424e-05, -2.1600e-05, -2.1895e-05,  1.1783e-04,
        -4.2992e-05, -5.9210e-05,  1.7615e-05, -2.6683e-06,  7.4030e-06,
        -7.9183e-06,  3.3316e-05, -1.1340e-06, -9.2610e-05, -2.5019e-05,
         7.0466e-05, -1.5958e-05,  5.0350e-05,  7.1040e-05,  7.7035e-05,
         8.9842e-05,  4.0452e-06,  1.0101e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 2.0778e-04, -1.6561e-01,  2.2582e-05,  4.3668e-05, -1.4908e-04,
        -9.5920e-05, -1.6023e-05, -9.3549e-05, -9.9579e-05,  3.8858e-05,
         2.1551e-04, -5.6168e-05, -4.7252e-05,  1.5642e-05,  4.9818e-05,
        -1.2798e-04,  8.7256e-05, -4.5061e-05,  2.3261e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8405e-04,  6.9368e-02,  3.8125e-05,  2.7160e-07, -5.3187e-05,
        -3.4253e-05,  3.1979e-05,  3.7391e-05, -4.3105e-05,  3.1916e-05,
        -1.6807e-05, -6.3031e-05,  1.2555e-05,  5.2131e-05,  5.1032e-05,
         3.4460e-05,  9.9139e-06, -6.0470e-05,  3.1413e-05,  5.2329e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3573e-04, -1.2323e-01, -3.2901e-06,  3.9360e-05,  7.7921e-05,
         1.3947e-04,  5.8198e-05, -3.8119e-06,  2.4685e-05, -6.6104e-05,
         5.4962e-05,  4.5298e-05, -2.7592e-05,  7.2596e-05,  1.1380e-04,
         7.7551e-05,  1.4391e-05,  3.5512e-05,  9.0766e-06, -3.8117e-05,
        -4.0148e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3703e-04,  1.7209e-01,  1.4204e-05, -7.5936e-05, -2.8060e-04,
        -1.4438e-05, -7.8174e-06,  2.4057e-04,  1.6401e-04, -1.3214e-04,
         2.2246e-05, -9.9911e-05, -3.5838e-05,  6.9047e-05,  1.7152e-04,
         2.7576e-04, -1.2985e-04,  1.5546e-04,  1.9191e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.7852e-05, -1.0475e-01, -7.2306e-06,  6.2715e-06, -3.3247e-05,
         3.3780e-05,  7.0307e-05, -4.9501e-06,  5.3420e-05, -3.7040e-06,
         8.4557e-05,  4.7887e-05, -1.1579e-05,  4.4415e-05,  3.5129e-05,
         3.7192e-05, -1.6472e-05, -6.8880e-05,  3.6590e-05, -1.1303e-05,
         6.8993e-05,  1.7546e-05, -6.1822e-05,  7.5714e-05, -6.3638e-05,
        -5.1961e-05,  8.4862e-06,  1.1672e-05, -7.5006e-06, -1.9851e-06,
         8.3118e-05,  7.3008e-06,  8.7856e-06, -1.2139e-05, -2.1644e-05,
         2.0756e-05,  2.4338e-05,  2.9240e-05,  1.3226e-05,  3.0370e-06,
        -6.4921e-05,  2.3826e-05, -4.6171e-05, -6.3488e-05,  4.7009e-05,
        -5.7512e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0257e-04, -1.2877e-01,  1.4274e-04,  1.9766e-05,  7.5760e-05,
         2.5466e-05,  2.8292e-05, -4.9102e-05,  4.3714e-05,  1.6195e-05,
         9.9538e-07, -4.0178e-05,  2.8944e-05,  9.7884e-05,  4.2700e-05,
        -7.0298e-06, -2.3094e-05, -2.6432e-05,  2.2724e-05,  1.6456e-05,
         3.2946e-05,  1.3733e-04, -9.7105e-05,  6.2681e-05,  3.6956e-05,
        -4.8445e-05, -3.6377e-05,  5.3262e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2995e-05, -1.0503e-01,  2.8148e-05, -7.4012e-06,  2.0886e-05,
         4.7001e-06,  1.0619e-05,  1.0723e-05, -4.5512e-05, -1.1164e-05,
        -2.1056e-05, -7.1025e-06, -2.5000e-05, -4.0297e-05, -8.0402e-05,
        -2.7643e-05,  3.5931e-05,  1.7414e-05,  1.9763e-05,  6.9801e-05,
        -3.6926e-05, -6.0412e-06,  3.3178e-05, -1.1739e-04, -1.5594e-05,
         5.5547e-06,  8.2557e-06, -5.3223e-05, -4.6195e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.9333e-05,  9.6801e-02, -1.1034e-05,  9.3725e-05,  7.5968e-05,
        -8.2334e-05,  7.3239e-05,  6.3679e-05, -4.4961e-06,  4.3917e-05,
        -8.6863e-06, -2.1903e-05,  3.0997e-05,  4.6938e-05, -3.3531e-05,
         1.7145e-05, -7.5740e-06,  3.7547e-05,  9.9293e-06, -3.6554e-05,
         4.4520e-05,  1.9326e-05, -2.6931e-05,  7.6316e-05,  4.2128e-05,
         1.0693e-04,  6.3938e-05, -1.5361e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2781e-04, -1.3090e-01, -4.9549e-05,  9.5226e-06, -2.5368e-05,
        -4.7863e-05,  2.3141e-05,  2.1493e-05, -2.6630e-05,  6.5481e-05,
        -1.8421e-05,  8.3829e-06,  8.8407e-06, -2.0007e-06, -7.4847e-06,
         3.5599e-05, -1.2106e-05,  3.0574e-05,  7.4963e-05, -1.9440e-05,
        -1.6225e-05, -5.3495e-05,  9.7338e-06, -5.0205e-05, -7.7589e-06,
        -3.0818e-05,  5.7088e-05, -5.7748e-05, -4.8910e-05, -1.2346e-05,
        -4.9316e-05, -7.4146e-06,  1.7575e-05, -3.8877e-05, -2.1366e-05,
         6.4906e-05,  3.6013e-07,  4.4182e-05, -4.0656e-05, -1.7174e-05,
        -1.3335e-05, -6.5261e-05, -3.0195e-06,  5.9262e-05,  1.9314e-07,
         2.4718e-05,  8.6732e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3297e-04,  1.9718e-01, -2.6840e-06,  3.9083e-06, -2.1272e-05,
         7.7827e-06, -1.0657e-05, -1.1389e-05,  5.4389e-05, -4.7309e-05,
        -3.7555e-05, -3.9199e-05,  3.2668e-05,  3.0145e-06,  7.8724e-05,
         1.0743e-05,  1.1039e-05, -2.2056e-05,  3.8474e-05,  6.7646e-05,
        -1.8532e-05, -2.0740e-05, -4.6203e-05, -1.9727e-05, -7.2458e-06,
        -2.2414e-05,  1.8817e-05, -2.9532e-05,  4.5173e-05,  2.2902e-05,
        -7.5030e-05,  6.3848e-05, -2.7549e-05, -3.8228e-05,  6.9149e-06,
        -6.7055e-05, -1.0491e-05, -2.3879e-06, -3.4662e-05, -5.6192e-06,
        -9.5867e-06,  4.5651e-05,  4.6190e-05,  1.6548e-05, -4.5473e-07,
         1.2676e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0689e-04,  2.0112e-01,  1.2498e-04, -1.0310e-04,  2.5636e-04,
         1.1857e-04,  2.0160e-05,  8.1319e-05,  1.2658e-04,  1.0727e-04,
         2.8306e-05, -4.7156e-05,  1.1335e-05,  8.5100e-05, -7.0852e-05,
        -1.5356e-04, -3.8254e-05, -6.9059e-05,  2.0415e-04, -2.2045e-04,
         1.5085e-07,  9.4815e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0025e-03,  2.0065e-01, -1.4168e-05, -1.2708e-04, -1.6954e-04,
        -1.0443e-04,  4.6251e-05, -1.5406e-04, -4.9257e-05, -1.8346e-04,
         4.5105e-06, -2.9872e-04, -2.0314e-05, -5.6317e-05,  3.3315e-05,
        -2.7902e-04, -1.5414e-04, -9.4547e-05, -7.2861e-05,  4.6939e-05,
        -1.2111e-04,  5.6386e-05, -4.4733e-05, -4.1462e-05,  3.8653e-05,
        -1.2937e-04, -1.5835e-04, -7.1681e-05,  9.9291e-05, -2.3373e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([-4.1281e-04, -1.3994e-01, -3.9964e-05,  2.7673e-05,  9.3025e-05,
         1.2730e-04, -6.1588e-05,  8.7591e-05,  1.6047e-04,  4.4190e-05,
        -7.9604e-05,  1.5283e-04,  7.3280e-05,  1.0455e-04, -6.4874e-05,
         8.8480e-05,  1.0670e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2652e-05, -1.8861e-01, -1.8946e-04,  9.6990e-05,  8.6879e-05,
        -3.9177e-05, -1.2293e-04,  3.3223e-05,  5.3573e-05, -5.0864e-05,
         3.4362e-04, -7.7103e-05,  6.2032e-05, -8.5133e-05,  3.7754e-05,
         1.1486e-04,  6.7549e-05,  8.1817e-05,  8.6062e-05, -5.3431e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5132e-05,  1.2294e-01, -1.3528e-04,  3.7889e-05,  1.2381e-04,
        -1.3702e-04, -8.2493e-05, -4.5604e-05,  5.8741e-05,  2.2418e-05,
        -1.3321e-05,  2.2601e-05, -7.9934e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0816e-04,  1.5456e-01, -3.5543e-05,  5.0309e-05,  4.8208e-06,
        -3.8053e-05, -2.1358e-04,  7.0431e-05, -1.3600e-04,  3.1236e-05,
        -2.4528e-05, -8.1392e-05, -1.3906e-04, -1.5880e-04, -6.2200e-05,
        -1.7305e-04, -8.5753e-05, -3.1564e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4457e-04, -8.6922e-02, -6.9985e-05, -2.6522e-05,  1.1263e-05,
        -7.8155e-05, -4.3970e-05, -4.6089e-05, -5.0096e-05, -1.5701e-05,
         2.7572e-05, -4.2523e-05, -3.5692e-05,  3.1943e-05,  9.8569e-07,
        -7.1548e-05, -7.8218e-05, -7.3951e-05, -1.1336e-05, -1.1501e-05,
         2.2464e-05, -1.8317e-05, -5.3960e-05,  5.8375e-05, -1.7415e-05,
        -5.6541e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2873e-05,  1.5367e-01,  3.2758e-05,  2.2514e-04, -2.6093e-04,
         7.3471e-04,  1.4022e-05,  4.1735e-04,  2.5704e-04, -4.9266e-05,
        -9.7530e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4491e-05,  1.0587e-01, -3.0379e-04, -8.8303e-05, -6.9578e-05,
        -1.3408e-04, -6.3570e-05,  5.7922e-05, -3.0590e-05, -1.6869e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3028e-04, -1.0778e-01, -1.3886e-05, -5.3590e-05,  2.2468e-05,
        -5.6720e-06,  1.5960e-05, -1.7794e-05,  1.0807e-05, -1.6233e-07,
        -2.0042e-05, -8.8382e-06,  3.5595e-05,  4.1692e-05,  2.1773e-06,
         1.4691e-05, -3.0650e-05, -3.7564e-06,  1.6115e-05,  8.6222e-06,
         2.5705e-06,  7.4672e-06,  1.3151e-05, -6.0872e-06, -2.7506e-06,
        -2.4633e-06,  7.2271e-06, -2.4241e-05, -2.1879e-06,  7.0745e-06,
        -1.9165e-05,  1.3432e-05,  9.9184e-06,  2.5728e-05, -4.4685e-06,
         1.7193e-05, -2.4893e-05, -3.0501e-05, -1.3430e-05, -1.7559e-07,
         1.9719e-05,  2.5722e-05,  9.1618e-05,  7.0877e-06,  1.3391e-05,
        -9.6325e-06,  4.0081e-05, -1.9218e-05,  3.5997e-05,  2.2699e-07,
        -4.0630e-05,  5.4905e-05,  2.1462e-06,  4.6846e-05,  3.2175e-06,
         1.4460e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5420e-04, -1.9029e-01,  2.3526e-05, -6.5046e-05, -2.9831e-05,
         1.9093e-04, -1.0108e-04, -3.2832e-05, -1.4577e-05,  8.1619e-05,
         9.7967e-05,  1.2803e-04,  2.6178e-05,  6.9524e-05,  2.1565e-05,
        -4.8480e-05,  1.8854e-05,  8.3924e-05, -9.0833e-05,  1.2346e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6968e-04, -1.4148e-01, -5.9724e-05,  2.0382e-05,  9.3835e-05,
         7.7226e-05,  3.9876e-05, -2.5364e-05,  6.8902e-05, -2.8788e-06,
         1.0320e-04,  4.6765e-05,  1.8611e-04,  1.2293e-06,  1.7088e-04,
         6.7014e-05, -7.3614e-05,  6.2734e-05,  1.2986e-04,  4.9321e-05,
        -5.1828e-06, -9.9401e-05, -5.9639e-05,  2.8906e-05,  2.0541e-05,
         3.6277e-05,  1.5096e-05, -1.0998e-04, -5.8559e-05, -1.6989e-05,
         2.8709e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1696e-04,  3.7213e-02, -3.5542e-05, -4.7999e-05, -1.4451e-04,
         4.5521e-06, -9.5363e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7889e-04,  5.8315e-02,  3.4810e-05,  6.5292e-05, -7.7190e-05,
         1.2641e-05,  2.3175e-05,  1.3776e-04,  6.9453e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.6125e-04, -1.1482e-01, -2.0764e-05,  1.1955e-05,  2.8058e-05,
        -1.5626e-06,  2.2912e-05,  1.8570e-05,  1.0977e-04, -6.5345e-05,
        -3.8809e-06, -8.2271e-05,  2.0980e-05,  1.4047e-05,  2.1743e-05,
         3.6647e-06,  3.0145e-05, -4.3497e-05,  3.7790e-05, -4.8236e-06,
         6.4930e-05, -3.1156e-06,  4.2490e-06,  2.3241e-05, -5.7671e-05,
        -1.8249e-05, -8.6940e-05, -4.6724e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6866e-07, -9.1992e-02, -7.6102e-05,  5.0325e-05, -3.6368e-05,
        -2.5456e-06,  6.8640e-05,  2.3214e-05,  2.5496e-05,  9.9007e-05,
         4.3085e-05,  9.9352e-07,  4.5196e-05, -1.6189e-05, -2.8254e-06,
         4.6038e-05,  5.9970e-05,  1.6456e-05,  3.9536e-05, -2.5547e-05,
        -1.1722e-05,  5.0562e-06,  3.1913e-05, -1.0895e-05, -2.2715e-05,
         7.8965e-05,  3.0706e-05,  3.4500e-05,  2.1397e-05, -2.5556e-05,
         2.4901e-05,  4.0187e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7260e-04, -1.8198e-01, -6.7470e-05,  8.5488e-05,  1.0396e-04,
         1.4663e-04, -4.2689e-05, -6.1525e-05, -1.1949e-04,  4.1746e-05,
         1.5474e-04, -6.3902e-06,  1.8361e-05, -5.4153e-05,  1.5462e-04,
        -4.4932e-05,  2.1330e-04,  1.0385e-04, -4.2132e-05, -1.4053e-04,
        -5.3462e-05,  1.6582e-04,  1.4737e-04,  3.2948e-05,  6.6344e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.6874e-04,  1.2657e-01,  2.3017e-04, -9.3303e-05, -1.4098e-04,
        -2.7160e-04, -1.4344e-04,  3.9746e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3387e-04,  4.7229e-02, -3.0903e-05, -1.0885e-04, -2.8303e-05,
         3.6969e-05, -4.4954e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.4488e-05, -1.8815e-01, -3.0062e-05, -2.5988e-04,  1.7871e-05,
        -1.4178e-04,  7.7212e-06,  1.9114e-04,  1.7986e-04,  9.0919e-06,
         3.1929e-05,  1.3862e-04, -5.4963e-05,  5.6948e-05,  4.1712e-05,
         2.9250e-05,  6.4774e-05, -2.1391e-05, -3.9002e-05,  4.1445e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4367e-04,  1.1613e-01, -6.7809e-05, -1.9180e-05,  1.0221e-05,
         1.9264e-05, -1.4658e-05,  6.7596e-06, -1.7190e-04, -5.5079e-05,
        -1.7859e-04, -1.2373e-04,  8.8408e-05,  7.2075e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2982e-04,  6.2975e-02,  6.7940e-05,  3.4502e-05, -5.4601e-05,
         2.6603e-04, -8.7089e-06,  1.0993e-06,  5.1954e-06, -1.0938e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9687e-05,  8.5821e-02,  6.1317e-05,  4.2981e-05, -3.4206e-05,
        -1.8737e-08, -1.2778e-05,  5.7268e-06,  3.5481e-05,  1.9020e-06,
         4.7360e-06, -1.4548e-05, -2.1812e-06, -7.1614e-05,  2.2223e-05,
         5.3667e-05,  4.3697e-05,  1.6288e-05,  4.8506e-05, -2.7361e-05,
         1.0379e-05, -1.5880e-05,  3.2584e-05, -1.3879e-05,  3.2594e-05,
        -3.3551e-05, -1.6709e-07, -2.6070e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7212e-05,  2.5011e-01,  2.1592e-04, -1.5003e-04, -2.7654e-06,
         7.8040e-05, -6.0230e-05, -4.3472e-05,  5.2391e-05,  1.0273e-05,
         2.0262e-06, -7.3929e-05,  4.8058e-05, -1.2926e-04,  1.3651e-04,
        -3.7365e-05, -4.6269e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7833e-04,  1.2520e-01, -1.4408e-04,  3.1494e-05, -1.3173e-04,
        -1.5774e-04, -2.5887e-05, -5.5389e-05, -1.2802e-04, -3.4455e-05,
        -6.1729e-05, -5.5416e-05, -1.4829e-04,  6.1127e-05, -3.5714e-06,
        -8.2284e-05, -1.6761e-04, -8.3302e-06, -2.9217e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3069e-04, -1.6096e-01, -9.9183e-05,  5.2414e-05, -3.9708e-05,
        -1.0108e-04, -1.9850e-04, -1.4054e-04,  1.1242e-04,  1.6167e-04,
        -6.4416e-06,  3.6880e-05,  6.7197e-06, -2.7342e-04, -5.9923e-05,
         2.0112e-04, -1.5474e-04, -7.3427e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-1.6032e-04, -1.6556e-01,  4.2861e-05, -1.6597e-05,  1.0719e-05,
         5.1327e-05, -1.6704e-04,  2.6349e-05, -1.0561e-04, -1.0030e-05,
        -1.5069e-04, -8.1603e-06,  3.3365e-05, -1.2719e-05,  3.5840e-05,
        -2.0513e-04, -1.0694e-04,  9.2123e-05, -2.9947e-05, -9.1858e-05,
        -1.0481e-04, -2.8857e-05,  1.5882e-04,  2.8595e-06, -9.8447e-06,
         1.8053e-05,  1.1476e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1167e-04,  1.1053e-01,  5.4441e-05, -3.1089e-06, -9.2858e-05,
         4.1148e-05,  2.9738e-05,  4.9467e-05, -2.5821e-05, -2.7833e-07,
        -1.4262e-06, -6.4633e-06, -1.1198e-05,  1.5292e-05, -3.0946e-05,
        -4.6750e-05,  4.3227e-06,  3.0625e-05, -8.1557e-06,  1.8901e-05,
         1.2056e-05,  2.1325e-05, -1.2992e-06, -5.0890e-05,  6.1944e-06,
        -7.4513e-07,  1.8477e-05, -1.1444e-05,  3.9103e-05,  2.3065e-05,
         3.6817e-06, -1.8009e-05, -4.6337e-05,  1.2604e-05,  5.4953e-05,
        -4.0863e-05,  5.1056e-05,  8.3571e-06,  3.8882e-05,  1.7660e-05,
         1.1917e-05,  3.9004e-05,  1.3686e-05, -2.7886e-05, -1.4986e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5365e-06, -1.4755e-01,  5.8461e-05, -6.7863e-05, -1.8471e-05,
        -6.5928e-07,  1.0669e-04,  4.8964e-06, -9.0820e-05,  2.5825e-05,
        -5.4127e-05,  5.0259e-05, -1.4394e-04, -1.1561e-04, -3.3713e-05,
         1.7522e-06, -1.3774e-04,  6.6171e-05, -4.6802e-05,  6.0862e-05,
         1.1553e-05, -1.8994e-05,  5.1058e-06,  2.1907e-05, -5.7333e-05,
        -7.5532e-05,  9.3563e-05,  2.7332e-05,  3.1530e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9872e-05, -9.6773e-02,  1.4290e-04,  1.3939e-05, -5.0036e-05,
        -6.9743e-05, -1.3573e-04, -8.3635e-06, -1.0397e-04, -3.3920e-05,
        -4.8851e-06,  3.4866e-05, -5.5676e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3603e-05, -1.3947e-01,  3.7956e-05,  2.7747e-05, -3.8663e-06,
        -1.6408e-05,  3.7062e-05,  8.4244e-05,  3.9710e-05, -7.3076e-06,
        -7.8549e-05,  3.5732e-05,  3.7201e-06,  3.8501e-05, -2.0408e-06,
         2.2730e-05,  9.3115e-05, -2.9482e-05, -6.3853e-06,  3.8624e-07,
        -1.2484e-05,  1.6927e-05, -5.4418e-05,  3.6829e-05, -3.7101e-05,
        -8.8180e-06,  3.2075e-05, -1.0796e-05, -1.5054e-05,  4.9225e-06,
        -5.9564e-05, -7.7622e-05, -5.4785e-05,  3.4253e-05, -2.9692e-05,
         6.2219e-05, -1.0074e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7015e-04, -1.4738e-01, -1.8286e-05, -5.1546e-05, -7.9422e-05,
        -4.9118e-05, -6.6091e-05, -5.1334e-05, -6.3677e-05,  6.2902e-05,
        -5.1484e-05,  6.6003e-05, -4.4244e-05,  8.5102e-05,  3.8661e-05,
         6.2356e-05, -3.5168e-05, -3.2386e-06,  4.3069e-05, -1.4103e-05,
        -5.3139e-05,  9.4626e-05,  5.8518e-06,  4.9080e-05, -6.6751e-07,
        -1.8001e-05, -3.6012e-05, -5.6076e-06,  3.6689e-06, -4.0771e-05,
        -1.9019e-05, -8.5755e-05, -3.5703e-05,  2.3677e-05, -1.0471e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1916e-04,  4.2493e-02,  4.6593e-05, -3.2679e-05,  3.1300e-05,
        -2.2046e-05,  1.7157e-06, -8.6953e-06, -1.2034e-05, -5.6034e-05,
         1.5260e-05, -4.4481e-06, -3.1575e-05,  5.7699e-05, -6.4254e-05,
        -2.3009e-05, -6.8498e-05,  7.7280e-05, -4.5915e-05,  1.6828e-06,
         9.3903e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6295e-05,  1.2044e-01,  6.4732e-05,  2.5648e-06,  7.5420e-05,
         8.6923e-06,  2.6029e-05,  2.0238e-06,  8.6451e-05, -4.1089e-06,
         6.0509e-06, -4.1935e-05,  2.5029e-08,  2.8326e-05, -4.3031e-05,
        -3.0853e-05, -3.7541e-05,  1.1869e-04, -2.9610e-05,  7.9699e-07,
        -8.4923e-05, -9.1863e-06,  6.1632e-05, -3.5569e-06, -1.2821e-05,
         2.2165e-05, -6.4842e-05,  4.2548e-05, -4.3593e-05,  9.3672e-05,
        -2.1806e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9036e-04, -1.4642e-01, -1.1744e-05, -1.1032e-05, -4.7123e-05,
        -3.5742e-05, -2.1469e-05, -5.7490e-05, -6.0694e-05, -7.8406e-05,
        -3.4971e-05, -7.2430e-05, -5.7675e-05, -1.9340e-05,  5.4737e-05,
        -4.6822e-05, -3.1411e-05, -7.1590e-05, -1.1518e-05, -1.5062e-05,
        -4.1304e-05, -7.6822e-06, -1.6835e-05,  3.8694e-05,  7.6372e-06,
        -3.8985e-05,  3.0267e-07,  2.0571e-05, -1.2187e-05, -1.8288e-05,
         2.0649e-06, -2.0972e-05,  3.1429e-06, -6.7312e-05, -2.2521e-05,
         3.5645e-05, -7.3951e-06,  4.1185e-05,  5.4504e-05, -4.6635e-05,
         6.4023e-05, -3.4847e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8911e-04,  5.0755e-02,  9.1584e-05,  1.2903e-04, -2.1815e-05,
        -1.2188e-05,  1.6855e-05, -6.7675e-05, -1.1763e-04, -1.8358e-05,
         1.1073e-04,  4.1180e-05, -2.7789e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9127e-05, -7.5227e-02, -3.7594e-05,  7.1809e-06,  1.5990e-05,
         3.8980e-05, -4.1364e-05, -2.4013e-05, -2.9394e-05,  1.2526e-05,
        -4.0010e-05, -1.6817e-05,  1.6956e-06, -2.7824e-05,  1.2443e-05,
        -1.0952e-05,  5.0759e-05, -9.5301e-06,  1.4142e-05, -1.1478e-05,
        -5.5615e-07, -2.6101e-05, -5.8521e-05,  6.0158e-06,  1.3553e-05,
         2.4835e-05, -1.6126e-05, -2.5739e-05,  2.6000e-06, -3.6287e-05,
        -1.9320e-05, -1.3694e-05,  1.6817e-05,  1.1785e-05,  1.3625e-06,
        -3.4747e-06,  8.5656e-07, -8.1249e-06,  2.9980e-05, -2.0041e-05,
        -1.6140e-05,  2.4319e-06,  3.9440e-06, -3.4379e-05, -4.2814e-05,
         2.3739e-05,  8.1183e-06, -5.4312e-06,  2.8219e-05,  6.5956e-06,
         1.0151e-05,  2.8061e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1206e-04,  7.4516e-02,  6.1613e-05,  2.2927e-05,  2.2617e-05,
        -3.6658e-05,  5.1484e-05,  3.9129e-05, -5.3331e-05,  9.4861e-05,
        -5.4831e-05,  9.8769e-05,  1.8524e-04,  2.6935e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 7.0132e-06, -9.0192e-02, -1.2877e-05, -2.1447e-05,  4.2506e-05,
        -2.0482e-06,  1.4113e-06, -3.3206e-05,  2.3019e-05, -3.6078e-05,
        -1.6880e-05,  3.7351e-05,  9.5499e-06,  3.8042e-05,  5.0666e-05,
         1.8141e-05,  1.8668e-06, -2.7790e-06, -5.8040e-06, -2.6786e-06,
         3.1807e-05,  1.1273e-05,  3.2935e-06, -2.0998e-05, -2.2115e-05,
        -2.5140e-05,  1.5149e-05,  2.2362e-05, -2.1775e-05,  3.4067e-05,
        -6.7926e-05,  1.0489e-06,  6.2400e-06, -2.0678e-06,  1.5039e-06,
        -3.5638e-05, -1.9138e-05, -7.2393e-06,  3.5874e-05, -2.8928e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3276e-04, -7.3144e-02,  4.7963e-06,  4.1629e-06,  2.1869e-06,
         3.0489e-05, -1.5947e-05, -1.4632e-05, -1.5926e-05,  8.4903e-06,
        -3.6765e-06, -2.4280e-05,  1.8231e-05, -1.9369e-05, -3.6780e-05,
         1.4390e-05, -3.8855e-05,  2.9050e-05, -7.5957e-06, -1.6921e-05,
        -3.3055e-05, -1.8816e-05, -3.5170e-05, -2.1644e-05,  3.1239e-05,
        -5.6773e-06,  7.4868e-06, -3.6815e-06, -2.0498e-06, -7.2024e-06,
        -8.2458e-06,  9.9290e-06, -9.2019e-06,  3.2140e-05,  2.7703e-06,
        -7.5295e-06,  2.2497e-05,  1.4467e-05,  2.4696e-05,  2.9299e-05,
         4.1705e-06, -1.2484e-05, -1.8865e-05, -7.1324e-06,  1.1830e-05,
         2.4527e-06, -2.2667e-05, -3.1307e-07, -3.1687e-05,  8.1711e-06,
        -1.1211e-06, -2.6583e-05, -1.0098e-05,  2.7638e-05, -1.0931e-05,
         2.8343e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9782e-05, -1.7370e-01,  1.9851e-05,  3.0540e-06, -2.4990e-05,
        -4.4275e-06,  2.2356e-05,  1.0729e-05, -2.4484e-05, -1.0371e-05,
        -2.7020e-05, -5.0896e-05,  1.9225e-05,  6.5473e-06, -1.2640e-05,
        -9.8076e-06,  8.1428e-06, -6.8765e-05,  4.4812e-05, -3.2492e-05,
        -1.1350e-06,  6.0904e-05,  6.9074e-05, -5.1562e-05,  1.2209e-05,
         1.6062e-05,  4.0308e-05,  3.7611e-05, -8.7088e-06,  3.0055e-06,
         2.2436e-05,  7.1574e-05,  1.9256e-05,  5.7605e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5277e-04,  1.1885e-01, -7.9515e-05, -1.4267e-05, -2.3404e-04,
        -1.1809e-04, -7.2402e-05, -9.8026e-07, -7.3961e-05, -5.1986e-05,
        -5.7530e-06,  2.5901e-06,  3.8629e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5613e-04, -1.0019e-01, -1.1585e-05,  2.6733e-05, -4.6746e-05,
         9.8063e-05,  7.6850e-05,  2.4900e-05,  9.6239e-05,  9.0043e-06,
        -2.0129e-05, -6.9912e-05,  5.6784e-05, -1.9624e-06,  1.9211e-06,
         1.0528e-05, -1.8580e-05, -5.4937e-05,  1.1310e-04, -1.9549e-05,
         6.1292e-06,  5.1662e-05, -4.6850e-06,  7.4071e-05, -4.5487e-05,
         4.9356e-05,  1.5586e-05,  1.9960e-05, -2.9294e-05,  3.8397e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1181e-04, -1.1111e-01, -4.9538e-05,  3.0992e-05, -1.5449e-05,
         7.6876e-05,  3.7816e-05,  1.2148e-05, -3.7624e-05,  2.2657e-05,
         1.0133e-04, -4.2757e-05, -1.4299e-05,  3.0060e-05, -8.0723e-06,
        -5.2059e-05,  1.3915e-05, -4.0467e-05, -7.1026e-05,  3.3689e-05,
         4.1843e-05, -6.6889e-05, -9.9971e-06,  3.6662e-05, -2.7220e-05,
         2.1481e-05, -8.0746e-05, -9.1131e-05, -2.5417e-05, -3.5904e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4342e-04,  1.1681e-01,  3.2685e-05, -7.3887e-05, -4.3319e-05,
        -4.0127e-05,  4.0641e-05,  6.5855e-05, -6.7136e-05, -7.4213e-05,
        -1.1241e-04, -6.3246e-05,  2.1481e-05, -5.9725e-05,  3.5988e-05,
        -4.4850e-05, -5.9880e-05,  7.2293e-06, -1.6095e-05,  9.5990e-07,
        -6.4535e-05, -6.7994e-05,  2.7621e-05, -4.3636e-06,  5.4834e-05,
         8.4875e-05, -1.2269e-04, -9.9650e-06,  6.3957e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2883e-04, -1.4401e-01,  6.8359e-05,  3.6260e-05,  1.0735e-04,
         8.9420e-05, -8.1165e-05, -1.1360e-04,  2.2450e-04, -2.0935e-04,
         2.0074e-05,  3.7340e-05,  1.6360e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8450e-04,  1.6247e-01,  9.0165e-05, -1.2359e-04,  2.0694e-05,
        -8.3985e-05, -4.7186e-05,  4.8388e-06, -1.5163e-05,  1.6263e-05,
        -1.0046e-05,  5.0827e-06, -7.0429e-05, -1.3347e-04, -1.8505e-04,
        -9.0395e-05,  5.2110e-05,  1.4801e-04, -1.8670e-04, -5.7163e-05,
        -1.7598e-04, -6.5725e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5581e-04, -1.1520e-01,  6.8782e-05,  1.9443e-05,  1.2269e-05,
         9.6305e-06,  6.6552e-05,  4.0632e-05,  3.8666e-05, -2.9902e-05,
         6.8887e-05, -3.4875e-05,  4.9821e-05,  7.1650e-05, -2.2724e-05,
         6.1382e-05,  6.2900e-05, -1.5272e-05,  4.1087e-05, -3.9072e-05,
         6.1615e-06, -1.6892e-05, -2.4109e-05, -2.0751e-05,  6.0357e-05,
         1.0505e-05,  3.2525e-05,  3.5722e-05,  4.9721e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2578e-04, -1.8191e-01,  2.7066e-05,  1.8852e-05, -6.8874e-05,
        -2.8837e-06, -5.7123e-05,  4.5394e-05,  1.3720e-04, -2.1935e-05,
         1.0731e-04,  7.2779e-05,  8.0119e-05, -4.6443e-05,  4.9827e-05,
         7.5351e-05,  2.5459e-05,  2.7808e-05,  2.1332e-05,  6.0599e-05,
         5.8411e-05,  1.4525e-04,  5.0049e-05,  7.3653e-05,  7.0915e-05,
        -8.8409e-05, -8.6560e-05,  7.4874e-06,  7.8465e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0146e-04,  7.0062e-02, -6.3825e-05, -6.4174e-05,  1.3098e-05,
        -4.4770e-06,  1.1728e-06, -1.1066e-04, -1.1471e-04, -1.0563e-04,
        -1.9840e-05, -4.8065e-05, -4.1922e-05, -5.1396e-05, -5.7658e-05,
        -2.2628e-05,  5.7959e-05, -1.2303e-04, -2.4079e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 6.4712e-05, -1.9817e-01, -3.5642e-05, -8.3706e-06,  2.6451e-05,
        -5.0003e-06,  3.0082e-05, -2.3749e-05, -1.7068e-05, -4.7849e-05,
         1.7338e-05, -9.8510e-06, -2.1258e-06,  3.5790e-05, -3.4076e-05,
         1.2847e-05,  1.8161e-05, -2.6581e-05, -1.1754e-04, -1.4683e-04,
        -2.7425e-05, -2.9904e-05, -3.3686e-05, -7.5598e-05, -1.6355e-04,
         3.4397e-05,  1.6991e-05,  5.4996e-05,  2.8665e-05, -1.6816e-05,
        -2.8442e-05,  6.4761e-05, -2.4604e-07, -4.4385e-05, -3.6805e-05,
         3.7041e-05,  9.1415e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2995e-04,  5.6445e-02,  8.1046e-06,  4.8740e-05, -2.1235e-05,
        -1.5837e-05, -1.6735e-05,  1.4327e-07, -9.9883e-06,  7.4749e-05,
         5.8340e-05, -2.5901e-05, -1.1109e-05, -1.4600e-05,  2.4914e-06,
         2.1644e-05,  4.3041e-05,  5.1552e-05,  1.5855e-05,  4.4054e-05,
        -2.3487e-05, -1.3798e-05,  9.9880e-06, -1.4818e-05, -2.2326e-05,
        -1.1220e-05, -2.5883e-05,  9.0678e-06,  3.4164e-05,  2.6520e-05,
        -1.0113e-06,  1.8937e-05, -5.2041e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1889e-04,  9.0902e-02,  5.5357e-05,  6.6253e-05,  7.7691e-05,
        -2.7847e-05, -1.6496e-05,  8.6458e-05,  3.3934e-05, -3.8697e-05,
         1.8818e-05,  3.8402e-05,  5.3546e-05,  2.0746e-05, -1.8106e-05,
        -5.0529e-05, -1.3185e-05,  1.2159e-04,  1.3036e-04,  1.2474e-04,
         1.9383e-05,  2.2210e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7980e-04, -1.5257e-01,  8.0693e-05, -1.9427e-05, -7.2014e-06,
         1.1186e-05, -1.2432e-05, -6.0160e-05, -2.0207e-05, -1.4672e-05,
         6.5069e-05,  7.9534e-05,  8.8859e-05, -3.5377e-06,  1.7203e-06,
         5.4687e-06, -9.9943e-06, -2.5438e-05, -9.9966e-05,  1.2300e-05,
         7.2502e-05,  1.8576e-05, -2.1760e-05, -1.3361e-05, -4.4271e-05,
         8.6595e-05, -2.0596e-05,  7.5355e-05,  5.0196e-06, -4.0171e-05,
         3.9458e-05,  5.0557e-08, -3.3740e-06, -3.7674e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5578e-04,  1.5470e-01, -1.2474e-05,  8.4037e-05, -4.3684e-05,
         8.1806e-05,  9.3906e-05,  2.8707e-05, -1.8206e-06,  4.6087e-05,
        -3.8139e-05, -1.4039e-04,  8.9947e-05, -1.2011e-04,  3.4728e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5569e-04,  1.9211e-01,  1.2921e-05, -1.3754e-04, -3.8188e-05,
        -1.2527e-04, -9.0581e-05,  5.0699e-05, -1.5977e-04,  1.3683e-05,
        -1.8976e-04,  6.6522e-05, -3.4382e-04, -9.2309e-05, -1.8099e-04,
        -1.0294e-04, -1.0510e-04, -1.6206e-04,  1.1478e-04, -9.6508e-05,
        -6.6280e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.6419e-05, -7.5982e-02,  8.8540e-06,  1.0174e-06,  6.1161e-06,
        -1.9332e-05, -8.3615e-06,  4.8134e-06, -6.0723e-06, -1.2872e-06,
         2.5523e-05, -3.3021e-05, -2.8462e-05, -2.5918e-06,  2.4904e-05,
        -8.0231e-06,  3.4439e-07,  4.9834e-06,  5.1227e-06,  3.1988e-06,
         2.2472e-06, -1.7842e-05, -2.2330e-05, -3.9465e-05, -8.8177e-06,
        -2.2132e-05,  1.1132e-05,  3.4544e-06,  5.8538e-06, -1.9904e-05,
        -1.4834e-05,  3.2873e-05,  5.0225e-05, -1.7393e-06,  5.5195e-06,
        -1.4888e-05, -2.5539e-05,  2.2355e-06,  1.3836e-05,  7.3456e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6335e-04,  1.4573e-01,  3.0607e-05, -5.7633e-06,  4.8523e-05,
        -5.2680e-05,  7.8146e-05, -6.4248e-05,  1.4992e-05,  7.0515e-06,
         9.8026e-05,  1.3434e-05,  3.3925e-05, -9.6444e-06, -1.2875e-05,
        -8.4726e-05, -2.1184e-06, -8.1779e-05, -6.8003e-05,  9.3198e-06,
         1.8253e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0215e-04,  1.8261e-01,  1.5839e-04,  6.1765e-06,  1.0079e-04,
         1.3758e-05,  1.9658e-05,  1.5692e-04,  4.2346e-06,  3.0003e-05,
         2.1096e-04, -9.8043e-05, -1.0066e-05,  1.2948e-04, -1.2888e-04,
        -1.9813e-06,  1.6950e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.5612e-04, -1.6590e-01, -1.1993e-04, -2.8403e-05, -6.1305e-05,
         1.0977e-04,  8.6928e-05,  1.3193e-05, -3.1013e-05, -3.2003e-06,
         9.8770e-05, -4.4584e-07,  2.6318e-05,  3.7092e-05,  1.1727e-04,
         1.2982e-04,  1.9164e-04, -1.0310e-04,  4.1199e-05, -1.5306e-04,
         2.2334e-05,  4.7112e-05, -2.3954e-05, -3.0995e-06,  2.7913e-05,
         9.8102e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.7742e-05,  4.0104e-02,  4.4441e-06, -5.0095e-05, -3.1849e-05,
        -6.5291e-05,  1.9413e-05,  4.1548e-06, -2.0163e-05,  7.5231e-06,
        -8.0980e-06,  1.3240e-05,  6.0240e-06,  5.9832e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1548e-04, -1.5099e-01,  1.0669e-04, -9.8742e-05, -5.6389e-05,
         1.9815e-05, -8.5341e-05, -6.4410e-05, -3.4720e-05, -3.0502e-05,
        -3.1611e-05, -9.6400e-06,  8.9462e-06, -2.4950e-05, -8.1215e-06,
        -7.6440e-05, -8.3047e-05,  2.5228e-05, -6.9369e-05, -8.0168e-05,
         6.0295e-05,  7.7321e-05,  4.8284e-05, -2.5861e-05,  1.2116e-04,
        -6.1403e-05,  4.6546e-05, -9.8671e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.7904e-04,  1.2135e-01,  1.1686e-04, -2.6473e-05, -6.8474e-05,
         4.4914e-06,  1.8579e-04,  2.5281e-05, -4.0117e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2404e-05, -1.4405e-01, -1.6594e-05, -2.0662e-05,  4.4006e-05,
        -1.3979e-05, -1.4221e-04, -1.4280e-05, -2.8814e-05, -7.3059e-05,
         7.3670e-05, -1.1739e-04,  1.4382e-05,  3.7679e-06, -6.9936e-05,
         6.1268e-05, -5.9690e-06, -7.8556e-05, -7.2191e-05, -3.8673e-05,
         7.5654e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4450e-04, -1.1578e-01,  1.6860e-05, -6.8958e-05,  1.4057e-05,
        -5.1547e-06, -5.5276e-05, -1.0559e-04,  8.6339e-06, -6.4915e-05,
         7.5143e-05,  2.5732e-06,  8.4881e-05,  3.3926e-05, -5.6424e-05,
         9.1317e-05,  8.9104e-05,  2.0780e-05,  4.7261e-05, -1.4936e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0637e-04,  2.3986e-01, -1.5005e-04, -5.0543e-05, -6.4135e-05,
        -5.1978e-05,  1.4712e-04,  4.9752e-05,  3.5292e-04, -8.5001e-05,
        -9.8424e-06,  9.4577e-05,  2.2695e-04, -4.8085e-05, -5.3253e-05,
         1.0805e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5029e-05,  1.1664e-01, -2.6492e-05, -7.4816e-05,  7.0409e-05,
        -1.1136e-04, -7.5310e-05, -4.4128e-06,  4.7837e-05,  7.1517e-06,
         3.4882e-05, -1.2652e-05, -6.7303e-05, -1.7760e-05, -1.7867e-05,
        -8.3421e-05, -8.0410e-05,  5.5297e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1118e-05,  5.1119e-02, -9.2688e-06, -7.3694e-05, -2.3850e-05,
         4.8056e-05, -3.0450e-05, -7.1044e-05,  1.2069e-05, -4.5544e-05,
        -1.7298e-05,  2.8802e-05, -4.5247e-06,  5.3641e-05, -4.3104e-06,
         6.0674e-05, -9.4889e-06, -3.8541e-05,  1.3114e-06, -4.2401e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1585e-04, -1.5605e-01,  3.1255e-06, -1.7255e-04, -9.5510e-05,
        -8.0646e-05,  1.2623e-04,  3.1354e-05, -1.3273e-04,  8.1866e-05,
        -8.6533e-05, -2.1934e-05,  2.5559e-05, -4.8412e-05, -9.0717e-05,
        -2.3412e-05, -6.2567e-05, -1.1621e-05, -4.7821e-05,  1.1706e-04,
         1.2633e-04, -1.1284e-04, -3.5373e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3237e-05,  5.8628e-02,  3.1068e-05, -6.1141e-06,  1.1869e-05,
        -1.8000e-05,  1.7833e-05, -2.5467e-05,  1.1321e-05, -2.2953e-05,
        -6.7881e-05, -1.8541e-05, -4.9130e-05, -5.0886e-07,  5.5995e-06,
        -8.0349e-05,  4.8314e-05, -2.5131e-05,  2.5719e-06,  1.6737e-06,
         4.6051e-05, -3.4552e-05,  4.0783e-06, -1.9694e-05, -2.4794e-05,
        -2.6645e-05, -2.4699e-05, -1.7340e-05, -7.0138e-05,  3.5018e-05,
        -1.2656e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7313e-05, -6.6138e-02, -3.1202e-05, -3.9074e-05,  1.1684e-05,
        -8.7460e-06,  3.1665e-05, -3.5191e-05,  1.7456e-05,  3.1179e-05,
         4.8178e-07, -2.3064e-05,  4.9184e-06, -2.4244e-05, -6.3796e-05,
         1.2074e-05, -3.5315e-05,  1.7504e-05,  1.8619e-05, -1.8872e-05,
        -5.2294e-05,  3.5070e-05, -2.8603e-05, -5.6686e-06,  2.0501e-05,
        -4.5935e-06, -4.7617e-05,  9.9109e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.6725e-07, -1.6605e-01,  2.0018e-04, -4.8193e-05, -7.5103e-05,
         6.8257e-05,  2.1736e-04, -1.9678e-04,  9.6137e-05, -2.0517e-05,
         1.3200e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0672e-04, -1.2987e-01,  9.6203e-05,  1.5943e-05, -2.2270e-05,
        -7.2976e-05, -2.8844e-05,  6.1445e-05,  6.2540e-05,  5.3291e-05,
        -1.2294e-04,  2.1046e-05,  4.7610e-05,  9.9260e-06,  4.6517e-06,
         2.1150e-05, -9.7059e-05,  9.1673e-06,  1.2183e-04,  1.4299e-05,
        -1.3443e-04,  6.0261e-05,  5.2643e-05,  6.6266e-06,  8.9957e-05,
        -3.7033e-05,  1.6392e-05,  3.4893e-06, -1.3491e-04,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3748e-04, -1.6907e-01, -7.8544e-05, -1.5650e-04, -1.3974e-05,
        -9.5365e-05,  2.9979e-05,  5.3667e-05, -5.3788e-05,  2.5227e-05,
        -9.9459e-06,  1.0752e-04, -1.0303e-04,  2.9700e-05,  3.4033e-05,
        -2.2015e-05,  8.7452e-05, -2.5819e-05,  1.0865e-04,  1.9587e-04,
         1.0993e-04,  3.7759e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.3009e-04,  1.6202e-01, -3.2173e-04,  4.2760e-05, -4.4375e-05,
        -9.0216e-05,  6.9813e-05,  4.3879e-04, -2.2258e-04,  3.7084e-05,
        -2.6463e-05, -2.0794e-04, -1.7288e-04,  1.5850e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.9427e-05, -1.6658e-01,  1.6444e-05,  3.2583e-05, -6.9154e-05,
         9.6194e-06, -4.2219e-05, -1.1428e-04,  1.0800e-04, -2.5579e-06,
        -1.1462e-04, -3.3957e-05, -7.6357e-06, -9.1460e-06, -4.4013e-05,
         1.0340e-04,  4.5375e-05, -7.5608e-05, -4.6944e-05,  8.0999e-05,
         2.8071e-05, -1.3321e-04, -2.9466e-05, -8.5954e-05, -1.8513e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2854e-04,  9.5579e-02, -3.2534e-05, -4.3529e-05,  8.7977e-06,
        -6.1184e-05,  1.2430e-06,  3.0358e-05,  3.9377e-05, -5.8196e-05,
        -4.1260e-05, -1.2225e-05, -7.8448e-05,  4.9989e-06, -7.3551e-05,
        -9.6521e-05, -1.2672e-04,  5.7475e-05, -2.3256e-05, -3.8414e-05,
        -1.6215e-06, -7.1790e-05,  2.6617e-05, -3.7436e-05,  1.0180e-05,
        -7.2244e-05, -4.0515e-05, -3.1042e-05,  8.1617e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4991e-05,  6.1696e-02,  1.1082e-04,  1.1684e-04,  7.9857e-05,
        -1.2272e-04,  8.5100e-05,  1.0998e-04, -3.9633e-05,  1.3795e-05,
         5.8443e-05,  2.8747e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.4284e-05,  2.9697e-02,  4.5241e-06, -3.0077e-05, -2.0846e-05,
        -1.2177e-05,  3.3722e-06, -1.9769e-05,  3.9196e-07,  1.3713e-05,
         2.0048e-05,  1.3479e-05, -3.9003e-06, -1.1245e-05, -5.5242e-05,
         2.8660e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0833e-07,  7.8829e-02, -6.7292e-06, -3.6002e-05,  1.3805e-05,
         5.0632e-05, -4.5119e-05,  1.1823e-05, -4.5002e-05,  3.0680e-05,
        -5.0149e-05, -1.6078e-05,  6.6419e-05, -4.9278e-06,  1.4856e-05,
        -6.3626e-06,  2.7559e-05, -4.8185e-05,  4.2499e-06, -3.4822e-05,
        -5.9849e-05, -8.8943e-06, -4.7391e-05,  1.8331e-05,  4.1619e-05,
         1.1798e-05,  2.9668e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3410e-05, -7.3516e-02, -5.6914e-05,  1.9284e-05, -2.6662e-05,
        -3.5336e-05,  5.9370e-05, -7.3811e-05, -4.9716e-05, -2.7036e-05,
        -4.3801e-05, -7.8176e-05, -1.6218e-06,  4.1247e-06, -2.1576e-06,
        -4.1691e-05, -2.1623e-05, -6.4413e-05, -1.3914e-05, -7.7321e-05,
        -2.0246e-05, -9.3483e-06, -6.5911e-05, -5.1379e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0620e-05, -1.8863e-01,  4.2575e-05,  1.9614e-05,  5.1745e-06,
         3.0929e-05,  1.0174e-04,  5.2108e-05,  3.8767e-05,  2.6985e-05,
         1.4775e-04, -7.2697e-05,  1.0052e-04, -1.9190e-05,  5.2944e-05,
         3.0580e-05, -5.6267e-05,  1.1736e-05, -4.6366e-05, -3.5817e-05,
         5.5163e-05, -2.7580e-05, -5.9496e-05, -2.8110e-05,  5.0428e-05,
         1.9088e-05, -2.4710e-05, -1.1606e-05, -4.9527e-05,  5.6789e-05,
         6.1893e-05,  5.8641e-05,  3.9793e-06,  2.0088e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9628e-05, -1.0529e-01,  1.1013e-05, -2.8769e-05,  7.4561e-06,
        -6.9084e-07, -1.1588e-05,  4.0811e-05, -1.7027e-05,  5.1052e-05,
        -1.9408e-05, -8.6277e-06, -4.4909e-05,  1.0595e-05, -4.2921e-05,
         1.7231e-05, -4.9301e-05,  3.3679e-05, -1.7044e-05,  1.9151e-05,
         6.9735e-05, -9.7602e-06,  2.4133e-05,  9.9347e-06, -1.4211e-05,
         6.8146e-06,  1.7592e-05, -1.5391e-05,  3.0827e-05,  1.4172e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6486e-05,  2.2906e-01,  5.3135e-05,  6.3603e-06, -6.1143e-05,
         9.4242e-05, -3.7889e-05, -8.5658e-05,  1.3611e-04, -1.2994e-04,
        -1.2565e-05,  8.0305e-05, -5.7082e-05, -1.9767e-05, -5.9171e-06,
        -2.3982e-04, -2.5676e-04, -4.8612e-05,  5.4655e-05,  1.2699e-04,
        -5.4132e-05,  9.4041e-05, -7.7444e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8817e-05, -1.1905e-01, -6.2366e-05, -1.0869e-04, -2.7057e-05,
        -1.1031e-05,  5.0551e-07, -3.1187e-05,  2.9753e-05, -1.0653e-04,
         2.1666e-05,  1.5474e-05,  2.2013e-05,  1.4135e-05,  5.2819e-05,
         1.8078e-05, -3.1376e-05, -8.2049e-05,  1.1586e-05,  6.2949e-05,
         1.1518e-04,  6.4241e-05, -1.4024e-04, -1.4655e-05, -3.3514e-05,
        -5.6923e-05,  1.7166e-05,  5.2347e-05, -2.8108e-05, -2.3478e-05,
        -6.1952e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1688e-04, -9.4417e-02,  1.0757e-05, -2.5843e-05,  1.6859e-06,
         1.0108e-05,  2.9568e-05, -2.7516e-05, -1.0065e-05,  3.0966e-05,
         1.8168e-05,  1.8133e-05, -3.0372e-06, -2.4116e-05, -1.3126e-05,
         2.9283e-05,  1.6314e-06, -1.6734e-06,  1.1839e-05,  1.4945e-05,
        -6.4798e-06, -3.2000e-05, -1.3241e-05, -4.5166e-05, -7.3860e-05,
        -5.2526e-07, -1.5351e-06,  4.5070e-06,  4.1971e-05, -3.6355e-06,
        -6.9895e-06,  2.0789e-05, -1.4196e-05,  1.2292e-05, -7.0741e-06,
        -2.3841e-05,  2.5345e-05, -1.4728e-05, -1.1231e-05, -1.4421e-07],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 2.3207e-05,  1.0309e-01, -7.5856e-05,  7.7071e-05,  1.0079e-05,
        -8.1634e-06, -1.7309e-05,  3.7809e-05, -2.7295e-05, -4.9623e-05,
        -3.6818e-05,  7.1974e-05, -1.0971e-04,  4.6293e-05,  6.8985e-05,
        -1.0448e-05,  8.7729e-05, -3.9404e-06, -1.5853e-05,  4.8212e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3287e-04,  8.2275e-02, -3.2699e-05,  9.3958e-05, -7.5888e-05,
         1.6131e-04, -5.3627e-05, -4.3417e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8148e-05,  1.5685e-01,  1.5231e-04,  1.2181e-04, -1.1761e-04,
        -1.1843e-04, -2.2433e-04,  1.2637e-05,  2.7174e-04, -9.4899e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2153e-04, -1.7256e-01, -6.6456e-05, -5.7714e-05,  1.3947e-05,
        -7.7967e-05,  4.8261e-05,  3.4268e-05, -3.8784e-05, -3.5680e-05,
        -4.2028e-05,  7.9000e-05, -4.1132e-05, -5.7650e-05, -2.8200e-05,
        -1.1588e-06, -2.6341e-05, -3.1099e-05, -2.4199e-05,  6.2133e-05,
        -2.4506e-05, -1.3325e-04, -1.9504e-05, -3.6072e-05, -2.8813e-05,
        -2.0004e-05,  9.4881e-05,  5.6675e-05,  5.4878e-05, -5.3598e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4668e-05, -1.3975e-01,  2.0670e-05, -3.5785e-05,  1.6101e-06,
        -1.3948e-04, -4.9758e-06,  6.7655e-05, -1.9201e-05, -1.5345e-05,
        -5.1719e-05, -1.0891e-05, -2.1616e-05, -6.6770e-05, -1.3527e-05,
        -1.3892e-04,  3.5047e-05,  7.7841e-05,  8.0329e-05, -5.7492e-05,
         7.2390e-05,  9.5892e-06, -6.3730e-05,  3.1055e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4925e-10, -1.2497e-01, -7.2306e-05, -3.4990e-05, -3.2386e-05,
         8.8924e-05,  6.3737e-05,  5.0799e-06,  8.5520e-05, -9.3218e-05,
         5.0066e-05,  4.7102e-05, -2.2180e-05, -1.8084e-05,  3.5893e-05,
        -6.7300e-06, -3.4930e-05, -4.6485e-05,  1.3356e-04,  2.1704e-05,
         7.2879e-05,  2.3151e-05,  1.1222e-04, -2.5540e-05,  4.9843e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7192e-05, -1.3157e-01,  1.7915e-05,  1.1498e-04,  5.6518e-05,
        -4.4702e-05,  5.5124e-05, -1.6047e-06,  1.3046e-04, -3.0841e-05,
        -1.9097e-05,  6.5868e-05,  6.9371e-05,  4.2708e-05,  2.4043e-05,
         2.8960e-05,  7.6501e-05, -3.0213e-05, -1.2540e-05, -3.9837e-05,
        -1.5266e-05, -7.0868e-05,  6.3215e-06,  6.5877e-05, -7.8662e-07,
         2.9818e-05,  3.3343e-05, -2.4884e-05,  4.4237e-05,  8.4866e-05,
         1.0507e-04,  2.8646e-05,  5.1212e-05,  4.0848e-05,  8.8576e-06,
        -3.2425e-05,  1.9040e-05,  8.6894e-06, -1.4925e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2973e-04,  6.2252e-02, -3.7728e-05, -1.7634e-05,  8.6784e-05,
         9.5062e-06, -8.8394e-05,  1.1053e-04,  5.6439e-05,  8.0953e-06,
        -4.3710e-05,  1.6388e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0017e-05, -1.6672e-01, -9.3368e-05, -3.2610e-05,  3.1589e-06,
        -1.0199e-06, -2.6728e-05,  5.7507e-05,  1.4741e-05, -1.5173e-04,
         1.5025e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8116e-04,  8.9534e-02, -1.1298e-04,  1.3699e-05,  5.4221e-05,
        -2.8604e-05, -4.9017e-05, -1.2416e-04,  1.2811e-05,  2.7318e-05,
        -5.9288e-05, -4.4576e-05, -2.6676e-05, -4.5658e-05,  1.6426e-05,
        -3.1770e-05,  8.1204e-06, -1.2745e-05, -9.7338e-05, -1.3779e-05,
         2.9978e-05, -1.5410e-05, -4.2897e-05,  2.6767e-05,  4.6816e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9575e-05, -1.4728e-01,  9.4356e-05, -9.2962e-05, -7.1586e-05,
         4.4927e-05,  8.2632e-05,  5.0133e-05, -7.1779e-05,  3.1796e-05,
         7.9764e-05,  7.3733e-05,  5.8878e-06,  5.2102e-05, -7.2709e-05,
         1.1823e-05, -4.8209e-05,  3.9647e-05, -9.7231e-05, -2.8864e-05,
        -4.0000e-05,  3.9473e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8195e-04,  6.1196e-02, -3.5255e-06,  1.8354e-05,  4.3630e-05,
        -2.2398e-05,  2.5787e-06, -5.8282e-06, -5.1414e-05,  1.9944e-05,
        -1.5976e-05,  1.5976e-05, -2.5954e-05,  4.9540e-06, -2.0696e-05,
        -3.6231e-05, -4.3356e-05,  6.1006e-06, -3.1714e-05,  5.7194e-06,
        -2.6098e-05, -1.4944e-05, -1.6682e-05, -2.1003e-06, -5.0101e-06,
         1.4702e-05,  2.2013e-05,  2.9810e-05,  7.2307e-05, -2.2238e-05,
        -2.3309e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-4.7571e-05, -1.4864e-01,  2.9146e-05,  1.9041e-06,  1.5132e-05,
         3.3905e-06, -1.0278e-05, -1.2025e-06,  5.7787e-06,  8.4290e-06,
         1.4003e-05,  6.5860e-07, -1.1954e-06,  1.7462e-05,  8.8765e-06,
        -2.1229e-05,  1.0424e-06,  2.6954e-05,  1.1227e-05,  1.4865e-05,
        -1.2260e-05, -6.6393e-06,  2.6928e-06, -4.8220e-05,  3.4913e-05,
         7.1400e-06,  2.4717e-05,  3.3759e-07, -1.0974e-05,  4.3479e-05,
        -3.0080e-06,  1.8782e-05,  1.6051e-05, -1.4950e-05,  1.5163e-06,
         1.0484e-05,  4.8689e-06,  1.2968e-05, -2.0775e-05, -2.7590e-05,
        -8.3536e-06, -2.5937e-05, -4.6479e-05,  4.8827e-05, -2.4778e-05,
        -1.0070e-05, -1.3933e-05, -2.3092e-05,  4.8511e-06, -2.1848e-05,
        -1.5196e-05,  2.5794e-05,  7.5399e-06,  9.9949e-06,  9.1297e-06,
         3.6323e-05, -2.4074e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5612e-05,  7.5242e-02,  6.0649e-06,  3.3314e-05, -1.4870e-04,
        -3.4739e-05, -6.0035e-05, -1.4997e-05,  9.3711e-05,  5.2367e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2722e-04,  8.3726e-02, -6.3024e-05,  5.1130e-06, -3.7979e-05,
         3.4441e-05, -2.6521e-06,  4.6216e-05,  4.6423e-05,  8.0390e-05,
         2.3019e-05, -6.5642e-05,  8.8274e-05, -2.5098e-05, -6.6855e-05,
        -3.5879e-05, -2.9692e-05, -2.6787e-05,  5.7793e-05, -4.6102e-05,
         5.7191e-05,  4.2553e-05,  4.7710e-05, -2.9483e-05, -2.3103e-05,
        -2.2054e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9930e-05,  5.4718e-02,  5.8889e-07, -1.1627e-05, -3.3321e-05,
         6.6346e-05, -1.0943e-05,  3.3616e-06, -2.1685e-05, -4.6420e-05,
        -4.7351e-05, -2.4597e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9962e-05, -1.6199e-01,  3.5352e-05, -2.2804e-05, -9.3941e-05,
        -1.1595e-04,  2.7514e-05, -7.8538e-05, -2.1982e-05, -2.8895e-06,
         1.2890e-04,  8.4262e-06,  9.6645e-05,  1.6399e-05,  5.5904e-05,
         1.4812e-04, -2.2554e-05, -3.2129e-05, -1.5478e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7801e-04,  1.6662e-01, -1.6777e-05,  1.2141e-04, -7.2564e-06,
         4.2407e-05,  1.1206e-05, -9.3788e-05, -6.7398e-05,  2.0848e-05,
        -5.1395e-05,  1.3060e-04, -5.6131e-05,  1.2299e-04, -5.9142e-05,
        -1.2719e-05,  9.6860e-05,  5.8086e-06, -3.6403e-06,  4.6206e-05,
        -6.9202e-05,  7.8928e-05,  1.0547e-04, -9.3176e-05,  6.1971e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3591e-05, -1.9827e-01, -8.7693e-05, -5.0633e-05,  2.0051e-04,
        -3.4664e-05, -1.3607e-04, -5.6579e-05,  5.6690e-05, -1.3327e-05,
        -5.3470e-05, -1.7886e-04,  6.7458e-05, -1.8218e-05,  6.9192e-05,
        -2.0736e-04, -1.6192e-04,  1.6908e-04,  2.0351e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7343e-06, -1.6049e-01, -5.7251e-05,  1.7426e-05, -4.0923e-05,
         2.2130e-05,  4.0373e-05,  1.6731e-05, -3.0886e-05,  2.8088e-05,
        -1.1647e-05,  7.0408e-06,  7.5547e-05, -1.1361e-05,  5.6858e-05,
        -5.5874e-05,  9.6993e-06, -1.0246e-05,  1.6427e-05, -8.3728e-06,
         6.0932e-05, -1.1286e-05,  1.2261e-05,  5.1463e-05,  2.3816e-05,
        -8.2768e-07,  1.9098e-05, -4.7988e-05, -3.8959e-05, -8.0092e-05,
         5.9451e-05, -6.9117e-06, -1.7749e-05, -1.8238e-05, -5.0190e-05,
        -2.3696e-05, -1.3603e-05, -3.0262e-05, -4.6907e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3877e-04,  1.7817e-01, -3.2941e-04, -6.8814e-05, -1.6802e-04,
        -5.4765e-05,  3.6550e-06, -3.1322e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2517e-04,  1.5662e-01,  1.4765e-04, -2.3200e-05, -3.4131e-05,
        -2.8345e-05, -1.5708e-05, -1.4762e-04,  7.0922e-05, -1.1895e-05,
         2.6118e-05,  6.6346e-05, -2.8888e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7731e-04, -1.4105e-01, -7.1719e-06, -3.0686e-05, -2.3799e-05,
        -5.4729e-05,  2.3048e-07,  1.0406e-04,  9.1026e-05, -8.5347e-06,
         2.6229e-05,  6.3828e-05, -7.0156e-05,  6.9649e-06, -2.7154e-05,
        -1.5457e-05, -7.4770e-05, -1.2236e-05, -1.0705e-04,  1.5893e-06,
         5.1667e-06,  1.7286e-04, -7.6680e-05,  1.3014e-04, -1.9041e-05,
        -1.3135e-05, -1.6333e-04,  2.1974e-05,  7.7821e-06,  3.2127e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7914e-05,  4.5599e-02,  3.5376e-05, -1.0417e-05, -4.9266e-06,
        -3.1476e-06,  2.4212e-05,  7.3663e-06, -4.6774e-05, -1.2088e-05,
         4.7505e-05,  1.1564e-05,  1.7218e-05,  2.4055e-05,  7.7128e-07,
         1.5531e-05,  8.2839e-06,  2.0358e-05, -4.9432e-06, -1.4447e-05,
         8.5141e-06,  5.1971e-06,  5.7680e-06,  1.2008e-06,  2.2073e-05,
        -2.3087e-06,  1.0762e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 8.2147e-05,  1.6700e-01,  2.1120e-05, -4.0464e-05,  3.4241e-06,
         7.1063e-05,  6.7620e-05,  3.5168e-05, -5.6926e-05, -6.1500e-05,
         4.1740e-06, -1.3240e-04,  4.9839e-05,  3.5601e-05,  1.6531e-05,
         4.4138e-05, -2.3604e-05, -1.4269e-05, -9.6213e-05, -7.9753e-05,
        -5.0965e-05,  2.6973e-05, -1.7604e-05,  6.2464e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9213e-05, -8.7961e-02,  1.0834e-05,  6.4035e-06, -3.0844e-05,
         4.9299e-06,  3.3686e-05,  7.6663e-06, -1.3677e-05, -5.1275e-05,
        -4.5531e-05,  2.2980e-05,  1.2846e-05, -4.1548e-06,  1.3430e-05,
         2.8890e-05, -2.2265e-05, -1.3471e-05, -3.7473e-05,  2.6283e-05,
        -1.9936e-05,  3.8523e-05,  2.0701e-05,  6.4264e-06, -2.7372e-05,
        -4.1593e-05,  8.4544e-06,  1.4220e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1351e-04,  1.2296e-01, -7.7972e-05, -5.4802e-05,  2.4055e-05,
         1.4911e-04,  1.0057e-04, -4.3066e-05,  1.0580e-04,  8.1781e-05,
         1.0234e-04,  1.1742e-04, -9.1694e-05,  9.2780e-05,  3.1356e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0329e-04,  9.7368e-02, -5.5422e-05, -3.8975e-05, -2.8194e-05,
        -1.7385e-05,  1.2343e-05, -1.5397e-05, -7.0272e-05, -3.2338e-05,
        -4.7022e-05, -4.0429e-05, -1.4554e-05, -4.1033e-05,  3.2646e-05,
         2.2011e-05, -1.0980e-05, -4.3967e-05, -5.0820e-05, -8.5805e-05,
        -2.5396e-05,  5.7656e-07, -4.0707e-05, -4.0988e-05, -5.0464e-06,
         6.0018e-06,  2.8010e-06, -7.3579e-06, -1.7559e-07,  4.0068e-06,
         7.0597e-06, -5.3646e-05,  3.6015e-06, -3.0308e-05, -2.5708e-05,
        -4.9811e-05,  3.8852e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.7555e-06, -1.6538e-01, -7.1355e-05, -1.9704e-05, -3.2178e-05,
        -1.7326e-04, -9.8238e-05,  3.9774e-06,  2.6089e-06, -1.0675e-04,
        -7.4617e-05,  7.9888e-05,  4.1416e-07,  1.0669e-05,  6.4275e-05,
         8.0573e-06, -4.5098e-05,  5.8255e-05, -2.2081e-05, -1.5003e-06,
         6.2598e-06,  6.3298e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1662e-04,  1.2658e-01, -3.2378e-05,  8.7015e-05,  3.5003e-05,
         9.6999e-05,  2.5447e-05,  2.6576e-07, -6.4202e-05,  1.4487e-04,
        -5.2946e-07, -1.3160e-05,  1.8071e-06, -2.8887e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4846e-04,  1.0399e-01,  1.1271e-04,  1.0696e-04, -1.0745e-04,
         5.9356e-05, -4.2155e-05,  7.3203e-05,  1.3410e-04,  1.3566e-04,
        -4.8900e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1699e-04,  1.9062e-01, -2.0505e-04,  5.0427e-05, -3.1305e-05,
         2.4205e-05,  2.6240e-04, -3.0008e-05,  3.1379e-05, -2.4640e-04,
        -6.7225e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.1636e-05,  1.8239e-01,  7.0019e-05,  3.2852e-04,  1.0851e-04,
         5.4263e-05,  2.4957e-05,  8.4756e-05,  1.6078e-04,  1.6154e-04,
         5.5904e-05,  7.8724e-05,  9.5388e-05,  1.1975e-04, -5.1482e-05,
         1.2005e-05,  6.8538e-06, -1.1457e-05,  8.0573e-05,  4.5314e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5793e-05, -1.2735e-01, -6.7210e-05,  6.7351e-06,  8.6407e-06,
        -6.8557e-05,  1.5417e-05, -1.0649e-05, -2.8371e-05,  1.3933e-05,
         5.1729e-05,  2.9736e-05,  3.9660e-05,  3.8005e-05, -4.7456e-05,
         1.5634e-05,  5.7364e-05, -1.8422e-05,  6.9271e-05,  3.3076e-05,
         6.5298e-05, -8.8394e-05, -8.9634e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.0479e-05, -7.5205e-02,  2.3869e-05, -4.9488e-05,  9.4967e-07,
         7.2529e-06, -7.4293e-06,  1.5993e-05,  1.1400e-06, -2.3809e-05,
        -2.9557e-05, -2.1170e-05, -3.3793e-05, -2.7909e-05, -2.7540e-05,
        -3.0771e-06,  1.7639e-05, -2.5306e-05, -3.8853e-05,  1.9802e-05,
         2.7938e-05,  1.0511e-05, -2.7621e-05,  2.5476e-05, -2.4551e-05,
        -5.2581e-05, -5.0591e-06, -2.0893e-05, -1.1076e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.9343e-07, -1.1601e-01,  1.3821e-05, -3.7014e-05,  1.0339e-05,
        -3.1571e-05,  1.3245e-05,  1.6000e-05, -3.6952e-05,  4.2313e-05,
        -5.3286e-05,  1.2636e-06,  1.0844e-04, -1.8782e-05, -2.7226e-06,
        -2.2557e-05, -2.9316e-05, -8.5020e-06,  9.8731e-06,  1.4542e-05,
        -1.7007e-05, -3.5352e-05, -5.0150e-05,  4.5664e-05,  1.5132e-05,
         1.5522e-05,  2.0148e-05, -1.9744e-05, -1.8551e-05,  1.7211e-05,
         3.2661e-05, -5.9218e-05, -7.1738e-06, -1.1090e-05,  9.2156e-06,
         1.0250e-04, -4.0543e-05,  3.2922e-08], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 9.7179e-05,  1.2279e-01, -1.5540e-05,  6.8633e-07,  1.0808e-05,
         3.4276e-05,  7.9886e-05,  3.2343e-05,  1.1794e-05, -2.5917e-06,
        -1.0160e-04, -4.8823e-06,  2.4514e-05, -1.4330e-05, -3.2561e-05,
        -5.3463e-05,  6.4664e-05, -2.7438e-05,  2.4210e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6405e-06,  9.2980e-02, -6.7916e-06,  5.9852e-05,  2.8382e-05,
         5.3194e-05, -5.2297e-05, -6.9701e-05, -1.3286e-05, -3.1597e-05,
        -3.1572e-05,  2.5427e-05, -6.4160e-05, -1.4207e-05,  1.7611e-06,
        -6.9842e-05, -1.7842e-05,  3.0712e-05, -5.8928e-05,  1.5234e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1596e-05,  1.7007e-01,  1.4898e-04, -6.1979e-05,  5.8406e-05,
        -3.5546e-06, -6.7045e-05, -8.6678e-05, -1.2515e-04, -4.2873e-05,
        -1.1919e-04,  3.4198e-05, -2.1841e-04, -1.3374e-04, -2.9294e-05,
        -1.2514e-04,  1.3931e-04, -6.5610e-05,  3.0545e-05, -1.0764e-04,
        -2.0773e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0645e-04, -1.6534e-01, -1.2662e-04, -1.1648e-04, -1.6263e-05,
        -3.9397e-05, -6.8209e-05,  2.1609e-05, -1.3789e-04, -8.2688e-05,
         2.4553e-05, -8.0477e-05,  8.7728e-05, -1.6206e-04, -5.0073e-05,
         5.3021e-05,  9.9163e-05, -1.9733e-05, -1.1270e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4516e-04, -1.1240e-01,  5.2443e-05,  4.4584e-06,  1.6002e-05,
        -3.4836e-05,  2.8992e-05,  8.3336e-05,  3.5968e-05,  7.0499e-07,
         5.1029e-05,  2.5053e-05,  1.7838e-05,  2.0346e-05,  3.5303e-05,
         8.3636e-05,  4.8157e-05,  1.2583e-05, -2.7256e-05,  3.5724e-05,
         2.5951e-05,  3.4294e-05, -4.9693e-05, -1.4336e-05,  2.9930e-05,
        -8.7483e-06, -2.3333e-05, -7.1314e-06,  2.1968e-05,  1.0653e-05,
         2.1960e-05, -2.3833e-05, -4.7579e-05, -1.0846e-05, -2.7025e-05,
         5.0182e-05,  2.7825e-05,  4.2467e-05,  1.4430e-05,  2.2954e-05,
        -1.6005e-05,  3.5634e-05, -3.8124e-05, -8.5500e-05,  6.7624e-05,
         3.6730e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4689e-05,  1.8838e-01,  5.0601e-05, -1.2079e-05,  3.4554e-05,
        -4.1684e-05,  5.3783e-05,  2.6742e-08, -1.5484e-06,  2.2417e-05,
         3.3097e-05, -3.0846e-06,  5.1042e-05, -3.6993e-05, -1.8016e-05,
         5.5523e-05,  5.2683e-05,  1.0332e-04, -7.0917e-05,  5.1831e-05,
         3.2238e-05,  7.3270e-05,  4.0186e-05,  9.1025e-05,  3.9683e-05,
         9.2769e-05,  4.2325e-05, -4.1418e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4523e-04, -1.8207e-01,  2.3581e-05,  2.3132e-06, -4.2344e-05,
        -2.3619e-05, -7.1108e-06,  1.1672e-05, -4.3906e-05, -1.4416e-04,
        -1.0859e-05, -9.6219e-07, -6.5704e-05,  5.1922e-05,  1.2805e-05,
        -2.7256e-06, -2.9604e-05, -2.4818e-05, -4.0350e-05,  4.2668e-05,
        -2.6672e-05,  6.2004e-06,  4.9951e-05,  3.7582e-05, -7.8592e-06,
         2.4917e-05,  3.5277e-05,  8.1766e-05, -3.6966e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1677e-04,  8.9250e-02,  1.7667e-05,  1.5765e-05, -5.5506e-05,
        -3.1765e-06, -3.0004e-05,  5.4406e-05,  3.1623e-05, -1.2970e-05,
         4.3063e-06,  7.5678e-06, -5.9942e-05,  4.2617e-05,  5.4516e-05,
         3.4566e-05, -1.2091e-04, -7.2606e-06, -3.1240e-05, -3.5519e-05,
        -8.0488e-06, -2.0482e-05,  1.1002e-05,  2.4531e-05,  9.1257e-05,
        -1.0481e-05, -5.6117e-05, -2.1856e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2299e-04, -8.1694e-02, -1.4126e-05, -8.1105e-06, -2.0560e-05,
        -4.5230e-05,  4.3665e-06, -4.8932e-06, -9.9967e-06,  2.6212e-05,
        -9.9889e-07, -3.9086e-05,  1.9111e-05, -1.4149e-05, -6.5043e-06,
        -1.2281e-05, -1.5098e-05,  3.3172e-06,  1.3048e-05,  8.4839e-06,
         1.0390e-05, -3.8057e-06, -4.7739e-06, -4.6888e-06, -3.4152e-06,
         2.6769e-05, -3.0420e-05, -1.0682e-06,  2.3320e-05,  2.6782e-05,
         1.9410e-05, -3.1847e-06, -3.7313e-06,  1.9504e-05, -2.4461e-05,
        -2.8506e-05,  2.5693e-05,  2.5590e-05,  6.6045e-06,  3.7554e-05,
        -7.7431e-06, -1.8207e-05, -1.1763e-05,  4.7531e-05, -9.8561e-06,
        -2.5163e-05, -6.5927e-07], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4410e-04, -1.3660e-01,  2.6119e-05, -6.9226e-06,  6.1424e-06,
         1.6258e-05, -4.2504e-05, -4.3729e-07,  4.9778e-05, -4.1974e-06,
        -1.1066e-05, -1.6977e-06, -1.4181e-05, -1.9056e-05, -6.7837e-05,
        -1.9233e-05,  3.5918e-05, -1.0475e-05, -1.6338e-05, -9.7712e-06,
         1.3354e-05, -2.0166e-05,  6.5890e-07,  1.1770e-05,  7.0944e-06,
        -5.8689e-06, -2.2212e-05, -2.6704e-05,  2.2871e-05,  1.4700e-05,
         2.9437e-06,  4.4241e-05,  1.6945e-05, -1.6215e-05,  2.2334e-05,
         6.0102e-05,  3.4958e-05,  5.7057e-05,  5.3154e-06,  6.9765e-06,
        -1.2158e-05, -1.1863e-05, -2.6948e-06,  4.3423e-05, -1.8875e-05,
         6.3882e-06,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.3170e-05, -1.0305e-01,  3.6258e-05,  5.1486e-05, -6.1510e-05,
        -1.6268e-06, -3.2078e-05, -5.8348e-05,  1.3030e-06, -4.5041e-05,
        -5.7306e-05,  8.5102e-06,  1.0674e-05,  1.8239e-05,  4.2552e-05,
         2.8179e-05, -2.9518e-05, -3.5180e-05, -9.3105e-05, -2.8663e-06,
        -3.0953e-05,  6.2647e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2652e-04, -1.5684e-01, -9.4680e-06,  8.0721e-05,  7.0210e-05,
        -5.9450e-05, -1.4297e-04, -8.6410e-06, -1.4934e-05, -1.8008e-05,
        -1.2011e-05, -7.6136e-06,  7.4930e-05,  2.3843e-05, -2.1192e-05,
         1.3596e-04,  9.8380e-05, -3.5134e-06,  3.4051e-05, -6.8481e-05,
         5.2692e-06,  2.1212e-05,  1.0798e-04, -5.0053e-05, -6.4031e-05,
        -1.0577e-04,  1.4816e-05, -5.5304e-05, -4.8283e-05,  7.6666e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 2.6505e-04, -1.1673e-01, -8.1465e-05, -8.6997e-05,  2.4228e-05,
        -2.4990e-06, -6.2446e-05,  1.0601e-04,  7.3882e-05,  6.6351e-05,
         8.8578e-06, -2.2174e-04, -7.8544e-05,  5.7405e-05, -6.5240e-05,
         8.2216e-05, -2.4665e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4392e-04, -2.2022e-01,  1.1067e-04, -8.1948e-05, -1.0192e-04,
         4.3728e-05, -9.8792e-05, -1.1657e-05,  1.0834e-04, -1.0239e-05,
         1.8041e-04,  1.8001e-05,  1.4485e-04,  1.7159e-04, -2.4553e-05,
         2.2500e-04, -1.4345e-04,  8.8322e-05,  9.7277e-05,  9.7296e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9459e-04, -1.8511e-01,  4.0052e-05,  9.7393e-05, -1.2596e-04,
         3.1637e-05,  2.0186e-04, -4.7847e-05,  2.3180e-04,  2.3505e-04,
        -1.2294e-04,  6.8215e-05,  2.8462e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9085e-04, -1.6243e-01,  1.6774e-05, -3.0763e-05,  1.0178e-04,
         1.8556e-05, -1.6435e-05, -6.6960e-05,  1.1366e-05,  1.2234e-04,
        -8.3811e-05,  9.0717e-05,  5.9043e-06,  3.2445e-05,  1.3168e-04,
         3.5502e-05,  3.0683e-05,  3.2024e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9454e-04, -1.7291e-01, -1.3307e-05,  8.2748e-05,  5.9338e-05,
         8.7357e-05,  6.6293e-05, -2.5042e-05, -1.3107e-05, -8.2189e-05,
         2.1356e-05,  8.4304e-06, -3.2495e-05,  1.3049e-04,  1.1581e-05,
        -7.6530e-05,  9.8219e-05,  8.0562e-05,  2.8072e-05, -1.3614e-05,
         4.5214e-05, -3.0350e-05, -3.8067e-05,  3.3163e-05,  1.1821e-04,
        -5.0776e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2085e-04,  1.4328e-01, -1.3564e-04,  1.1846e-04,  1.7773e-04,
         3.3800e-04,  2.9571e-04,  1.3738e-04,  6.8034e-05, -2.3719e-05,
        -1.0514e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5613e-04,  6.8429e-02, -1.0160e-04, -2.5576e-06, -1.1735e-04,
         7.0190e-05, -2.3740e-05,  1.0616e-05,  1.8491e-05,  5.6220e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5132e-05, -1.1057e-01, -2.4721e-05, -1.0183e-05,  4.2334e-05,
         1.5391e-06, -5.3762e-07,  1.6386e-05,  8.7682e-06,  1.6998e-05,
         1.0667e-06,  8.5709e-06,  5.0111e-05, -4.1097e-05, -3.8453e-06,
         5.4947e-05, -1.2705e-05,  1.9446e-05,  4.4087e-05, -6.5148e-06,
         1.0902e-05,  1.5632e-05,  4.5460e-08, -1.7837e-05, -1.4476e-05,
        -1.2445e-05,  3.2380e-05,  2.4185e-05,  2.5245e-05, -1.7081e-05,
         9.6082e-06,  3.0945e-05,  7.8507e-06,  2.0008e-05, -4.2158e-06,
         1.5680e-05,  1.4529e-05,  7.0890e-06,  1.8291e-05,  1.0819e-05,
         7.9564e-06,  2.2599e-05, -1.0780e-05, -4.9735e-05,  1.1674e-05,
         2.8143e-05,  3.5849e-05,  9.6896e-07,  6.1568e-06, -2.9434e-06,
         4.4454e-06,  2.2114e-06, -2.6671e-05,  4.5081e-05,  2.5433e-05,
        -2.2019e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8783e-04,  1.5105e-01, -1.0754e-04, -8.9763e-06, -3.9773e-06,
         1.8271e-05,  9.2889e-05, -5.6897e-05, -5.3541e-05, -2.1027e-05,
         4.3973e-05, -3.4904e-05,  5.3715e-05,  9.7050e-05,  2.6844e-05,
         3.8305e-05,  4.6322e-05, -1.8822e-06,  1.0547e-04, -2.8096e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6517e-05, -1.3688e-01,  8.6242e-06,  6.1390e-07,  3.3162e-07,
        -4.0295e-05, -2.0063e-05, -6.9570e-05,  5.9105e-05,  5.2124e-05,
         6.9210e-05,  6.8958e-05,  1.1206e-04,  5.9026e-05,  5.9422e-05,
        -2.0968e-05, -6.5860e-05,  1.4988e-05,  2.0625e-05,  1.3378e-05,
         2.7121e-05,  4.3265e-06,  7.5068e-05, -3.4729e-05, -5.4834e-05,
         7.3777e-05,  5.0727e-05,  4.9046e-06,  2.3474e-05,  4.1958e-05,
         6.2099e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([2.9878e-05, 8.5921e-02, 1.7101e-05, 1.4114e-05, 4.6892e-05, 5.9923e-05,
        7.9683e-06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4563e-04,  4.5807e-02, -2.0278e-05, -3.4910e-05, -1.4636e-04,
        -3.9537e-05, -3.8162e-05, -9.6616e-05, -2.1702e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 1.7191e-04,  1.9741e-01,  7.2664e-05, -8.3628e-05, -5.0655e-05,
         3.5378e-05,  9.9633e-06, -3.6585e-05, -5.7638e-05,  3.4596e-05,
        -8.0581e-07,  4.3325e-05, -4.7503e-05,  2.8032e-05, -9.4904e-06,
         1.0528e-04,  1.0572e-04,  2.1919e-05,  3.4953e-05, -1.0686e-04,
        -1.1026e-04, -8.6302e-05,  2.0265e-06,  6.0331e-05, -4.6587e-05,
        -1.6576e-05,  9.1373e-05,  3.9581e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2741e-04,  1.6018e-01,  1.4341e-04,  7.8696e-05,  8.1158e-05,
        -3.7253e-05, -4.5013e-05,  7.5004e-07, -1.4382e-05, -6.6768e-05,
         4.6892e-05,  1.1330e-05, -4.3423e-06,  1.1256e-06,  2.3413e-05,
        -1.6669e-05, -2.4665e-05,  2.5082e-05, -6.5914e-05, -1.6159e-05,
        -3.6145e-05, -1.5917e-05, -3.2431e-05, -8.2252e-05,  2.2953e-05,
        -8.3471e-05,  2.4235e-05,  5.3793e-05,  3.6098e-05, -2.1932e-05,
        -2.4115e-05, -7.0203e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5405e-05, -1.6291e-01, -8.0773e-05,  5.7264e-05,  6.1335e-05,
         3.4423e-05,  6.4530e-06,  6.8572e-05, -1.4494e-05,  4.7760e-05,
         2.3140e-05,  2.6398e-05,  2.5554e-05,  1.2147e-04,  1.2679e-04,
         9.7659e-05,  9.2799e-05, -4.2816e-05,  3.7658e-05, -1.7233e-05,
         5.9428e-05, -5.4012e-05,  4.3079e-05,  8.0241e-06, -3.0534e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2320e-05,  1.5723e-01, -3.5366e-04, -1.2415e-04, -1.0178e-04,
         1.1534e-04, -2.0561e-04,  4.2574e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5769e-04,  7.0897e-02,  1.2295e-04, -5.5619e-05,  1.4309e-04,
         1.8825e-05,  1.8122e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4261e-04, -2.0553e-01, -7.2335e-05,  3.8713e-05, -6.3553e-05,
         5.1297e-05,  8.7176e-05,  2.7540e-05, -1.3665e-04,  1.0446e-04,
        -6.8881e-05, -7.3467e-05,  2.2226e-05,  1.5567e-05,  8.4378e-05,
         5.1002e-05, -3.7650e-05,  8.9866e-06,  3.6753e-05, -1.3038e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9685e-04,  1.6162e-01, -1.0913e-04,  1.5076e-04,  5.8408e-05,
        -1.1950e-04,  5.2516e-06,  1.0318e-04, -4.9764e-06,  9.0466e-05,
        -1.3193e-05, -1.4656e-04,  9.6004e-05,  3.3294e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4277e-04,  5.3887e-02,  1.3813e-05,  1.2245e-06, -5.3425e-05,
         8.6136e-05,  5.2015e-06, -1.0984e-04, -2.6191e-05,  4.0934e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5076e-04, -1.3430e-01, -1.2480e-05,  1.4700e-05, -1.2953e-05,
         1.5422e-05,  3.6855e-05,  2.8467e-06, -7.6494e-05, -5.4403e-05,
        -3.7350e-05,  2.2550e-05,  8.6405e-06,  8.6135e-06, -5.1924e-05,
        -4.0179e-05,  3.6516e-05, -4.5528e-05, -5.8501e-05,  9.8082e-05,
        -4.8507e-05,  2.8415e-05,  6.6070e-05,  1.3260e-04,  4.3448e-06,
        -5.8804e-06,  3.2218e-05, -1.3168e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9053e-05,  1.3212e-01,  4.6209e-06, -1.8682e-05, -2.2708e-05,
        -8.3710e-05,  1.4835e-04, -9.2795e-06,  1.1651e-04, -4.3990e-07,
         3.6948e-05,  4.6725e-05,  3.4291e-05, -5.0958e-05,  9.5525e-05,
        -8.4222e-05, -4.2765e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6688e-05, -1.4916e-01, -4.4374e-05, -1.1210e-04,  8.4416e-05,
        -1.1062e-04,  5.9687e-05,  3.5044e-06,  1.0376e-04, -1.1311e-04,
         7.4622e-05, -1.1672e-05,  3.7509e-06, -4.5895e-05, -6.4822e-05,
         1.7575e-04,  5.1460e-05,  1.2177e-05,  2.7576e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1613e-04, -1.7797e-01,  6.1270e-05,  3.7633e-05,  1.3496e-04,
        -1.1383e-04, -1.0307e-04, -1.0924e-04,  3.5968e-05,  8.1318e-05,
         3.4675e-05, -8.9616e-06,  1.4628e-04, -5.7742e-05,  4.6870e-05,
         3.0506e-05, -3.7581e-05,  1.3858e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 6.7272e-06, -1.6281e-01, -1.7557e-05,  4.8377e-05, -3.7235e-05,
        -1.3467e-05, -1.1397e-04, -6.4559e-06, -4.0355e-05,  2.8934e-05,
        -6.7516e-06,  5.8271e-05,  2.0922e-05,  4.5235e-05, -2.5172e-05,
         1.4421e-07, -3.4262e-05,  3.7458e-05, -3.1432e-05,  3.8915e-05,
         3.6833e-05,  1.9781e-06, -3.0945e-06, -2.8774e-05,  8.6124e-06,
         1.0964e-05,  9.1911e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1853e-04, -8.5684e-02, -1.7030e-05, -9.8321e-06,  5.8013e-05,
        -3.6182e-05, -2.2787e-05, -4.5893e-05,  3.4951e-06,  1.9168e-05,
         4.1840e-06, -7.2516e-06, -1.8151e-05,  3.7861e-05,  4.0816e-05,
         7.0266e-06,  4.5884e-06, -2.6612e-06, -5.3104e-06, -2.3542e-06,
        -1.8372e-05, -1.3960e-05,  2.1574e-05,  3.1895e-05, -1.3840e-05,
        -6.1836e-07,  4.0941e-06,  2.7799e-05,  1.9966e-05, -1.0617e-06,
         1.8867e-05, -5.5144e-06, -4.1327e-05,  3.8707e-05, -1.7306e-05,
        -2.7329e-06,  1.5820e-05,  1.3729e-05,  1.2868e-05, -4.0976e-05,
         9.9648e-06, -4.9840e-05,  1.5004e-05, -3.4187e-05, -1.9838e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7048e-05,  8.3531e-02, -5.5050e-05,  2.1988e-05,  1.5236e-05,
         4.1643e-05, -5.4928e-05, -6.4125e-06,  5.2545e-05, -4.1726e-05,
         7.3039e-06, -3.3867e-05,  1.3391e-05, -4.8526e-07, -3.8892e-05,
         9.8859e-06, -2.9322e-05, -2.6283e-06,  4.6699e-05,  6.5996e-06,
         1.5718e-05,  4.4249e-06, -9.4930e-06, -5.7140e-05, -3.5693e-05,
         4.3333e-06, -2.4349e-05, -2.5990e-05, -2.7489e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0132e-04,  1.9711e-01,  1.0028e-04, -8.3134e-05,  1.4681e-04,
        -3.1306e-05, -2.4694e-04,  1.8812e-04,  1.2518e-04, -1.0117e-04,
        -1.1001e-05,  6.4196e-05,  1.1513e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2412e-04, -1.4126e-01,  8.7378e-07,  3.8433e-05,  1.2233e-05,
         2.8094e-05,  2.3399e-05,  4.2951e-05,  3.3899e-05, -1.2106e-06,
         5.3873e-05, -1.1227e-06, -2.7735e-05,  1.8090e-05, -5.8665e-06,
        -5.3872e-06,  5.9587e-05, -4.8006e-05,  1.3385e-05,  2.4636e-06,
         1.0782e-05, -3.4095e-05, -2.7844e-05, -3.5959e-05, -4.8316e-05,
        -1.4483e-05, -1.9288e-05, -6.9471e-06, -1.4115e-05,  2.4377e-06,
        -1.0190e-05, -5.3033e-05,  2.5019e-05,  3.1789e-05, -9.1219e-06,
         7.7539e-05, -5.0083e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5809e-04, -1.2565e-01,  3.9441e-05,  3.0257e-06, -7.4848e-06,
        -2.9287e-06,  1.4543e-05, -1.9804e-05, -3.7000e-05,  8.5613e-05,
         2.2396e-05,  4.3214e-05,  1.3697e-05, -2.6409e-05,  3.2392e-05,
         8.6257e-05, -5.5435e-06,  2.3895e-05,  1.8893e-05,  3.5993e-05,
        -1.7477e-05,  4.5851e-05,  5.7410e-05,  2.2461e-05,  9.6219e-06,
         1.8918e-06, -2.8047e-05, -2.7227e-05, -1.9485e-06, -3.6270e-06,
         2.7036e-05,  3.8464e-05, -7.8957e-05, -6.4013e-05, -2.8754e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4465e-05,  4.1536e-02, -9.7428e-06, -3.3648e-05,  1.2184e-05,
         5.8455e-05, -2.3247e-06,  2.9840e-05,  3.4259e-05, -5.9285e-05,
         1.5194e-05, -6.7993e-05,  2.1806e-05, -1.6421e-05, -4.9316e-05,
         9.7783e-06, -3.5440e-06,  3.1275e-05, -1.2715e-05, -5.2819e-05,
        -2.7061e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3693e-05, -1.5705e-01, -2.2741e-07,  6.1850e-05, -4.5434e-05,
        -2.8556e-05,  1.7895e-05, -1.6670e-05,  2.9925e-05,  2.9944e-05,
        -3.0524e-05,  2.9467e-05, -2.1369e-05, -3.0885e-05,  1.1067e-05,
        -2.4361e-05, -9.5695e-05,  1.6689e-05, -3.1123e-05,  6.9923e-05,
         4.6236e-05,  5.5169e-05, -4.7560e-05,  1.5521e-05,  6.8060e-05,
        -1.8712e-05,  2.7327e-05, -4.0308e-05,  2.3297e-05, -4.7265e-05,
         2.5365e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0857e-04, -1.2191e-01, -1.1561e-05, -4.4704e-06, -1.4537e-05,
         1.2020e-05, -1.5196e-05, -2.1010e-06, -3.6485e-05,  5.1525e-05,
        -1.8468e-05, -1.6922e-05, -1.6976e-05,  2.2160e-05, -7.9953e-08,
        -2.4649e-05, -3.7342e-05, -1.7414e-05, -5.4994e-06,  3.6815e-05,
        -2.9844e-05, -3.1985e-05, -4.5337e-06,  1.4789e-05, -4.2535e-05,
        -1.8107e-06, -2.6763e-06, -6.6881e-06,  1.0172e-05,  2.1968e-06,
        -1.3905e-06,  7.0012e-06, -1.4397e-05, -3.8272e-05,  8.8593e-07,
        -1.4889e-05,  5.0111e-05,  6.3787e-06, -3.0662e-06, -3.3015e-05,
        -2.2989e-05, -3.1679e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.4761e-05,  6.8164e-02,  1.0481e-04, -4.6158e-06, -6.1899e-05,
        -1.0237e-04, -6.6203e-05, -6.3814e-05, -9.9877e-05,  2.1659e-05,
         5.1455e-06,  4.9137e-05, -2.1941e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7641e-04, -4.1233e-02,  6.0765e-06,  1.7079e-05, -7.3054e-06,
         3.7057e-06,  2.3235e-06, -2.2955e-05, -9.7967e-06, -1.9234e-06,
        -4.7649e-06, -5.4287e-06,  5.6051e-06, -5.3079e-06, -1.8580e-05,
         9.4559e-06,  1.3708e-05, -5.7034e-08,  8.2961e-06, -9.4632e-06,
        -2.3868e-05,  1.6763e-05, -2.1872e-05,  2.0872e-05,  1.5223e-05,
        -7.6437e-06,  5.0335e-06, -2.2350e-05,  3.4377e-06, -1.7400e-05,
         6.5033e-06,  3.4613e-06, -2.1467e-06, -3.2532e-06, -1.1243e-05,
        -1.0751e-06, -1.1764e-05,  2.6847e-06,  4.9352e-06,  6.8491e-06,
        -2.8096e-06, -8.4022e-07,  3.4500e-05, -1.7417e-05,  7.3731e-06,
        -1.4755e-06, -1.4248e-06, -1.3114e-05, -1.1614e-05,  1.2252e-06,
        -2.1838e-06, -5.0821e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.2720e-05,  9.5642e-02,  8.4123e-05,  9.9463e-05, -2.2794e-06,
         1.7711e-04,  9.1098e-05, -1.0206e-04,  5.9555e-06,  1.2327e-04,
         1.2997e-04,  2.3949e-05, -6.7819e-05, -4.2717e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([-4.3127e-05, -1.6784e-01, -8.2517e-06, -2.7451e-05, -4.4151e-05,
        -2.2392e-05, -1.7732e-05, -5.4811e-05,  5.1911e-05,  3.5523e-05,
        -3.7710e-05, -2.5406e-05,  1.2281e-05, -6.1103e-05,  3.7690e-05,
         1.4363e-05,  8.7658e-06,  1.4293e-05, -1.3427e-05, -3.6160e-05,
        -5.0143e-06, -3.5874e-05, -4.7683e-05,  2.0830e-05, -1.3906e-05,
         2.2517e-05,  5.0136e-05, -2.4811e-05, -2.0686e-05, -3.8786e-06,
        -1.6008e-06, -2.8054e-05,  3.6718e-05, -5.8830e-05, -4.0890e-05,
        -5.6740e-06,  1.3071e-05, -3.5862e-05, -3.2032e-05, -5.7637e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7093e-04, -1.3179e-01, -3.4566e-05, -5.1868e-05, -6.2012e-05,
        -1.0829e-05, -1.9763e-05, -6.1877e-06, -2.6194e-05,  2.3158e-05,
         9.8497e-06, -7.7250e-06,  7.8003e-06, -2.1673e-05, -2.4200e-05,
         3.5408e-05, -7.1258e-06, -1.8835e-05, -4.2723e-06,  2.6731e-05,
         3.6444e-06, -1.5809e-05,  1.4035e-05, -3.6287e-05, -5.6295e-05,
        -1.4772e-06,  1.1309e-05, -8.0815e-06, -1.8865e-05, -3.4382e-05,
        -3.4473e-06, -8.5488e-06, -5.8024e-06, -2.2004e-05,  6.1351e-06,
        -5.4182e-05,  1.9740e-07, -2.7055e-05,  2.7237e-05,  3.1299e-05,
        -6.3938e-06, -1.4415e-05,  3.7033e-06, -1.7145e-05,  3.2561e-05,
        -2.8753e-05, -5.4743e-06, -5.1651e-05, -1.7111e-05,  1.6602e-05,
        -3.6584e-06, -3.6200e-05, -1.5176e-05, -1.7285e-05, -9.7748e-06,
        -1.4044e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6915e-04, -1.8856e-01, -1.2652e-05,  6.0382e-05,  1.4876e-05,
         3.7947e-05, -5.1773e-05, -2.1650e-05, -5.4853e-06,  1.9587e-05,
         1.3685e-05,  6.3112e-05,  5.4604e-05,  6.4416e-05, -1.6558e-05,
        -3.1325e-05,  5.5807e-05,  1.6801e-05, -8.7692e-06, -1.2462e-05,
         7.2135e-05,  1.9866e-05, -2.2229e-05, -2.8265e-05,  2.2906e-05,
         3.7957e-05, -6.2935e-06,  2.5001e-05,  1.1488e-05,  4.7017e-05,
         7.3927e-05,  9.3930e-05,  7.0778e-05,  5.6251e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0247e-05,  1.4868e-01, -6.2684e-05,  8.0807e-05,  1.5719e-04,
         2.0511e-06,  1.6640e-04,  1.1887e-04, -3.8012e-05, -9.4528e-05,
         9.0237e-05,  6.2264e-05, -1.4762e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0873e-04,  1.2229e-01,  2.5401e-05,  4.5939e-05,  1.9764e-05,
        -2.3483e-05,  1.9366e-05, -1.4829e-06, -3.5307e-06, -1.0119e-05,
        -1.8403e-05,  1.1368e-06,  9.9409e-06,  2.8320e-05,  9.3321e-06,
         9.3439e-05, -3.8206e-05,  9.6967e-05, -3.7945e-05,  4.0878e-05,
         7.2811e-05, -3.8306e-05,  1.0109e-04, -4.1518e-05,  1.9800e-05,
        -6.5953e-05,  7.7484e-05, -5.6934e-06,  3.1615e-05, -3.5996e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9698e-07,  9.8412e-02,  6.9613e-05, -2.4786e-05,  2.4884e-05,
        -7.0132e-05, -4.6975e-05, -5.3479e-06,  2.3990e-05,  4.9777e-06,
        -3.2567e-05,  6.8672e-06, -1.0520e-05,  1.1164e-05,  2.2133e-05,
        -3.8250e-05, -5.4311e-06,  3.3945e-06,  5.2054e-05,  1.2211e-05,
        -2.0394e-05,  3.4111e-05, -1.0879e-05, -1.3799e-05, -5.9464e-05,
        -4.7869e-05,  7.1970e-05,  5.9120e-05, -4.6082e-05,  3.9550e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.2334e-05,  9.3167e-02,  4.1424e-05, -6.4448e-05,  5.0986e-05,
        -1.5141e-05,  4.1597e-05,  3.0194e-05, -3.0481e-07, -6.6229e-06,
        -4.7496e-05, -6.2692e-05,  1.7616e-05, -2.9383e-05,  4.8734e-05,
        -8.4266e-05,  3.4606e-05,  3.5043e-05, -6.2635e-06,  2.2280e-05,
        -3.7467e-05, -8.0029e-05,  1.2173e-05,  2.9605e-05,  3.8671e-05,
         5.5619e-05, -2.8957e-05,  1.7511e-05,  1.4028e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2829e-04, -1.6121e-01, -4.5117e-05, -1.4228e-04,  2.1987e-05,
        -1.7007e-04, -8.2602e-05, -7.1362e-05, -3.9471e-05, -4.0160e-04,
         4.8256e-05, -1.2222e-04, -2.1288e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3291e-04,  2.3060e-01,  1.8178e-04, -6.9384e-05, -6.6149e-06,
        -8.5797e-06,  2.7239e-05, -9.1755e-05,  6.4695e-05,  1.3067e-04,
         1.0858e-04,  5.1745e-05,  5.2315e-05,  2.5568e-05,  9.3284e-05,
         1.2352e-05, -1.9165e-04,  3.7920e-05, -1.0745e-04, -8.7918e-05,
        -4.1222e-05, -2.9204e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3974e-05,  1.9652e-01,  8.9804e-05, -2.9406e-05, -3.2782e-05,
        -8.1158e-05, -4.0093e-05,  5.4885e-05,  2.3273e-05, -1.9584e-06,
        -5.1452e-05,  6.2859e-05,  3.2663e-05, -3.6592e-05, -3.5797e-05,
         1.1737e-05, -4.1233e-05,  5.6101e-05, -1.2511e-05,  4.4506e-06,
        -1.2667e-05,  3.8899e-05,  5.5143e-06,  1.1871e-05, -6.8715e-05,
         6.8984e-05,  3.2010e-05,  3.8628e-05,  3.0676e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.0965e-04, -1.3548e-01,  2.7562e-05,  6.1669e-06, -3.5639e-05,
        -1.6523e-05, -5.0229e-05,  3.7199e-05,  6.2894e-05, -3.0550e-05,
         7.2235e-06, -3.8354e-05, -5.3565e-05, -7.1091e-05,  2.3752e-05,
         8.5763e-06, -2.2818e-05, -7.4104e-06,  6.7912e-06,  2.0099e-05,
        -4.9659e-05, -1.4154e-05,  1.4565e-06,  3.8865e-05, -2.5304e-05,
        -4.1420e-05, -4.1054e-05, -1.4525e-05, -1.0324e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0696e-04,  7.5554e-02, -1.6821e-04,  6.5888e-05, -1.6594e-05,
        -5.7192e-06, -7.0767e-05, -7.1162e-06, -3.6950e-05, -3.4594e-05,
        -2.0095e-05, -3.1478e-05,  2.3420e-05,  2.7080e-05,  1.4282e-05,
         3.7461e-05,  7.9292e-05, -5.7967e-06,  5.8585e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([-1.2530e-04, -1.4358e-01, -2.5605e-06,  6.4932e-05, -2.0189e-05,
         2.5274e-05,  1.4205e-05, -3.7903e-05,  9.8116e-06,  4.8415e-05,
         1.2647e-05,  2.9218e-05, -8.0800e-06, -3.2624e-05, -2.7262e-05,
         2.5089e-05, -1.4955e-05, -2.8735e-05,  1.3575e-05,  7.3554e-06,
         2.6277e-05,  1.7775e-06, -1.8411e-05, -2.1330e-05, -9.3238e-06,
        -2.7118e-05, -6.4060e-06, -4.2715e-05, -6.7161e-05,  1.8514e-05,
         9.7179e-06, -9.8672e-06,  2.5746e-05,  1.6748e-05,  4.3609e-06,
         1.8197e-05,  3.2401e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.4228e-06,  5.5530e-02,  8.6000e-06,  1.4016e-05,  4.8125e-05,
        -2.9723e-05,  7.2643e-06,  1.4186e-05, -1.7813e-05,  1.8937e-05,
         2.4859e-05, -1.2225e-06, -2.5620e-05, -2.5637e-05,  1.7782e-06,
        -2.8139e-06,  1.3888e-05,  3.0739e-05,  9.6539e-06,  3.4263e-05,
         2.8205e-06, -3.4584e-05,  1.9395e-05, -2.9599e-05, -1.6318e-06,
         1.6754e-05, -1.5015e-06, -1.7284e-05,  4.0780e-06,  1.9868e-05,
         3.4614e-05,  2.8009e-06,  4.4794e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8112e-05,  1.0771e-01,  5.1305e-05, -9.3525e-07, -5.7790e-05,
         4.0980e-05,  5.3990e-05,  1.0133e-05, -4.5549e-05, -8.7294e-05,
         1.1320e-05, -6.6780e-06, -2.5350e-05, -8.4105e-05,  5.0865e-05,
        -8.4271e-06,  7.1760e-05,  2.0315e-05, -7.4989e-05,  4.4150e-05,
         1.5656e-05, -5.1112e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1806e-04,  9.1826e-02,  1.2342e-05,  2.2220e-05,  7.6863e-06,
        -1.9249e-05, -1.4707e-05,  4.8709e-05, -2.5843e-05, -3.8425e-05,
        -1.2062e-05,  2.3342e-06, -6.0719e-06, -2.3532e-05, -3.4460e-05,
        -5.3323e-05,  8.9617e-06,  2.1252e-05,  3.2554e-05, -2.7987e-05,
        -4.1211e-05,  1.0547e-05,  8.9917e-06, -3.5589e-05, -1.0798e-05,
         7.9030e-06,  4.3805e-05, -6.8089e-05, -5.1684e-05,  3.2413e-06,
        -5.6680e-05,  2.5837e-05,  2.4372e-05, -4.3008e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3874e-04, -1.6540e-01, -7.9157e-05, -9.1907e-05, -4.3065e-05,
         5.6194e-05,  3.2982e-05,  7.2095e-05,  2.4330e-04,  7.7862e-05,
         6.5923e-05,  2.3694e-04,  1.2561e-04, -4.4400e-05,  2.5885e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5841e-04, -1.5213e-01, -3.9561e-05, -8.4666e-06,  3.7799e-05,
        -5.3271e-05,  5.6898e-05, -4.0569e-05,  1.3694e-04,  3.2442e-05,
         1.6097e-04, -6.9708e-05,  1.4385e-04,  1.8477e-04, -9.2058e-05,
         5.4084e-05,  1.5980e-04,  1.0781e-04,  9.8838e-05,  8.5010e-05,
         3.0885e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3539e-05, -1.3933e-01,  3.1257e-05,  4.7038e-07,  1.4949e-05,
        -2.6722e-05,  1.8394e-05,  1.0977e-05, -1.3020e-05, -1.0782e-05,
        -1.5994e-06, -2.0580e-05, -2.8774e-05,  3.5827e-05, -1.7977e-05,
        -3.9663e-05,  9.9893e-06,  1.1598e-05, -1.1127e-05,  7.2326e-05,
        -1.5741e-05, -3.7909e-05,  1.2445e-05,  1.5223e-05, -2.0665e-05,
        -8.5083e-06, -5.6143e-06, -3.3427e-05,  4.0013e-05, -9.3907e-06,
        -2.9209e-05, -1.0385e-05,  4.5562e-05,  1.8887e-05, -5.2821e-05,
        -2.9305e-06,  1.5534e-05, -1.1388e-05,  1.5942e-05,  4.6915e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1214e-04, -1.8942e-01,  1.6950e-04,  1.2708e-04, -8.1621e-05,
         3.2477e-06, -2.7348e-05,  8.5674e-05, -4.5708e-05, -3.3265e-05,
        -2.9558e-05, -1.3706e-04, -9.4520e-06, -1.3227e-05,  6.0864e-05,
        -2.8676e-05,  3.5263e-05, -3.8307e-05, -2.0209e-06, -7.2000e-05,
         1.8088e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3579e-05,  9.7695e-02,  1.0012e-04,  4.8040e-05,  4.3402e-05,
        -2.7533e-05,  1.8757e-05,  2.5125e-05,  2.1190e-05, -6.2539e-07,
        -1.0246e-04, -1.2175e-05, -5.8091e-05,  8.5766e-05,  5.8730e-05,
         1.2311e-05,  4.2209e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5150e-05,  8.9668e-02, -6.3051e-05,  2.6776e-05,  1.9276e-05,
        -8.9084e-05, -2.4466e-05, -6.2963e-05, -2.0939e-05, -9.1158e-06,
        -6.3286e-05, -5.8881e-05, -1.0097e-05, -5.5073e-05, -1.0345e-05,
        -7.9660e-05, -8.4045e-05, -1.2093e-05,  1.1944e-05,  8.0440e-05,
        -5.4039e-05, -8.2859e-05, -3.9529e-05,  6.0399e-05, -1.6365e-05,
        -1.6535e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0663e-04,  6.6028e-02,  6.1571e-05,  5.6587e-06,  2.8363e-06,
        -5.3152e-05,  1.4518e-05, -1.5632e-05, -5.6573e-05, -5.3248e-05,
        -9.4257e-06, -2.6329e-05, -6.2877e-05, -1.1392e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9710e-05,  1.3414e-01,  1.4019e-05, -4.5091e-06,  6.0449e-05,
         1.2321e-05,  1.7037e-05,  6.3198e-05,  3.8227e-06,  2.9248e-05,
        -7.1040e-05,  1.8104e-05,  5.4301e-05,  1.5877e-05, -2.4026e-05,
         4.2816e-06, -9.8046e-07, -4.9904e-05, -4.4675e-05, -5.7877e-06,
        -3.8698e-05,  2.3479e-05, -5.0771e-05, -6.1440e-05, -5.6865e-05,
         2.1315e-05,  2.8413e-05, -2.9111e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #150: [tensor([-2.2887e-04,  1.8584e-01,  2.0744e-04, -9.1094e-05,  1.2846e-04,
         8.4094e-05, -5.3842e-05, -3.1136e-04, -1.7509e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1019e-04,  1.4656e-01, -1.9671e-05, -1.0620e-05, -6.4965e-05,
         5.1022e-05,  3.7218e-05, -5.8651e-05,  3.5062e-05, -2.9017e-05,
        -8.1371e-05, -3.5975e-05,  7.3083e-05, -4.2525e-05, -7.6131e-05,
        -2.7918e-05,  2.6281e-05, -4.4119e-05,  1.7153e-05,  2.6089e-05,
         1.1433e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.0940e-05,  5.2837e-02,  5.2767e-05, -1.8700e-05,  1.3962e-05,
        -8.3715e-06,  8.8771e-06,  3.2109e-05,  3.9831e-05,  1.7073e-05,
         2.2057e-06, -2.4560e-05, -3.0606e-05, -1.6036e-05, -3.2582e-06,
        -4.0862e-05,  6.6082e-06,  3.2947e-06,  4.2396e-05,  4.0275e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8466e-05,  2.1365e-01, -1.9573e-05,  8.9785e-05, -3.5201e-05,
         7.9502e-05, -9.2785e-05,  1.9955e-05, -1.6570e-05, -3.2172e-05,
        -8.9154e-06,  1.9245e-04, -1.6726e-04, -6.1622e-05, -2.9028e-05,
         1.0081e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7745e-06, -1.6622e-01,  4.9734e-05,  1.7193e-04, -3.3852e-05,
        -6.0017e-05, -5.1501e-05,  9.2798e-05, -3.0480e-05,  7.0773e-05,
        -1.1278e-04, -1.1672e-04, -1.4814e-04,  1.4341e-04, -9.4468e-05,
        -4.5950e-05,  5.0962e-05, -6.7673e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7957e-04,  7.0072e-02,  4.3551e-05, -3.9123e-05, -4.4407e-05,
         2.8195e-05, -2.5131e-05, -3.7044e-05,  3.4843e-05, -4.3276e-05,
        -8.9517e-06,  4.2987e-05,  7.7067e-06,  8.5286e-06,  6.2758e-05,
         8.6999e-05,  4.5240e-05, -1.2447e-05, -7.0970e-06,  1.3053e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9867e-04,  1.4008e-01, -6.3379e-05,  2.3164e-05,  8.4706e-05,
        -1.5856e-05, -1.2163e-04,  7.1426e-06,  2.8361e-05, -6.5134e-05,
         1.5306e-04,  1.4090e-05,  4.6069e-05, -4.9246e-05,  1.1617e-04,
        -7.5596e-05, -1.2005e-05, -5.4494e-05,  2.8799e-05, -2.9223e-05,
        -3.3414e-05,  4.6788e-05, -3.8307e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8789e-05,  1.2176e-01, -3.7023e-06, -2.4522e-05, -5.4076e-05,
        -5.0907e-05,  7.6265e-06, -1.5941e-05,  1.3962e-05, -4.4784e-05,
        -8.1449e-06, -1.0754e-05, -1.5663e-05, -2.5701e-06, -4.4353e-05,
        -7.6447e-05, -8.9892e-06, -8.3437e-06,  1.8843e-05, -6.8067e-08,
        -1.0382e-04, -7.5150e-05, -7.4660e-05, -8.1262e-06, -7.4141e-05,
        -3.7060e-05,  2.1459e-05, -3.9115e-05, -6.2769e-05,  5.6808e-07,
         1.8809e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8093e-04, -1.2412e-01, -1.2278e-05, -6.1062e-05, -5.7822e-05,
         7.7933e-06,  5.6974e-05, -7.6066e-05, -3.4322e-06,  2.4064e-05,
        -5.3955e-05, -4.3795e-05, -5.8387e-06, -2.6388e-06,  5.9872e-06,
         5.8332e-05, -2.6469e-05, -4.2329e-05, -1.0644e-05, -7.0497e-06,
         1.2757e-05, -1.6076e-05,  1.8514e-05, -2.7302e-05, -3.0998e-05,
         3.9284e-05,  6.6102e-06, -2.3659e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.6116e-05, -4.6044e-02, -6.3744e-05, -1.2722e-05, -4.3253e-05,
         9.2845e-06, -4.1970e-05,  3.1787e-05, -2.4262e-05, -2.6208e-05,
         4.7930e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6862e-04,  2.0322e-01, -2.9562e-05,  6.3769e-05, -1.9452e-05,
         4.6983e-05, -1.2216e-04,  7.2526e-05, -9.6253e-05,  3.3840e-05,
        -8.5711e-05,  2.3924e-05, -2.2372e-06, -4.6466e-05, -1.7286e-05,
        -6.1273e-07,  1.5688e-04, -6.6396e-05,  5.1588e-05,  8.8527e-05,
         2.2017e-05, -7.0501e-05, -8.0142e-05,  3.7570e-05, -1.3610e-04,
        -2.0858e-05, -1.3210e-04, -1.0457e-04, -5.4473e-06,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3045e-04,  1.4001e-01, -5.1062e-06,  2.2388e-05,  6.8298e-05,
         3.8958e-05, -6.4729e-05, -5.9000e-06, -3.5532e-05,  3.0807e-05,
        -1.9706e-05, -2.9007e-05,  3.9603e-05, -4.6098e-05, -1.1070e-05,
         1.1798e-04,  7.5440e-05,  9.5402e-05, -2.8570e-05,  2.6321e-05,
        -2.9239e-06,  3.2495e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 1.6768e-04,  1.0702e-01,  8.0213e-05, -4.2554e-05, -9.0149e-05,
        -7.7560e-05,  8.6887e-07, -2.9497e-05,  3.1660e-05, -1.4202e-04,
        -8.4616e-05, -6.5858e-05, -5.8278e-05, -8.1226e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9859e-04,  1.8472e-01, -7.4427e-05,  6.0334e-05,  2.1901e-05,
        -7.1783e-05, -5.1134e-05,  2.5313e-05,  9.6345e-05,  2.4814e-05,
         1.4070e-04, -3.9689e-05,  4.5564e-06,  7.4214e-05, -9.7453e-06,
        -7.5699e-05,  4.0765e-05, -1.6770e-05,  1.1809e-05, -4.5917e-05,
         6.3555e-05, -2.2050e-05,  2.7900e-05, -4.3298e-06,  3.7142e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9840e-04,  1.2170e-01, -6.6679e-05,  2.6972e-05,  1.1512e-05,
        -3.9784e-05,  1.4212e-05,  1.5926e-05,  2.4301e-05, -4.1630e-05,
        -1.0718e-05, -8.2297e-06,  5.3260e-05,  6.4090e-05,  2.0972e-05,
         1.2368e-05, -7.0677e-05,  4.7772e-05,  1.3429e-05, -1.1785e-04,
        -7.5900e-06, -3.2844e-05, -6.3298e-06, -3.6419e-06,  7.4960e-06,
        -7.7458e-05, -1.7129e-05, -3.4767e-05,  2.2734e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1444e-05,  7.2833e-02,  3.5033e-05, -3.7138e-05,  4.1306e-05,
         1.0626e-04,  5.5394e-05, -4.9125e-05, -4.3771e-07, -1.5794e-05,
         1.1931e-05,  1.3756e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1087e-04,  4.8240e-02, -6.9837e-06, -2.7373e-05, -3.3803e-05,
         4.7272e-05,  9.1808e-06,  2.0640e-05,  2.9405e-05, -5.9089e-05,
        -1.8210e-06, -1.5651e-05, -6.7364e-05, -2.1213e-05,  1.6443e-05,
        -1.8456e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0853e-05,  6.3226e-02,  3.9593e-06,  3.3367e-06,  2.1405e-05,
        -9.1884e-06, -1.9114e-05, -4.8350e-06, -8.7499e-06, -2.2897e-05,
         3.9576e-05,  2.8872e-05,  4.6288e-05, -2.8591e-05, -3.0698e-05,
        -1.7302e-05,  1.2321e-05, -2.1538e-05, -1.2035e-05, -1.6769e-05,
        -3.2106e-05, -9.2850e-05, -1.2995e-05, -4.5556e-06, -1.7620e-05,
         4.3034e-06,  4.8371e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5247e-05, -6.6089e-02, -2.2900e-06, -3.8994e-06, -5.5334e-05,
        -1.4082e-05, -5.4235e-06,  1.3897e-05, -2.4063e-05, -5.1761e-05,
        -5.8671e-05, -5.7026e-05, -1.5119e-05,  1.6350e-05, -4.9829e-05,
        -3.1840e-05,  8.3419e-06, -8.9512e-06,  5.6079e-06, -2.0711e-05,
        -3.8485e-05,  1.2460e-05, -8.6301e-05, -3.3002e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.4521e-05, -1.2531e-01, -5.4747e-06, -4.6638e-05,  4.8892e-06,
         1.8221e-05,  1.3828e-05, -7.5865e-07, -2.2136e-05,  2.1491e-05,
         1.2071e-05, -1.4224e-05, -4.2439e-05,  4.5382e-05, -1.5728e-05,
        -3.1402e-05,  6.2772e-07,  5.3242e-06, -6.8484e-06, -4.1614e-05,
        -2.2708e-05,  1.9426e-05, -9.4699e-06,  2.7953e-05, -3.6862e-05,
        -4.9547e-05,  2.2862e-05,  5.1148e-05, -1.8197e-05, -1.5539e-06,
         6.9217e-06, -3.0159e-06, -1.9699e-05, -5.3045e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0753e-04, -1.5603e-01, -1.2711e-05, -5.4522e-05, -3.4374e-05,
         1.6164e-05,  4.6829e-05,  9.3923e-06,  2.2604e-06, -2.1358e-05,
        -1.9887e-05, -2.3611e-05,  1.8894e-05,  2.6836e-05, -7.5295e-05,
         3.5072e-05, -2.3135e-05, -3.1139e-05, -6.2322e-05, -5.7202e-05,
         2.2524e-05, -1.6514e-05, -5.5860e-05, -5.7272e-05, -1.1290e-05,
         2.0338e-05,  1.2283e-05, -3.7221e-05,  3.2689e-05,  5.3543e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4394e-04, -1.3335e-01, -6.0924e-05,  3.7290e-05, -1.0384e-04,
        -7.6884e-05, -9.3661e-06,  5.1428e-05, -1.4704e-05,  2.0531e-05,
         2.7834e-05, -3.7484e-05, -4.5522e-06,  2.4294e-05,  6.2205e-05,
         3.2168e-05, -2.9564e-05,  3.5664e-05,  7.5887e-05,  6.7342e-06,
         1.1711e-05, -7.5931e-05, -8.0636e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4384e-04,  1.8326e-01,  7.5882e-05,  9.8851e-08, -3.4434e-06,
        -2.2851e-05, -1.1480e-05,  2.7035e-05, -9.9758e-05,  5.2585e-05,
        -8.8965e-06,  2.3659e-05,  9.7263e-05,  3.3981e-05, -3.9160e-05,
        -4.0643e-05, -1.7289e-07,  5.6180e-05, -2.9878e-05,  2.8145e-05,
         3.7048e-05,  2.5428e-05, -1.1234e-06,  1.7916e-05, -2.7651e-05,
        -7.4718e-05, -5.1420e-05, -3.2929e-05, -1.2464e-05, -3.2354e-05,
        -5.8115e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.7227e-05, -8.3846e-02, -1.5240e-05, -6.4170e-06, -2.2835e-05,
         2.8673e-06,  6.6068e-06,  5.5168e-06,  1.1981e-05,  2.0855e-05,
         3.9416e-05,  1.1041e-05, -1.2676e-05, -2.5382e-05,  7.9682e-06,
        -7.3672e-06,  1.2599e-05, -1.4970e-05,  2.0389e-06,  5.0755e-06,
         4.4398e-06, -8.9626e-06, -1.2109e-05,  2.3238e-05, -2.5936e-05,
        -1.0171e-05,  2.2744e-05, -7.7812e-06, -1.0562e-05, -3.1461e-06,
        -3.3564e-05,  3.5104e-05, -1.1914e-05,  1.4711e-05, -1.7048e-05,
        -4.0940e-05, -5.8566e-06, -7.1935e-06, -1.8949e-05,  1.2497e-05],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 9.5704e-05, -1.2019e-01,  8.3197e-05, -1.2699e-05,  6.4699e-05,
        -1.0291e-04,  4.2030e-05, -1.0977e-04, -4.3802e-05,  1.3996e-04,
        -8.8514e-05, -6.5358e-05, -3.3410e-05, -5.1952e-06, -6.2723e-05,
         6.5573e-06,  8.8386e-05,  5.0359e-05, -1.5980e-05, -5.0640e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1062e-04,  1.3175e-01, -2.4861e-04, -1.5721e-04, -3.8852e-04,
         8.3792e-05,  7.7598e-05,  4.7524e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0381e-05,  8.9335e-02,  7.9650e-05,  4.6075e-05,  7.5464e-05,
        -4.0255e-05, -3.2656e-05, -1.6143e-04,  9.1004e-05, -4.7460e-07,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.8589e-05, -9.5431e-02, -1.3292e-05,  1.2500e-05, -6.3515e-06,
        -7.3048e-06,  1.8770e-05, -8.6412e-06, -1.4154e-05, -8.0890e-06,
        -1.5947e-05,  2.6701e-05, -6.4288e-06,  1.6055e-05,  6.1533e-06,
         8.7369e-06, -4.7810e-05,  1.2251e-05, -4.4929e-05, -1.5193e-05,
         1.7411e-06, -1.2733e-08,  3.4920e-05, -2.4151e-05,  6.7930e-05,
        -5.6224e-06,  3.7040e-05,  2.0172e-05, -6.1366e-06,  5.3882e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5827e-05, -1.1213e-01, -6.0432e-05,  6.9284e-06, -1.3655e-05,
        -1.0751e-05, -5.1759e-06,  4.6642e-05,  7.4930e-05, -2.0430e-05,
         4.3114e-06, -8.9256e-05, -2.5281e-05, -2.0662e-05, -3.4933e-05,
        -5.1675e-05,  6.7928e-05,  1.8349e-05, -2.3880e-05, -2.7708e-05,
        -8.0829e-06, -1.6020e-05,  3.8406e-05, -1.1121e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8290e-05, -1.3510e-01, -1.0370e-05,  2.3081e-06, -4.9632e-05,
        -5.7237e-05,  4.9721e-05, -3.8969e-05, -1.1020e-05,  6.9433e-06,
         4.3683e-05,  7.1407e-05,  4.4345e-06, -3.6765e-05,  5.1935e-05,
        -3.9898e-05, -5.5829e-05, -6.4303e-05, -4.9257e-05,  4.7015e-05,
         3.2973e-05, -2.4524e-05, -1.3101e-05, -2.6932e-05,  1.1125e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0256e-04, -6.9145e-02, -1.4097e-05,  4.1703e-05,  3.6945e-05,
        -3.7468e-05, -1.2308e-05,  1.7461e-05, -4.0426e-06, -1.7052e-06,
        -3.8782e-05, -2.1425e-05, -1.4529e-05, -2.2080e-05, -1.1324e-05,
         1.4784e-05, -1.4412e-05, -1.2176e-05, -1.8176e-05,  7.8842e-07,
        -1.3344e-05, -1.7890e-06, -8.0332e-06, -2.7259e-05, -2.9975e-05,
        -2.8364e-07, -1.9661e-05,  5.1125e-07, -1.3697e-05,  8.4255e-06,
         3.9144e-05,  1.1963e-05, -9.8500e-06,  2.7948e-05, -1.4066e-05,
        -1.5184e-05, -1.3211e-05, -5.9042e-06, -5.2222e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8731e-05,  5.3897e-02,  3.5353e-05, -5.9781e-05,  9.1328e-05,
        -7.4072e-05, -7.4036e-05,  9.8795e-05,  3.7930e-05, -1.4751e-05,
        -1.1962e-05, -3.7611e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3734e-04,  1.2569e-01,  9.1730e-05, -8.1690e-05, -5.7899e-06,
         1.8900e-04,  2.4908e-05, -1.9140e-05,  3.3516e-05,  6.2969e-05,
        -7.2670e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6419e-05, -1.2237e-01,  7.2828e-05,  1.5274e-05, -1.8745e-05,
         5.8621e-05,  3.0111e-05,  1.7396e-05,  5.6035e-05,  2.3558e-05,
        -5.1227e-05,  1.5489e-05,  3.9846e-06,  2.7714e-05,  3.9479e-05,
        -4.0018e-05, -1.0456e-05,  1.0836e-04,  7.5255e-05,  3.0437e-05,
         7.2485e-06,  1.3252e-05,  3.7943e-05,  1.0049e-05, -8.1011e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1556e-04, -1.3389e-01,  5.7730e-05, -6.6000e-05, -2.3191e-05,
         5.2300e-05,  3.3274e-05, -3.3641e-05,  3.6353e-05,  8.0678e-05,
         1.9288e-05,  5.2110e-05, -4.4717e-05, -4.0405e-05, -2.1541e-05,
        -3.3134e-05,  8.5052e-05, -1.7778e-05, -2.9514e-05, -1.2653e-04,
        -1.0076e-05,  2.9403e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0457e-06,  5.6296e-02,  2.3769e-05,  1.2632e-05,  1.4483e-05,
         5.8655e-06,  1.4834e-05,  2.1102e-05,  3.7958e-05, -4.1665e-06,
         4.4673e-05,  1.6763e-05, -4.6180e-06,  2.1782e-05, -3.3063e-05,
         1.6955e-05, -1.5316e-05,  1.1238e-05,  1.1265e-05,  1.0526e-05,
        -9.9654e-06,  2.1908e-07,  4.8098e-05,  2.8619e-05, -5.1756e-06,
         1.5537e-05,  2.7372e-05,  4.0366e-05,  1.2760e-05,  4.4801e-06,
        -1.2353e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #300: [tensor([-3.4907e-05,  1.8212e-01,  7.7162e-06, -1.2940e-05,  1.7545e-05,
         1.6650e-05,  1.5559e-05,  2.2065e-05, -6.8510e-07, -4.1720e-06,
        -1.6030e-05, -4.1136e-06, -5.4697e-07, -2.3868e-05,  2.4124e-05,
         2.3371e-05,  1.3640e-05, -1.0769e-05, -6.2683e-06,  2.9600e-05,
        -4.1290e-05, -2.3867e-05,  2.5307e-06,  1.7407e-05,  3.6250e-06,
         1.1912e-05,  3.5585e-07,  2.0801e-05,  2.6069e-06,  2.0623e-05,
        -2.5335e-05, -1.2893e-05, -2.3384e-05, -2.8563e-06,  1.8384e-05,
        -4.0071e-05,  1.6788e-05,  1.2380e-06, -2.1646e-05,  6.7106e-06,
         1.1708e-05,  3.7845e-05,  1.5789e-05,  1.2061e-05,  4.0088e-05,
         3.9143e-06, -4.2237e-06,  4.1127e-05,  2.6176e-06,  1.9154e-06,
         6.6041e-06,  2.0125e-05, -1.5626e-05, -1.8147e-05, -1.6446e-05,
         4.9146e-06,  1.8344e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.3267e-05,  7.2573e-02,  2.5249e-05,  3.4499e-06,  2.0927e-05,
         2.1761e-05,  3.8131e-05,  7.8792e-05, -1.2191e-04,  7.8149e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6863e-04,  6.9046e-02, -2.2934e-05, -3.9384e-05, -2.3345e-05,
         9.8854e-06, -1.6230e-05, -1.1679e-05, -1.9622e-06,  7.6692e-06,
         1.1218e-05, -1.7665e-05, -6.1984e-05, -6.2738e-06, -2.7700e-05,
         3.4376e-06, -2.4006e-05, -2.9228e-05,  1.9825e-05, -9.7796e-06,
         2.6103e-05, -6.7561e-06,  5.3502e-05,  3.7072e-06, -1.2923e-05,
        -1.0304e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5608e-04,  1.4301e-01,  6.2101e-05, -9.9877e-05, -5.2663e-05,
         7.5209e-05,  2.5117e-05,  1.7918e-05,  1.4900e-04, -6.6476e-05,
        -5.0087e-05,  2.1181e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9901e-05,  1.0757e-01, -1.1105e-04, -4.8310e-05, -1.5650e-05,
        -4.8579e-05, -4.9662e-06, -2.9113e-06, -7.5111e-06, -6.0654e-05,
        -1.6827e-04,  1.0490e-05, -2.3939e-05, -6.3551e-05, -1.8263e-05,
        -4.3527e-05,  5.8892e-05, -4.0803e-05,  3.5250e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.6683e-05, -1.1611e-01, -2.0408e-05,  1.6385e-05, -2.2685e-05,
        -8.4942e-06,  2.9795e-05,  1.9446e-05,  5.4127e-05, -1.6180e-05,
         1.9778e-05, -3.8209e-06, -1.9637e-05, -7.3852e-05,  3.0454e-05,
         5.7070e-05,  5.3437e-05,  5.9281e-05,  4.7100e-05, -1.7971e-05,
         2.3691e-05,  2.8860e-05,  5.7592e-05, -4.1165e-05,  4.2675e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3056e-04, -1.3913e-01, -7.1782e-06, -5.4297e-05,  8.2110e-05,
        -3.2481e-05, -1.0298e-04, -1.4840e-05, -4.7331e-05, -8.2673e-05,
         1.7519e-04,  1.0290e-04,  2.2439e-05, -6.7836e-05, -9.2168e-05,
        -9.3382e-05,  3.6419e-05,  4.2217e-05,  9.5782e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5159e-05, -1.1497e-01, -1.9552e-05, -3.1318e-05, -3.2721e-05,
         1.9618e-05,  5.0211e-06,  2.1977e-05, -1.9356e-05, -3.2307e-05,
        -3.1080e-05, -1.9210e-05, -1.2710e-06, -3.1012e-06,  3.3636e-05,
         1.3359e-05,  1.0171e-05, -1.8469e-05,  1.2091e-05, -1.2748e-05,
         2.5191e-05,  1.5085e-05, -2.7020e-05, -1.7454e-06, -9.3567e-06,
        -4.9221e-05, -2.0021e-05, -4.2151e-05, -2.5808e-05, -2.1809e-05,
        -1.3221e-06,  4.3790e-06,  1.2575e-05,  1.7878e-05, -1.4019e-05,
         1.1376e-05,  1.3460e-05,  6.4158e-06, -1.9944e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2029e-05,  9.1610e-02, -5.1149e-05,  1.6144e-04, -9.5775e-06,
        -1.1579e-04, -4.3249e-05,  4.6278e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0027e-05,  1.1360e-01,  1.1100e-05, -5.1638e-05,  1.9815e-06,
         4.6757e-05,  4.2532e-05, -3.9304e-05, -7.4908e-05, -1.2062e-05,
        -1.7383e-05, -3.4045e-05, -1.8310e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1690e-04, -1.0534e-01,  1.7592e-05,  9.5041e-05, -6.3006e-06,
        -2.3018e-07, -2.6042e-05, -1.4509e-05,  4.2261e-07, -1.4983e-05,
         7.2479e-07,  3.0364e-05,  5.4179e-06, -4.3372e-05,  3.0325e-05,
         3.4845e-06,  8.4881e-06,  4.2202e-05,  1.7381e-05,  1.1807e-05,
         3.7808e-06,  1.1444e-05, -1.3102e-05,  4.0469e-05, -7.5504e-06,
        -1.8425e-05, -1.1389e-04, -2.9326e-05,  4.0065e-05, -3.5562e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2978e-04,  5.4634e-02,  4.4144e-05, -2.8254e-05,  6.7115e-06,
        -7.1540e-06, -7.8260e-06,  2.6584e-05, -5.4073e-06, -4.7100e-05,
        -6.0516e-06, -5.4811e-06,  5.2672e-06,  1.8947e-05,  5.0099e-05,
         2.3189e-05,  1.1528e-05,  2.0427e-05,  1.9176e-05, -2.5688e-05,
         1.1723e-05,  1.0305e-05, -1.0345e-06, -4.8800e-07,  3.9083e-05,
         1.7825e-05,  3.0050e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.6151e-06,  1.8678e-01,  5.0804e-05, -6.2219e-05, -1.7725e-05,
         5.8406e-05,  7.5110e-05, -9.9456e-05, -5.5295e-05, -4.1201e-05,
        -3.0209e-05, -1.0945e-05,  1.8212e-05,  2.8953e-05, -7.6001e-05,
        -6.7249e-05,  5.5645e-05, -5.5243e-05, -8.6847e-05, -6.8551e-05,
        -5.1580e-05,  1.2372e-05,  7.2518e-05,  4.5251e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4291e-04, -6.0543e-02, -1.2941e-05,  2.9156e-06,  5.3128e-06,
         4.8832e-06, -7.0228e-06,  2.6634e-05,  2.0894e-05,  2.8069e-05,
         3.6601e-05,  2.0478e-05,  2.9272e-06,  4.7753e-05,  1.8644e-06,
        -4.7883e-05,  1.3600e-05,  2.7295e-05,  2.6881e-05, -7.6108e-06,
         7.4170e-06,  2.9546e-05,  2.1060e-05, -4.4233e-06,  1.9516e-05,
         1.7393e-05,  2.1016e-05,  1.9263e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2655e-04,  2.2332e-01, -1.3885e-04, -7.2664e-05,  4.9725e-06,
         1.9630e-04,  1.5344e-04, -1.7031e-04,  8.8669e-05, -8.4829e-07,
         1.1715e-04,  2.2148e-05, -9.1226e-05, -8.1170e-06,  3.1765e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1165e-05,  6.6990e-02, -1.5853e-05, -1.5733e-05,  5.5140e-07,
        -3.1906e-05, -1.6192e-05, -2.3309e-05, -1.1458e-05, -2.5984e-05,
         2.9403e-06, -2.1806e-05, -6.1471e-06,  6.7735e-06,  1.0387e-05,
        -5.0324e-06, -9.1159e-06,  1.0541e-06,  2.1938e-05, -1.2623e-05,
        -1.4480e-05, -1.4154e-05, -2.1254e-05,  3.0985e-05, -4.5452e-06,
         1.3481e-05,  5.5704e-06, -2.6511e-05, -3.0769e-05, -1.9512e-05,
        -1.3973e-05, -8.2735e-06,  1.0236e-05, -9.1080e-06, -1.5555e-05,
        -1.5739e-05,  2.2063e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6104e-05, -8.9546e-02, -7.0296e-06, -5.8397e-05, -2.7838e-05,
        -3.0697e-06, -5.6972e-06, -3.6543e-05,  4.5165e-05, -5.3452e-05,
        -5.7997e-05, -3.6974e-06, -1.0652e-05, -7.6935e-06,  3.2370e-05,
         8.0080e-06,  3.5127e-05, -2.2417e-05, -5.2252e-05,  3.3243e-05,
         6.6488e-06, -2.2745e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9120e-05,  1.1644e-01, -2.9241e-05,  1.1084e-04, -1.5774e-05,
         7.2899e-05,  3.8958e-05,  1.5989e-04, -3.8708e-05,  1.4343e-04,
        -9.5139e-05, -4.8836e-07,  1.9944e-06,  5.5841e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4010e-05,  7.0418e-02,  5.8206e-05,  3.5316e-05, -4.0018e-05,
         4.1012e-05,  4.4596e-06, -2.9356e-05, -1.8947e-05, -1.5376e-05,
        -3.6075e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1831e-05,  2.3053e-01, -4.8300e-05,  8.2601e-05,  1.2106e-04,
        -1.8249e-04,  1.1738e-04,  2.7447e-05, -2.5050e-04,  1.8827e-04,
         2.1914e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.9181e-05,  1.3740e-01, -6.1835e-07, -7.8237e-05, -1.9740e-05,
         1.0710e-04,  3.3045e-05, -5.7534e-05,  6.7012e-05, -2.9244e-05,
        -1.6363e-06, -3.4487e-05, -7.7887e-05,  6.9828e-05,  1.2298e-05,
        -2.2582e-05, -1.0989e-04,  3.4594e-05, -2.9504e-05,  4.2401e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6893e-04, -1.5522e-01, -4.4283e-06,  1.1281e-05, -4.1949e-07,
        -1.1931e-05,  3.9487e-05, -7.2729e-06,  4.7122e-06, -1.6987e-05,
         3.1753e-05,  2.6069e-05,  5.8511e-05, -2.9364e-05,  2.6676e-05,
        -9.1890e-06, -5.7661e-06, -2.5675e-05,  2.6271e-05,  2.5218e-05,
         1.3619e-05,  2.5931e-05,  1.8723e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3679e-05, -1.2415e-01, -1.6438e-05, -7.0767e-05, -2.2490e-05,
        -1.4324e-05, -2.2874e-05, -3.2428e-05,  9.5583e-06, -2.2092e-05,
        -2.7953e-05,  4.2323e-06,  5.5764e-05, -4.0876e-05,  4.7889e-05,
         7.1409e-05, -2.0072e-05, -6.5968e-06, -7.0694e-07,  5.2254e-05,
        -5.8176e-05,  6.9036e-07, -1.3612e-06,  6.0545e-05,  8.1147e-06,
        -2.5250e-05,  3.3904e-06, -1.6444e-06,  7.7116e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2855e-05, -1.1171e-01, -6.7248e-06, -3.3413e-05, -6.7479e-06,
        -5.5344e-06, -7.5659e-06, -1.8256e-05,  1.0735e-05,  8.3723e-06,
        -1.8566e-05,  3.0295e-06, -2.3148e-05, -7.3536e-06, -3.2516e-05,
         8.4084e-06, -5.4043e-05, -1.8854e-05,  5.0556e-06,  3.4184e-05,
        -3.2134e-05,  2.4547e-05, -2.9659e-06, -7.1147e-05, -8.1209e-06,
        -2.7202e-05,  3.5173e-06, -1.7473e-05,  4.1208e-05, -2.5266e-05,
        -5.4810e-05,  9.6883e-06, -2.0365e-05,  1.5999e-05, -6.6715e-05,
        -7.5167e-05, -3.0088e-05,  1.8462e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([-7.8489e-07,  1.3457e-01,  6.8918e-05, -1.3803e-05,  5.9963e-05,
         5.2864e-05, -2.8948e-06,  4.1500e-05, -1.0031e-04, -5.0371e-06,
        -7.9874e-05,  9.8273e-06,  1.0586e-05,  2.3347e-05,  3.2448e-05,
         8.6859e-05,  7.5989e-05,  3.2867e-05,  5.1230e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.4041e-05,  1.3806e-01,  7.2309e-05,  6.2101e-05, -3.3316e-05,
        -8.6440e-06,  1.2718e-04, -9.5501e-05,  1.1847e-04, -5.9753e-05,
         6.3237e-05,  2.3681e-05,  3.7369e-05,  5.4199e-05,  1.3909e-04,
         4.1237e-05, -2.1434e-05, -3.8099e-05, -4.2229e-06, -3.1547e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0818e-04, -8.1321e-02, -8.4626e-05, -1.3596e-05, -1.1994e-04,
         2.8896e-05,  7.6191e-06,  1.4120e-05, -6.0103e-05, -4.7704e-05,
         1.8625e-05, -3.7985e-05, -1.5523e-05, -6.3662e-06, -7.0922e-06,
         3.8767e-06, -4.9570e-05, -6.2737e-05, -2.4948e-05, -8.3489e-05,
        -8.2614e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2545e-05,  1.2546e-01, -5.2240e-05,  7.4040e-05, -5.0519e-05,
        -7.2270e-05,  1.5861e-05,  1.3432e-05,  3.1377e-05, -6.3951e-05,
        -7.3144e-05,  2.4612e-05, -6.6184e-05,  8.6779e-07,  6.8338e-05,
        -5.4036e-06, -1.7421e-04, -3.8520e-05, -5.3087e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.2594e-05, -1.1478e-01,  2.7292e-05,  3.4295e-05,  2.6682e-05,
         1.7752e-05, -4.6802e-06,  1.5439e-05, -3.0067e-05,  1.4514e-05,
         2.7332e-05,  1.6406e-05, -5.0662e-06,  6.9534e-06,  1.6639e-06,
        -4.8334e-06,  4.9895e-06, -2.0648e-05, -3.8910e-06, -7.0748e-06,
         3.1249e-05,  5.7876e-06,  1.0661e-05,  1.4794e-05,  3.4617e-05,
        -2.7576e-05,  1.0505e-05,  3.7944e-05,  4.4027e-06, -3.4064e-05,
         1.5315e-05,  1.6097e-05,  4.0326e-05,  2.7610e-05,  5.7344e-06,
         5.5456e-05,  1.7185e-05,  2.5480e-05,  1.9799e-05,  2.5783e-05,
         3.9864e-05,  3.8469e-05, -2.7793e-05, -1.0542e-05,  7.7404e-06,
        -1.0560e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2008e-04, -1.4752e-01,  4.0630e-05,  1.9784e-05,  1.0289e-05,
        -3.0336e-05,  4.1563e-06, -4.3426e-05, -4.6734e-05, -1.2411e-05,
        -1.8475e-05, -3.4583e-05, -2.9453e-05, -3.0177e-05, -4.4975e-05,
        -1.2422e-05, -2.1718e-05,  2.4686e-05,  1.2282e-05, -2.5436e-05,
        -2.6772e-07,  5.2152e-05,  4.2268e-06, -4.4245e-05, -1.4755e-05,
        -4.8937e-05, -6.8298e-05, -1.9763e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0372e-04,  2.4523e-01,  2.1370e-05, -1.8934e-05, -3.3481e-05,
        -1.6758e-05,  5.5911e-05,  8.3840e-05,  9.2707e-06, -3.2454e-05,
        -1.0429e-04, -3.1122e-06,  7.8896e-05, -4.8971e-05, -6.4761e-05,
        -1.4183e-05, -9.9185e-05,  2.3352e-06,  4.5100e-05, -1.1637e-04,
        -1.1763e-04,  5.5610e-05, -3.0244e-05,  3.5823e-05, -3.3158e-05,
        -4.5445e-05, -2.3416e-05,  3.3675e-05,  5.2582e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1234e-04, -8.3503e-02,  2.8712e-05, -1.2092e-05,  1.7419e-05,
         5.0884e-05,  4.2390e-06, -6.9345e-06, -1.5198e-05, -2.1700e-05,
        -6.0909e-06,  2.9222e-05,  3.2768e-05,  2.0318e-05, -5.4903e-05,
        -3.7622e-05, -2.0300e-05, -4.5866e-05, -1.9643e-05,  1.2543e-05,
         4.3462e-06, -2.9337e-05, -2.2422e-05,  9.9977e-06, -3.2518e-05,
        -7.0463e-05, -1.0514e-06, -1.8565e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7754e-05, -1.2055e-01, -1.0683e-05, -1.8034e-05, -2.6369e-05,
        -8.9584e-06,  6.6496e-06,  1.0755e-06, -5.0988e-05,  4.2250e-05,
         4.3839e-05, -9.8113e-06,  3.0972e-05, -1.0437e-06,  4.0292e-05,
         3.6326e-05,  1.1969e-05,  2.0426e-06,  3.4896e-05,  5.2418e-06,
         1.1258e-05, -6.0164e-05,  3.1136e-06,  6.7623e-06,  1.7774e-05,
         3.0852e-05, -1.8440e-05, -3.0333e-05,  2.0564e-05,  8.7374e-06,
         1.3968e-05, -3.5442e-06, -4.8391e-06,  8.1466e-06, -2.5759e-05,
         2.7354e-05,  1.3106e-05, -1.7601e-05,  3.9340e-06,  1.1726e-05,
        -1.5336e-05, -1.7499e-05, -5.4670e-06, -2.3326e-05,  1.1170e-05,
        -1.4096e-07,  1.3980e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.8328e-05, -5.4279e-02, -1.8106e-05, -8.8711e-06, -1.4686e-05,
         5.2600e-06, -8.2166e-06, -7.4404e-06, -1.0442e-05, -6.3668e-06,
        -7.9625e-06, -9.5150e-06, -9.8723e-06, -3.3616e-06, -6.1870e-06,
        -8.8047e-06, -2.7850e-07, -1.8551e-05, -1.3572e-05,  9.1902e-08,
        -1.5607e-05,  1.4841e-05,  5.4656e-06, -1.1641e-05, -6.0032e-07,
        -1.4029e-05,  5.3974e-06, -1.1732e-05, -8.6335e-06, -1.6801e-05,
         4.4816e-06, -1.2605e-05,  1.8788e-06,  1.3242e-05,  7.4375e-06,
        -4.1490e-06, -1.2163e-05, -4.8825e-06,  7.7879e-06, -1.0817e-05,
        -1.4102e-05, -2.1154e-05, -1.8899e-05, -1.6446e-05, -1.7420e-05,
        -8.8826e-06,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.1111e-06, -1.3578e-01, -4.3609e-05,  1.4042e-05, -3.0576e-05,
        -7.4183e-05, -4.0701e-05, -4.7458e-05, -7.6182e-05, -2.3995e-06,
         4.1765e-07,  7.1681e-06,  1.0243e-05,  1.0291e-04,  3.2646e-05,
        -3.0661e-05, -3.8395e-05, -1.0134e-05, -9.0278e-05, -5.4311e-05,
        -2.9788e-05,  3.8677e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1752e-05, -1.4189e-01,  5.0639e-05,  5.3748e-05, -2.6492e-05,
        -1.4189e-05,  2.5853e-05, -2.4732e-05,  3.0207e-06,  2.1615e-06,
        -2.0967e-06,  7.3568e-05,  2.0125e-05,  6.3712e-06,  8.5263e-06,
         3.6918e-05,  1.0088e-04,  1.1686e-05,  3.8191e-05, -6.9141e-06,
         2.9127e-05,  1.4209e-05, -1.9964e-05, -6.1180e-05,  1.4622e-05,
        -2.5478e-05,  1.7878e-05,  1.4109e-05,  1.2587e-05,  1.1164e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
