Iter #50: [tensor([ 3.2335e-03, -3.0730e-04,  6.7140e-04, -5.5677e-04, -1.3264e-04,
         4.3100e-04, -2.6933e-04, -5.7562e-04,  6.2163e-05, -4.1388e-03,
         2.8022e-04,  1.9068e-05, -5.9760e-04, -1.1512e-04,  1.4377e-04,
        -1.0389e-03, -4.4493e-04,  2.6901e-04, -3.4934e-04,  1.9290e-04,
        -1.5786e-04, -9.2706e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1881e-03,  4.5074e-03, -4.8074e-04, -5.4819e-04,  4.1561e-03,
        -6.3163e-04,  4.7283e-04, -5.6996e-04,  4.2406e-03,  8.1173e-04,
        -1.8759e-04,  2.4653e-04,  5.4741e-04, -1.7213e-04,  1.5475e-03,
         4.5511e-03, -1.1733e-04, -9.3392e-04,  3.1391e-04,  5.4728e-04,
         2.7651e-04, -4.0899e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3104e-04, -2.1695e-03,  1.9035e-04,  3.2260e-04, -2.6486e-03,
         2.7472e-04, -1.8485e-04,  3.9496e-04, -1.6847e-03, -3.2438e-04,
        -7.1298e-05, -1.8586e-04,  7.9561e-05, -2.5612e-03,  9.8033e-05,
        -7.9537e-04, -4.6321e-06,  2.8325e-05, -2.4022e-03, -3.8131e-04,
        -2.7152e-04,  1.8260e-04,  8.8038e-05,  1.1340e-04, -2.9026e-06,
        -1.4289e-04,  1.2961e-06, -3.3148e-04, -9.1521e-05, -5.1743e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1344e-03, -3.0014e-03,  2.0180e-04,  3.9606e-04, -3.8723e-03,
         2.5226e-04, -4.0538e-04,  2.6521e-04, -3.2440e-03, -5.5622e-04,
        -2.1491e-05, -1.5903e-04, -1.7055e-04, -2.8195e-03, -2.3866e-05,
        -1.2666e-03, -4.6482e-05, -4.2510e-04,  1.6423e-04, -4.1979e-04,
        -6.8715e-04, -3.8956e-04, -2.5323e-06,  2.4883e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0796e-03, -3.8346e-03,  2.1078e-04,  7.0319e-04, -4.0820e-03,
        -1.3112e-04, -2.4383e-04, -9.8434e-05,  1.1218e-04, -2.9723e-04,
        -1.4053e-03, -4.3575e-04, -1.5068e-03, -7.4304e-05, -1.6266e-05,
        -9.5214e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.5410e-03,  5.3909e-03, -8.4733e-04, -1.2782e-03,  5.3548e-03,
         2.0329e-04,  2.0779e-04,  2.3374e-04, -1.3310e-04, -7.4027e-05,
         1.5535e-03, -3.3927e-04,  2.3916e-03,  1.1162e-04, -1.4983e-04,
         1.1682e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1104e-03, -4.4311e-03,  6.7210e-05,  6.6067e-04, -4.2720e-03,
        -5.8339e-05, -2.9408e-04, -2.0965e-04,  2.1529e-05, -1.0199e-04,
        -1.2192e-03, -5.0335e-05, -1.6180e-03, -7.0003e-05,  9.9675e-04,
        -4.4768e-04,  2.0361e-04,  1.9271e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.9401e-03,  3.1315e-03, -3.2461e-04, -6.0930e-04,  1.9333e-04,
         9.4378e-05,  1.3128e-03,  3.6650e-05,  4.8754e-04, -2.2416e-04,
         1.3409e-05,  7.1069e-04, -4.1181e-04,  1.4766e-04, -1.3494e-04,
        -9.7962e-06,  3.3705e-04, -1.1295e-04, -1.6000e-04,  1.0308e-03,
        -4.3330e-05,  6.3286e-04,  1.4804e-04,  1.0132e-03,  6.2491e-04,
         6.3980e-05,  5.4114e-05,  5.5616e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8626e-03,  2.6100e-03, -7.5710e-05, -4.1861e-04,  4.8830e-05,
         6.2075e-05,  7.9879e-04,  3.9183e-05,  3.0178e-04, -7.7466e-05,
         2.6243e-05,  7.2917e-04, -3.8517e-04,  1.0110e-04,  1.0282e-06,
        -1.9593e-04,  2.7992e-04,  2.0736e-04, -1.3772e-04,  6.6844e-04,
         6.6817e-05, -5.9376e-05,  2.6118e-04,  2.3978e-05,  1.5840e-04,
         2.0522e-04,  5.8477e-05,  3.8714e-04, -2.0522e-05,  4.4679e-04,
        -4.9535e-04,  7.0940e-04,  2.0616e-04,  1.0452e-04, -1.3395e-04],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1657e-03, -3.3832e-03,  5.6297e-04,  5.9284e-04, -3.4172e-04,
        -1.9954e-04, -8.7299e-04, -9.0562e-05, -2.8303e-04, -1.9941e-04,
        -1.4813e-04, -7.9756e-04,  1.6123e-04, -2.3607e-04,  1.9514e-04,
         9.7626e-05, -5.5688e-04, -1.1079e-04, -7.8292e-05, -3.3964e-03,
         2.5591e-05, -6.5449e-06, -5.1219e-04, -1.5064e-04,  1.0115e-04,
        -1.2203e-04, -1.9169e-04, -4.0426e-05, -4.3207e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1885e-04, -3.5561e-03,  2.5710e-04,  3.5032e-04, -3.3749e-03,
         1.1682e-04, -1.8311e-04, -5.0147e-04,  8.1756e-07, -1.3065e-03,
        -3.6892e-04,  3.8409e-05, -3.2315e-04, -2.7786e-03,  2.8706e-05,
        -1.1734e-03, -6.3163e-05,  4.9591e-04, -5.9685e-04, -4.7404e-04,
        -7.1736e-05, -8.6089e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2590e-03, -4.2287e-03,  2.6314e-04,  7.2129e-04, -3.6973e-03,
         2.0621e-04, -2.9126e-04, -6.4059e-04, -1.6011e-04, -1.3206e-03,
        -4.4463e-04,  1.3983e-04, -2.7696e-04, -1.1271e-03,  1.9483e-04,
        -1.5005e-03, -5.2775e-04,  1.0982e-04,  9.4556e-05, -6.1585e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.6488e-06, -5.8176e-03,  1.3167e-05,  5.8482e-05, -9.7453e-05,
         9.5013e-04, -5.5330e-03,  5.1715e-05,  1.1740e-04,  1.7886e-05,
        -2.4857e-04, -8.8635e-06, -9.8433e-05,  3.1887e-04,  3.4089e-06,
         4.7054e-05,  4.3430e-05,  6.7076e-05, -1.5308e-03,  1.5621e-05,
        -1.4227e-03,  2.1254e-04,  8.9555e-04, -5.4904e-03, -2.5866e-04,
         2.3912e-04, -6.8584e-05,  1.9238e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1335e-03, -5.7581e-03,  1.4568e-04,  7.7687e-05, -2.3342e-04,
         8.5406e-04, -5.1345e-03,  5.9495e-05, -5.3681e-06, -2.2719e-04,
        -3.8348e-04, -3.0999e-04, -4.2185e-04,  6.1816e-05,  1.1487e-05,
        -9.9885e-05, -2.3874e-05,  3.9204e-05, -6.0177e-03, -1.8972e-05,
        -1.2242e-03, -3.3285e-05,  1.6812e-04,  2.5517e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3529e-04, -6.5302e-03, -1.4967e-04,  4.3662e-06,  8.0229e-05,
         1.1451e-03, -6.0207e-03,  1.9592e-05,  1.4041e-04, -6.7659e-05,
        -4.2216e-04, -1.2652e-04, -2.5778e-04,  7.2557e-05,  1.8980e-04,
        -1.3049e-04,  1.2015e-04,  3.2218e-05, -1.7038e-03, -9.4871e-05,
        -1.6768e-03, -1.8231e-04,  1.0834e-04, -6.1186e-03,  3.1379e-04,
        -4.0218e-04,  5.8316e-05, -1.9537e-04], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.4494e-04, -6.6211e-03,  3.1196e-04, -2.9662e-04, -1.0343e-04,
        -1.3797e-03, -5.4237e-06, -1.1423e-04, -5.8967e-03,  1.6153e-05,
         1.5732e-04, -5.2700e-03, -3.4573e-05, -1.8279e-03,  3.3288e-04,
        -4.3429e-04,  1.1117e-04,  9.3670e-05, -5.2542e-03, -5.0811e-06,
        -8.4844e-05,  2.0999e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2852e-03, -5.5767e-03,  8.9634e-05, -2.7036e-04, -1.3792e-04,
        -1.5178e-03,  1.8251e-04,  1.5207e-05, -5.8787e-03, -1.1031e-04,
        -1.5002e-04, -5.8726e-03, -1.4689e-04, -1.6864e-03,  3.5203e-05,
        -1.1630e-04, -1.6274e-03,  2.4215e-04, -9.9762e-05, -6.0971e-03,
         1.2478e-04,  6.7980e-06, -3.0023e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4501e-03, -6.0914e-03,  2.5481e-04, -3.0935e-04, -4.2742e-05,
        -1.3973e-03,  5.6773e-05,  1.8188e-05, -5.4609e-03,  2.3572e-04,
        -1.1460e-04, -5.8700e-03,  7.6156e-05, -1.4687e-03, -4.2749e-05,
         5.3399e-04, -5.6412e-03, -3.2679e-04, -8.9054e-05, -1.0186e-04,
         1.9046e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3158e-03,  8.4118e-03, -3.9030e-04, -1.4720e-04, -2.3657e-04,
         1.5749e-03, -5.9737e-04, -1.3213e-03, -8.7066e-05, -2.6905e-05,
         8.2377e-03, -3.7279e-04,  3.5708e-04, -1.8268e-04, -2.9725e-04,
         2.7280e-03, -5.0493e-04,  1.8393e-03, -9.9485e-05,  1.5484e-04,
        -1.8917e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0363e-03,  7.4668e-03, -9.7168e-05, -1.2819e-04,  8.7233e-05,
         1.5478e-03, -4.7501e-04, -7.4303e-04, -9.0930e-05, -5.7424e-05,
         6.2281e-03, -1.3075e-04,  5.2865e-04,  4.5346e-05, -1.7847e-04,
         1.8076e-03, -1.4796e-04,  4.9415e-04,  1.9299e-03,  5.4008e-04,
        -1.8476e-04,  5.5652e-04, -1.3388e-04, -1.6661e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.2921e-03,  8.1550e-03, -4.9753e-04, -4.8457e-05,  6.2287e-05,
         1.6233e-03, -1.6455e-04, -1.1735e-03,  1.2336e-04,  1.4468e-04,
         9.1057e-03, -5.3242e-05,  6.5343e-04,  1.6318e-04,  7.6896e-05,
         1.8386e-03, -9.4356e-05, -3.0781e-04, -3.9231e-04, -1.1422e-04,
        -2.4726e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0007e-03, -7.1309e-03, -1.1060e-04, -2.8081e-04, -6.3997e-05,
         3.1978e-04,  7.1242e-04,  2.5980e-05,  1.3663e-04, -7.3270e-03,
        -1.8463e-04, -4.4679e-04, -9.0707e-04,  9.8173e-04, -1.0300e-04,
         1.1579e-04, -1.7544e-03,  1.0944e-04, -1.7789e-03, -7.4999e-03,
         4.9699e-06, -2.7050e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5727e-03, -8.3432e-03,  8.9706e-05, -7.2212e-05, -1.6846e-04,
         4.7259e-04,  1.1232e-03,  4.2697e-05, -5.3550e-05, -7.7522e-03,
        -1.6486e-04, -1.9679e-04, -7.2812e-04,  1.2075e-03, -3.7640e-04,
         7.5077e-05, -2.1109e-03, -1.9043e-04, -2.1948e-03, -7.0219e-04,
         1.2773e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6441e-04,  1.1294e-02,  3.7636e-04,  3.7545e-04, -3.6955e-05,
        -7.1380e-04, -1.7172e-03,  2.0811e-04,  2.8150e-06,  1.1312e-02,
         3.8909e-04,  6.0435e-04,  1.0976e-03, -1.4413e-03,  5.5557e-04,
         2.5953e-05,  2.9179e-03, -3.3305e-04,  2.5562e-03,  1.2891e-04,
        -6.4592e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 9.6244e-04, -8.9125e-03, -3.9010e-05,  6.1228e-04, -8.7883e-03,
         1.1473e-04, -1.9171e-04, -2.9994e-05, -9.4205e-04, -1.1864e-05,
         2.0675e-05, -6.7450e-04, -7.4563e-03,  1.5183e-05, -4.0801e-05,
         3.2535e-06,  8.4284e-05,  9.3889e-05,  1.8486e-05, -8.3536e-03,
        -1.9275e-04,  7.6951e-05,  2.9927e-05, -8.6945e-03,  1.6440e-04,
         6.1514e-04, -7.9417e-03,  7.8435e-05, -1.8903e-04, -2.1498e-05,
        -2.4364e-04, -2.8968e-04,  1.0238e-04, -2.2670e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.5443e-04, -1.0236e-02, -1.9078e-04,  6.6142e-04, -8.2559e-03,
         2.5458e-04, -1.8444e-04, -1.2747e-04, -9.4643e-04, -5.8691e-05,
         4.8496e-06, -6.0869e-04, -7.5034e-03,  2.0515e-05, -2.2341e-04,
        -4.7840e-05,  5.6846e-06,  3.1902e-05,  3.7813e-05, -6.9315e-03,
        -1.4049e-04,  6.2910e-05, -6.4062e-05, -8.0078e-03,  7.5882e-05,
        -7.5588e-05, -5.3637e-04,  5.9339e-05,  4.2269e-05,  6.2607e-04,
        -9.1653e-03,  2.5996e-05,  7.2201e-05,  6.6092e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0087e-04, -9.6247e-03,  7.7800e-05,  5.3986e-04, -7.6657e-03,
         2.2865e-04, -1.8051e-04, -1.5377e-04, -8.1508e-04, -7.1569e-05,
         3.4562e-05, -3.7949e-04, -7.5943e-03,  1.1878e-05, -2.0520e-04,
         8.4080e-05,  1.0836e-04, -2.8952e-05,  4.2236e-06, -7.5872e-03,
        -2.2402e-04, -3.2328e-05,  5.9960e-05, -7.8478e-03,  1.2242e-04,
         7.2680e-04, -7.0262e-03,  4.4276e-05, -1.1418e-04, -1.0383e-04,
        -1.2669e-04, -7.3359e-04, -1.5003e-04, -5.0246e-06,  1.1150e-04,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4344e-03, -1.0084e-02,  8.5441e-05,  6.0925e-04, -8.3559e-03,
         1.5144e-04, -1.7196e-04, -1.5451e-04, -1.1251e-03, -4.7491e-05,
        -5.5175e-05, -5.9145e-04, -9.9062e-03,  6.4221e-05, -9.0091e-05,
         1.5600e-05,  9.5192e-05,  1.1140e-05, -3.8500e-05, -8.1916e-03,
        -3.3230e-04, -4.1322e-05,  1.0374e-04, -2.0570e-03,  1.5383e-05,
        -1.1983e-03, -8.3338e-03, -2.5003e-04,  7.9110e-05, -5.2517e-05,
         1.1037e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.7111e-04, -8.6942e-03,  4.8624e-05,  6.9076e-04, -9.3834e-03,
         1.8119e-04, -1.8947e-04, -2.0585e-04, -1.0945e-03, -8.7155e-05,
         7.3587e-05, -6.7764e-04, -7.3636e-03, -4.7067e-05, -1.4609e-04,
        -8.4675e-05,  7.3788e-05,  3.7450e-05, -4.2867e-05, -6.7385e-03,
        -2.1020e-04,  3.9970e-07,  2.8958e-05, -1.0874e-02, -9.9743e-05,
        -1.5969e-03,  1.4149e-05,  3.9976e-04, -4.2761e-04,  6.0342e-05,
         1.1198e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0367e-04, -6.1328e-03, -5.6640e-05,  5.4567e-04, -7.2858e-03,
         2.1692e-04, -1.8464e-04, -1.6903e-04, -7.4567e-04,  1.0984e-05,
        -5.1203e-05, -6.1992e-04, -5.4677e-03, -4.4755e-05, -3.9454e-05,
        -2.7527e-05,  5.9106e-05,  5.7041e-05, -1.9000e-05, -6.0511e-03,
        -2.0366e-04,  6.4101e-06,  4.9507e-05, -7.3558e-03,  1.3614e-04,
        -7.8000e-04, -6.4905e-03, -9.3928e-05, -5.7010e-05,  6.9698e-05,
        -5.9484e-05,  1.2741e-04, -6.5302e-03, -5.3276e-05, -2.0420e-04,
        -3.6180e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2639e-04, -9.9715e-03,  3.6652e-05,  8.1601e-04, -9.5767e-03,
         2.4388e-04, -1.6375e-04, -2.7450e-04, -8.1095e-04, -8.4867e-05,
         7.8473e-06, -4.8264e-04, -7.9035e-03, -4.0495e-05, -9.6296e-05,
         8.1600e-05, -2.3280e-05,  6.3381e-05,  8.5680e-05, -7.8257e-03,
        -1.9137e-04,  5.8929e-05, -1.7573e-04, -2.1328e-03,  9.2952e-05,
        -1.2455e-03, -1.4319e-04, -8.5457e-03, -1.6029e-04, -8.3097e-05,
        -3.0087e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0147e-03, -8.0778e-03,  1.6053e-05,  6.5045e-04, -7.6945e-03,
         1.5267e-04, -2.2709e-04, -1.5500e-04, -1.0130e-03, -1.3111e-04,
        -6.7558e-05, -6.0547e-04, -8.6886e-03,  1.4720e-05,  6.8938e-05,
        -7.8120e-06,  4.5064e-05,  1.2150e-04, -5.6598e-06, -7.2931e-03,
        -3.7868e-04,  4.4578e-05,  1.2108e-05, -8.8598e-03, -2.6176e-07,
        -1.6391e-03, -1.6410e-04, -2.9035e-05,  3.8184e-04, -3.0214e-04,
         1.5882e-05, -1.4194e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5796e-04, -1.3927e-02,  1.0033e-04, -1.4570e-03, -5.0841e-05,
        -1.3189e-03,  1.0724e-05, -5.8127e-04, -1.1695e-02, -2.5210e-04,
         2.6784e-04, -5.7055e-05,  1.0861e-04, -5.2217e-05, -2.5226e-03,
         4.6413e-05, -1.8672e-03, -2.1516e-04, -1.4362e-02, -4.4575e-04,
         7.9559e-05, -3.4742e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1082e-05, -1.4951e-02,  1.9767e-04, -1.7402e-03, -2.2541e-05,
        -2.4387e-03,  1.5824e-04, -3.2872e-04, -1.6186e-02, -7.9945e-05,
         4.1204e-04, -9.4774e-05,  5.4969e-05,  1.4635e-04, -1.6329e-02,
         6.1987e-05, -2.5056e-03,  6.9510e-05,  1.9485e-04,  1.2364e-04,
         1.1946e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3503e-05, -2.3413e-02,  1.5644e-04, -2.6457e-03,  5.0950e-05,
        -2.7694e-03, -9.0458e-05, -6.3041e-04, -1.7223e-02, -2.2022e-04,
         3.7227e-04, -3.9125e-04, -6.1492e-05, -7.0461e-05, -3.5736e-03,
        -1.5950e-04, -5.1290e-04, -3.2885e-04, -7.8987e-04,  1.6937e-04,
         1.2072e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6607e-04,  1.5528e-02,  1.1661e-05, -8.4826e-05,  7.6552e-04,
        -1.9176e-05,  4.8805e-04,  1.7248e-04,  1.4717e-02, -6.1004e-05,
        -3.1552e-04, -6.4558e-05,  1.9051e-04, -1.2864e-05,  2.4836e-03,
         6.3590e-05,  3.1288e-05,  2.7979e-03, -8.2034e-05, -8.3506e-04,
         3.0562e-04,  1.2263e-04, -3.2562e-04,  1.2411e-04,  9.2524e-05,
         2.4674e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 2.3080e-04,  2.2082e-03,  1.8720e-04,  3.2824e-05, -1.5336e-05,
        -4.8750e-04,  3.2130e-06, -1.5560e-04,  4.2136e-05,  3.1176e-05,
         3.4435e-06,  2.2673e-04,  7.3249e-05,  1.4556e-03,  4.0186e-05,
         6.1288e-06, -1.3190e-05,  2.7150e-03,  2.5288e-04,  1.7563e-04,
         1.7816e-03,  6.6678e-05,  4.4218e-05,  2.9344e-04,  7.0760e-05,
         1.1989e-04,  1.8414e-04,  2.2017e-04,  5.9479e-05,  1.0171e-04,
        -1.2434e-05,  1.7243e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8273e-04,  1.3901e-03,  2.2489e-04, -7.3095e-05, -2.3323e-04,
        -9.0337e-04, -6.5168e-05, -1.8336e-04, -2.5800e-04, -1.2056e-05,
        -6.0066e-05,  2.7053e-04,  1.3314e-04,  2.2816e-03, -1.6852e-04,
        -9.0948e-07, -3.0318e-05,  4.4185e-02,  4.1585e-04,  1.9014e-03,
         1.0270e-05,  3.3168e-04,  1.5434e-04,  2.4224e-04, -1.1901e-05,
        -1.0543e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7806e-04,  1.5276e-03,  2.3262e-04,  1.1722e-04,  1.0970e-05,
        -5.8983e-04,  1.0896e-04, -5.4020e-05, -4.5773e-05, -2.3694e-05,
         1.7567e-06,  2.4348e-04,  6.8139e-05,  1.7598e-03,  3.1377e-05,
         1.0071e-04,  3.2561e-06,  2.8639e-03,  4.2748e-04,  1.6874e-04,
        -2.0043e-07, -5.4596e-04,  9.6637e-05, -1.6679e-04, -3.2260e-06,
         2.1515e-03,  2.4343e-03,  9.5219e-05, -5.4068e-05,  2.6470e-03,
         1.1643e-04,  8.0853e-06, -4.6661e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1850e-03,  1.8658e-04, -2.4236e-03,  1.2416e-04, -4.4445e-04,
        -5.9546e-02, -2.9158e-04,  5.1891e-07, -2.2628e-08, -3.0417e-03,
         3.2163e-05, -3.0932e-03,  3.2026e-04,  1.4113e-04,  5.0759e-05,
         2.2824e-05,  2.9112e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0778e-03,  1.4852e-04, -3.1212e-03,  4.8541e-05, -3.1806e-04,
        -4.8994e-02, -3.3737e-04,  9.4077e-05, -5.1523e-05, -3.8942e-03,
         4.4474e-06, -3.1350e-03,  3.1159e-05,  3.3334e-05,  1.5425e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.8436e-04, -3.9275e-05, -4.5178e-03, -3.0907e-05, -4.6895e-04,
        -6.4399e-02, -3.1847e-04,  4.5239e-06,  5.3377e-05, -4.0579e-03,
        -1.5104e-04, -4.8364e-03, -6.5561e-05, -6.5312e-05, -2.4270e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3886e-04, -2.0976e-02, -3.2047e-05, -2.1510e-05,  4.1002e-04,
        -1.6046e-02, -1.0257e-05, -7.6905e-05, -3.5209e-05, -1.6172e-02,
        -4.6398e-05,  3.6642e-04, -1.0584e-02,  8.5376e-06, -1.3220e-04,
        -3.8881e-04, -1.9354e-05, -1.4257e-05,  3.5143e-04,  8.7901e-05,
        -2.2537e-04, -3.4634e-05, -1.3942e-02, -2.3408e-05, -2.4151e-05,
        -1.4838e-05,  1.0375e-04,  5.7450e-05,  1.2785e-04, -1.4189e-04,
         1.2811e-04, -1.4158e-04, -6.5494e-05,  1.8923e-05,  8.7735e-06,
         1.4732e-04,  4.6551e-05, -1.8184e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6457e-04, -2.0567e-02, -8.9155e-05, -9.3321e-05,  3.8360e-04,
        -1.9770e-02,  4.1656e-05, -1.6019e-04, -1.0064e-04, -1.7828e-02,
         7.0538e-05,  4.5478e-04, -8.0195e-03, -5.2070e-05, -1.4048e-04,
        -2.8733e-04,  2.8836e-05, -3.0674e-05,  3.6692e-04,  7.2418e-05,
        -2.1361e-04, -3.2260e-05, -1.7767e-02,  8.3697e-05,  3.4627e-06,
        -1.0869e-04,  7.1214e-05,  5.6214e-05,  3.3967e-05, -2.7125e-04,
         1.1848e-04, -1.4404e-04,  3.9091e-05, -1.7852e-05,  1.1341e-04,
         1.2984e-05, -2.9359e-05,  1.1653e-04, -3.3362e-04,  2.1768e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3374e-04, -1.5145e-02,  2.2496e-05,  1.6725e-05,  3.7372e-04,
        -1.8405e-02,  2.2025e-05, -1.9507e-04, -9.5668e-05, -1.5911e-02,
         3.6595e-05,  3.1655e-04, -9.6975e-03, -3.7494e-05, -3.3315e-05,
        -2.6830e-04,  5.4077e-05,  7.8449e-06,  3.4116e-04,  3.0857e-05,
        -1.7889e-04, -2.3156e-05, -1.8717e-02, -5.7154e-05,  4.0951e-05,
         1.1253e-05,  8.4131e-05,  8.7031e-05,  3.4531e-05, -1.9611e-04,
         1.1129e-04, -1.0921e-04,  2.6772e-05, -3.9718e-05, -6.3128e-06,
        -8.1171e-05,  9.6701e-06,  9.9606e-05, -1.5268e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0033e-03, -1.4255e-02,  5.7456e-05,  4.1968e-05,  5.4699e-04,
        -1.5223e-02, -3.2100e-06, -3.9473e-05, -1.3000e-04, -1.4297e-02,
         1.9327e-05,  4.5014e-05, -3.9397e-05,  1.1156e-04, -7.5695e-04,
        -1.2770e-02,  3.2381e-05,  6.5394e-04, -1.0986e-02,  1.7569e-05,
        -1.5210e-05, -1.0792e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1740e-04, -1.4551e-02,  4.3518e-05,  1.6686e-05,  5.1416e-04,
        -1.2517e-02,  2.6951e-05, -5.3064e-05, -2.9081e-05, -1.3490e-02,
         1.8400e-05,  1.6096e-05,  1.3573e-05, -1.3852e-02,  2.1817e-05,
         5.0095e-04,  1.2285e-04, -3.2760e-05, -5.0328e-05, -8.3312e-06,
        -4.2732e-04, -1.8200e-04, -9.2320e-03,  8.3490e-05, -3.9597e-05,
         4.3955e-05,  4.5026e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6897e-04, -1.2091e-02,  1.0997e-04,  6.2461e-05,  4.6727e-04,
        -1.1393e-02,  2.1466e-05, -2.3255e-06, -1.3009e-04, -1.2870e-02,
         4.0442e-05,  2.1083e-05,  1.9481e-05,  3.0339e-06,  9.4647e-05,
         6.4425e-05,  5.0109e-04, -7.9275e-03,  3.9165e-05,  2.2259e-05,
        -5.0709e-04, -9.5227e-05, -9.2166e-03,  6.0127e-05,  1.3931e-04,
         1.7451e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.3803e-03, -9.5900e-05, -2.9821e-05,  3.5477e-04, -2.3606e-04,
        -2.8328e-04, -1.2803e-04,  1.1124e-01, -1.7564e-05,  2.2664e-04,
        -5.6657e-05,  3.6649e-05, -1.0886e-04,  5.0628e-03,  1.3193e-04,
         9.0546e-05, -5.7998e-05, -1.2803e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4547e-04,  1.0089e-04,  9.9687e-06,  3.0158e-04, -2.3284e-04,
        -3.9873e-05, -2.7817e-05,  1.1014e-01, -2.2101e-04,  4.0064e-05,
        -5.1932e-05, -1.7135e-04,  8.5167e-05, -1.4359e-04, -1.4011e-04,
         4.0241e-04,  5.0244e-04, -8.4624e-05, -6.4961e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0977e-04, -5.6337e-03,  1.4934e-05, -2.7796e-05, -1.0407e-02,
         9.7300e-05, -1.5864e-05, -6.7649e-06, -1.1916e-02,  8.5232e-05,
         2.2840e-05,  6.6765e-06, -1.5162e-02,  2.5780e-05, -1.3512e-05,
         1.7482e-05,  6.1326e-05,  1.5337e-05, -1.3885e-02,  3.6399e-05,
         3.3809e-05,  4.5809e-07,  5.7321e-05, -7.8650e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3339e-04, -1.3800e-02,  3.5629e-05, -2.0569e-04, -2.0525e-02,
         1.2215e-04, -6.5175e-06,  5.7791e-05, -2.7943e-02,  4.0164e-05,
        -3.2537e-05, -4.7870e-06, -2.1998e-02,  3.9069e-05, -6.9711e-05,
         2.2017e-06, -9.0761e-05,  1.1304e-04,  2.3210e-07, -3.3881e-05,
        -8.6952e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.6602e-04, -2.2530e-02,  6.9463e-05, -2.2957e-04, -1.5919e-02,
         1.2290e-04,  3.5067e-05,  1.3302e-04, -1.0813e-02,  1.3898e-05,
        -1.6296e-05, -7.8826e-05, -1.7489e-02,  6.7760e-05, -3.3581e-04,
        -4.1950e-06,  1.4231e-04, -1.6146e-02,  5.0442e-05,  5.0184e-05,
         4.9769e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3855e-04,  7.7450e-05, -6.5087e-06,  1.9027e-05,  6.1341e-05,
         2.8201e-03,  8.7316e-05,  7.0824e-05, -7.1809e-05,  2.0268e-03,
         2.1748e-05,  2.4710e-07, -5.7098e-05,  1.6860e-06,  9.5571e-05,
         3.5978e-05, -4.1489e-05,  1.2397e-04,  2.0488e-05,  4.0048e-05,
         6.1591e-05, -2.7183e-04,  3.1307e-06,  1.4585e-04, -3.0817e-05,
        -5.2996e-05,  3.5441e-05,  6.2032e-05, -1.4921e-06,  3.3355e-05,
         9.5095e-05,  4.3699e-03, -1.1443e-05,  8.6271e-06, -6.0354e-05,
         1.2123e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5128e-05,  3.6202e-05,  4.3803e-05, -5.9902e-05,  7.5940e-05,
         3.1611e-03,  1.0088e-04,  3.6829e-05, -1.2179e-04,  3.8646e-03,
         7.4104e-05,  1.2003e-05, -2.8760e-05,  9.6815e-05,  1.6694e-04,
         1.0464e-04, -1.8030e-05,  7.2400e-05, -2.5087e-05, -1.5880e-05,
         1.6463e-04, -2.8932e-04,  2.4891e-05,  1.3676e-04,  3.4673e-06,
        -3.5511e-05,  9.4024e-05,  6.0229e-05,  3.2094e-03, -8.0354e-05,
         1.6831e-04,  1.7718e-04,  4.2081e-05, -1.6531e-04,  2.0568e-04,
        -1.4677e-05, -1.0813e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6445e-04,  8.6596e-06, -1.7605e-05, -1.5104e-05,  6.2929e-05,
         3.8380e-03,  5.5571e-05,  9.3271e-05, -7.9209e-05,  2.9807e-03,
        -2.4906e-05,  4.1864e-05, -2.6533e-05, -6.7539e-05,  7.8174e-05,
        -1.4140e-05,  9.2469e-06,  9.0762e-05, -1.3115e-05,  5.5797e-05,
         4.9980e-05, -2.9857e-04, -2.7275e-05,  1.1251e-04, -6.1623e-05,
        -4.5603e-06, -3.7070e-07,  1.0126e-05,  3.0296e-03,  2.2419e-04,
        -3.0388e-04,  3.0551e-04, -4.2488e-05,  8.5699e-05, -2.9563e-05,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4947e-04, -2.9856e-06, -9.7339e-06,  4.1047e-05, -2.9559e-05,
        -1.7176e-04, -2.9368e-02,  7.2507e-05, -8.7473e-05, -5.8813e-05,
        -4.5939e-05, -3.4428e-04, -2.4417e-05,  1.7506e-05, -1.7234e-02,
        -3.7690e-05, -3.1644e-05,  3.6575e-05, -2.9375e-02,  3.6244e-05,
         1.5124e-05, -6.3387e-05, -1.8344e-03, -1.6196e-04, -2.4888e-05,
        -1.8330e-04,  6.0855e-05, -8.5757e-05, -1.7053e-02,  2.9329e-05,
        -1.8755e-04, -4.2483e-05,  2.8458e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9404e-04, -1.8531e-04, -6.3376e-05,  1.0619e-04,  8.7168e-05,
        -2.2100e-04, -5.0838e-02,  4.7157e-05, -1.0347e-04, -1.8286e-05,
        -4.8555e-05, -6.2730e-04,  6.3745e-06,  3.6448e-05, -3.8151e-02,
         9.7920e-05, -3.9425e-05, -6.9242e-05, -3.4543e-02,  9.3100e-06,
        -7.0105e-04, -8.5277e-05, -4.1050e-05, -1.6385e-06,  5.8621e-05,
        -4.5847e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.8437e-04, -5.6998e-05,  4.6289e-07, -4.4865e-05, -9.8764e-06,
        -2.8143e-04, -3.4528e-02,  4.4327e-05, -1.1534e-05,  2.4887e-05,
         7.0150e-06, -2.5686e-04,  3.9331e-05,  8.0282e-05, -2.9512e-02,
        -6.9483e-05, -4.8313e-05, -7.7692e-05, -2.3562e-02,  2.7815e-05,
        -5.5754e-05,  7.5276e-05, -9.6534e-06,  3.9646e-05,  3.6598e-04,
        -9.2854e-04,  8.1176e-05, -3.5850e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7473e-03, -3.3843e-02, -1.0955e-04, -3.8217e-07,  6.9308e-06,
        -2.5970e-05,  1.4440e-04, -2.0949e-02,  1.9115e-05, -1.1070e-04,
         5.2126e-05, -5.4536e-05,  3.9802e-04, -1.2582e-02,  4.3299e-05,
        -7.0910e-06,  8.4340e-05, -1.1822e-03,  9.0758e-05, -2.9376e-04,
         3.8733e-06, -3.3836e-02,  7.2713e-05, -2.8078e-04, -1.4497e-04,
        -4.4266e-05,  8.1355e-06, -2.3031e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-9.0039e-04, -9.2875e-05,  2.9975e-05, -4.8507e-05,  6.0737e-05,
        -4.1879e-02, -6.3972e-05,  1.0203e-05,  1.3096e-04,  1.1967e-05,
         1.7787e-06,  4.2979e-05, -8.5147e-05,  6.7291e-05, -1.9750e-05,
         8.6331e-06, -3.3747e-05,  5.9333e-05, -1.5235e-03,  4.1002e-05,
         4.1968e-04, -3.1246e-02,  6.5516e-05, -6.1342e-05,  9.2961e-06,
         1.0643e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.5455e-04, -7.2865e-03, -3.7230e-06, -5.5779e-05, -1.1301e-02,
         4.3747e-05, -1.8168e-05, -6.8615e-06, -1.4426e-05, -9.7016e-06,
         4.5581e-05,  2.6750e-05, -9.1774e-06, -1.4455e-06, -9.9648e-07,
        -3.2853e-05,  8.9798e-06, -3.1897e-04,  1.9482e-05, -1.9716e-05,
         2.5924e-06, -2.8166e-03, -1.8207e-05,  2.8048e-05, -8.1843e-05,
        -8.2200e-03,  1.8291e-05,  1.1008e-05,  1.5932e-05,  4.9890e-05,
        -1.1337e-05,  6.6603e-07,  1.3609e-06, -6.1414e-06, -7.9109e-03,
         8.4004e-06, -2.5403e-05, -5.7650e-03,  2.7661e-05, -1.7532e-05,
         1.2644e-05, -3.0368e-05,  1.5738e-05,  5.5393e-07,  3.0778e-05,
         1.8512e-05,  6.9352e-06,  8.1770e-06, -6.4507e-05,  3.5123e-06,
        -7.8581e-04,  1.2441e-05, -2.2241e-05, -9.9572e-03, -1.3098e-05,
         3.9479e-05, -3.3178e-05, -9.4973e-03,  2.2620e-05,  8.2853e-06,
         6.9356e-06,  3.5944e-05, -1.3965e-05,  2.4518e-05,  2.4481e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8822e-04, -5.8664e-02, -9.6349e-05, -2.4283e-04, -3.2208e-02,
         4.7688e-05, -1.5843e-04, -3.7252e-05, -1.9359e-05,  2.3455e-05,
         5.5627e-05,  6.7642e-05, -8.1823e-05, -6.9906e-05, -5.2057e-05,
        -1.5733e-04, -8.1595e-05, -3.5247e-03, -1.0199e-05, -6.3063e-05,
        -1.2989e-04, -4.5584e-02, -1.3993e-04, -2.5888e-05, -4.0886e-04,
        -2.5029e-02, -5.7217e-06,  3.6309e-05, -6.2795e-05,  1.1486e-04,
        -3.6693e-05, -6.0225e-05,  6.7554e-06,  1.3457e-05, -3.4509e-03,
        -1.1905e-05, -1.6107e-05, -9.1020e-05, -3.4283e-06, -7.5995e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3077e-04, -4.2489e-02, -3.6494e-05, -1.8459e-04, -1.7090e-02,
         3.7792e-05, -9.3368e-05, -2.8562e-05,  1.0940e-05, -1.5197e-05,
         8.1272e-05,  9.9342e-05, -1.2409e-04, -5.1910e-05, -2.6309e-05,
        -9.4178e-05, -1.0587e-04, -1.0511e-03,  2.1285e-05, -1.9390e-06,
        -9.7461e-05, -2.1414e-02, -1.2128e-04, -4.6251e-05, -3.1943e-04,
        -5.6127e-02, -1.7445e-05, -2.2121e-05, -1.0662e-05,  8.2607e-05,
        -3.7630e-05, -1.1480e-05, -1.2312e-05, -2.6247e-05, -4.5941e-03,
        -7.3191e-05, -1.1879e-04,  1.8504e-05, -1.7714e-04, -1.1329e-04,
        -5.5804e-03, -2.0796e-05, -4.4291e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7454e-03, -6.3932e-05, -2.2717e-05, -9.0929e-05, -1.6890e-03,
         2.4181e-04,  4.5936e-05, -8.3833e-05,  5.4468e-05, -6.2756e-02,
         1.7563e-04,  3.1551e-05, -7.3616e-05, -2.3735e-05,  1.3507e-04,
         7.9809e-05,  4.6241e-05, -3.8703e-02,  9.6851e-05,  9.3889e-05,
         1.9957e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7348e-04, -1.7435e-04,  1.7584e-05, -2.9305e-05, -2.6498e-03,
         2.6956e-04,  2.8288e-04, -4.3828e-05,  1.9090e-05, -4.8115e-02,
         7.3283e-05,  8.2913e-05, -3.1381e-04,  7.3922e-05, -7.1137e-02,
         2.1466e-05, -2.0832e-05,  2.4002e-04,  1.4335e-04, -1.8041e-05,
         5.5716e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8647e-04, -1.9148e-05, -2.4467e-05,  2.9946e-05, -3.1030e-03,
        -1.0192e-05,  1.2739e-04, -1.0204e-04, -1.6003e-04, -3.9844e-02,
        -2.7320e-05, -5.8522e-06, -6.9356e-05, -1.1753e-05, -6.2738e-02,
        -1.2804e-05, -7.9842e-06,  9.6419e-05, -3.5694e-04,  5.9036e-04,
        -3.7721e-03,  2.6516e-05,  3.8407e-05, -3.6514e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1933e-03, -1.6566e-04,  4.8898e-05,  5.2981e-05,  6.2680e-05,
        -5.1061e-04, -4.3641e-02,  1.7020e-04,  6.9074e-04, -1.6613e-04,
         9.9295e-06,  4.3315e-05, -4.3564e-03,  2.2723e-05,  3.4997e-04,
        -1.0060e-03, -2.2557e-05, -7.8110e-02,  1.8563e-04, -7.8900e-05,
         6.2913e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8441e-04, -3.7692e-04,  1.6109e-04, -2.5831e-05,  2.0418e-04,
        -6.0713e-04, -1.8190e-01,  2.6135e-04,  7.8831e-04, -8.0333e-05,
        -1.3709e-04, -2.0795e-04, -1.1615e-02,  1.7770e-04, -1.0135e-04,
        -1.3429e-04,  2.2712e-06,  6.8552e-05, -1.4881e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4340e-03,  1.0794e-05, -8.8857e-06, -2.6921e-06,  6.8766e-05,
        -1.1479e-04, -4.2293e-02,  4.5860e-05,  5.8606e-04, -1.1693e-05,
        -2.2560e-05, -6.3743e-06, -2.2984e-03,  8.2531e-07,  8.2769e-05,
        -3.6464e-04,  1.0679e-04, -3.7618e-02, -5.2130e-05,  7.3902e-05,
        -2.1542e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4948e-04, -8.7547e-03, -2.5497e-06, -2.6386e-05, -2.3902e-05,
         6.6974e-05, -1.2508e-02, -2.6870e-05,  7.7398e-05, -1.0377e-02,
         2.2207e-05, -4.2117e-05, -6.6813e-06, -7.9165e-03,  1.7481e-05,
         2.4857e-05, -4.8288e-05, -4.2896e-05,  3.4585e-05, -3.0081e-03,
        -2.5481e-05,  2.1011e-06,  3.9478e-05, -1.9109e-05, -7.8909e-03,
         1.8123e-05, -1.5653e-05, -1.4569e-05,  6.0632e-05, -7.6732e-03,
        -1.4257e-05, -9.3692e-06, -8.3133e-03, -1.7483e-05, -2.7424e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2286e-03, -2.2345e-02, -4.9958e-06, -4.1830e-05, -6.1708e-05,
         7.2456e-05, -1.2798e-02, -1.6549e-05,  3.3148e-05, -1.7968e-02,
         1.4220e-05, -4.3619e-05,  3.3575e-05, -3.5669e-03,  4.7888e-05,
        -3.2299e-06, -3.1490e-06, -2.4319e-05,  5.2296e-05, -6.4541e-03,
        -3.7987e-05, -1.2655e-05,  6.2973e-06,  3.0029e-06, -9.0976e-03,
         4.8468e-06, -4.1818e-05,  1.4500e-05,  5.2057e-05, -1.2596e-02,
         3.2849e-05, -3.3608e-05,  6.5254e-05, -1.3366e-02, -1.8596e-05,
        -2.2052e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #350: [tensor([-5.6704e-05, -1.5454e-04, -1.4884e-05,  5.1046e-03,  5.1567e-05,
        -1.3407e-04,  8.9298e-05, -4.5234e-06,  1.5072e-01,  4.3860e-05,
        -5.7434e-05,  4.1646e-05,  1.1375e-04,  4.0508e-05, -1.2309e-05,
         1.4085e-04, -6.6775e-05, -5.7744e-05,  7.8318e-05,  5.8224e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1837e-04, -1.5620e-04, -8.9097e-06,  4.8130e-03,  7.9974e-05,
        -3.8639e-05,  3.0849e-05, -6.9466e-05,  4.8677e-02, -2.4295e-05,
        -5.6750e-05, -1.9891e-05,  1.1140e-05,  6.4226e-05, -3.2203e-05,
         8.3505e-05,  1.3613e-02, -4.5236e-05, -6.5597e-06,  1.2698e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2994e-03, -2.2059e-02, -1.2639e-04,  5.8846e-06,  5.6076e-04,
        -5.7866e-02,  3.9595e-05, -4.7822e-05, -4.3355e-04,  6.6370e-06,
         6.7758e-05, -1.1649e-04,  1.2984e-04, -2.3821e-03,  2.6632e-04,
        -8.0767e-05,  8.0774e-05, -4.9931e-02, -2.4973e-05, -3.6960e-05,
        -4.4589e-05,  4.9135e-05, -2.8392e-02,  2.1328e-06,  8.4135e-05,
        -1.4275e-04, -4.6747e-05,  1.3375e-04,  5.6101e-05,  4.7527e-05],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5248e-04, -4.3478e-02, -9.3756e-05,  6.3063e-05,  4.6429e-04,
        -6.0084e-02, -2.0987e-05, -5.7838e-05, -3.7511e-04, -3.0246e-05,
         5.4202e-05, -1.9083e-04,  4.3499e-05, -1.3801e-03,  1.9650e-04,
        -3.9201e-05,  6.8367e-05, -3.5322e-02,  7.7025e-05, -5.3954e-05,
        -7.4715e-06, -2.5508e-05, -3.8655e-02, -6.9582e-05, -5.2400e-05,
         1.8628e-05, -8.2700e-05, -1.1762e-05,  1.9740e-05,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6494e-04, -1.8978e-02, -2.5577e-06,  1.6939e-05,  4.7321e-04,
        -3.3264e-02,  1.2869e-06, -4.5613e-05, -2.4711e-04,  3.6454e-05,
         5.2719e-05, -1.8946e-04,  1.4964e-04, -8.0894e-04,  1.9513e-04,
        -3.3873e-05,  1.0263e-04, -3.5818e-02, -1.5257e-05,  8.3733e-06,
        -4.9480e-05, -1.4000e-05, -3.9127e-02, -6.7172e-05,  4.4099e-05,
         5.9080e-05, -1.0422e-05, -1.9978e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0886e-04,  1.6817e-04, -6.2669e-05,  1.0087e-04,  1.3122e-06,
         1.9550e-04, -2.8370e-05, -6.0676e-05, -5.9452e-05,  3.0426e-04,
         6.6182e-05,  3.7566e-04,  1.2544e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.0038e-04, -1.8742e-05, -1.9826e-04,  5.3169e-05,  2.8783e-05,
         2.3820e-05, -6.6377e-05,  4.0629e-05,  6.5092e-04,  2.6182e-04,
         1.7729e-05,  9.3939e-05,  7.4295e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1136e-03,  2.8972e-05, -1.8457e-04,  1.0353e-04, -1.1323e-05,
         1.5713e-04, -1.1640e-05,  3.9719e-05,  2.8992e-04, -1.9264e-04,
         1.4788e-04, -5.8802e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.4870e-04,  1.2121e-04, -1.1039e-04,  1.8004e-04, -1.0439e-04,
         4.7729e-04,  1.2141e-01, -6.4396e-05, -7.2603e-04, -3.9277e-05,
        -4.5094e-05, -6.8894e-05, -3.7074e-05,  1.8877e-02, -9.2068e-05,
        -7.6270e-05,  4.5272e-05, -9.9601e-05, -1.4877e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2859e-04,  1.4978e-04, -2.3211e-04,  6.2011e-05, -9.1743e-05,
         4.9751e-04,  7.8275e-02,  3.4595e-05, -5.3294e-04,  9.1163e-05,
        -1.4636e-04, -9.4489e-05, -7.2388e-06,  1.3179e-02, -6.3354e-05,
         5.3262e-05,  9.1212e-03,  2.0035e-05, -5.2298e-05,  1.5106e-04,
         1.7194e-04,  1.4527e-04,  5.8953e-06, -8.2094e-05, -6.9658e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0327e-03,  1.2475e-05,  2.1485e-04, -1.1780e-04,  1.0219e-04,
        -5.0791e-04, -5.1843e-02,  9.2871e-05,  6.4792e-04,  3.6334e-05,
         8.4132e-05, -9.8184e-05,  8.7209e-05, -7.6183e-03,  4.6711e-05,
        -3.9559e-06, -9.9752e-02,  7.3620e-05,  6.3383e-04, -1.7851e-04,
         4.9053e-05, -9.5771e-05,  1.3645e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5346e-04,  6.2045e-05, -1.0525e-04,  3.5754e-05, -5.8527e-05,
         1.1380e-03, -1.5635e-05, -2.6139e-04, -5.1830e-05,  5.5941e-05,
         1.2938e-04, -3.2273e-05, -5.9649e-05,  4.3147e-06, -4.4846e-05,
         2.0887e-02, -4.1805e-05,  2.4703e-03, -6.1435e-05,  4.6672e-05,
         1.9978e-05,  4.8012e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #400: [tensor([-1.2613e-03, -2.2099e-02,  5.3364e-05, -2.8698e-04,  7.3488e-05,
        -4.8498e-05,  1.1668e-05, -1.7698e-05,  2.9710e-05, -4.9794e-02,
         3.7915e-05,  1.1889e-05,  2.6000e-05,  5.3102e-05, -3.7635e-02,
         5.2859e-05, -2.9005e-04, -3.7698e-05,  1.2720e-04, -1.9543e-02,
        -8.2163e-06, -4.9600e-05,  3.0774e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.9486e-04, -5.2734e-02, -5.8336e-05, -2.3468e-05, -8.3475e-07,
         2.7777e-05, -1.3793e-06,  1.0397e-05,  4.9534e-05, -3.4568e-02,
         6.3296e-05,  8.3226e-06,  5.7917e-05,  8.7739e-07, -3.3480e-02,
        -7.0107e-05, -1.9484e-05, -3.1803e-05,  8.0681e-05, -5.8997e-04,
         8.5349e-05,  2.3292e-05, -4.5666e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7347e-04,  3.6582e-02,  1.2343e-04,  3.4269e-04,  6.4918e-05,
         8.6570e-05,  1.3104e-04, -3.0201e-06, -1.0959e-04,  5.5047e-02,
        -3.5401e-05,  6.2581e-05, -3.4109e-05, -4.1503e-05, -8.3125e-05,
         8.0546e-04,  7.4781e-02,  3.5409e-05, -5.8060e-05,  2.5887e-04,
         2.0153e-05, -1.1884e-05, -6.2976e-05, -6.3709e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3101e-05, -4.8949e-02, -2.7379e-06, -9.7099e-05,  9.5487e-05,
         8.3701e-06,  2.1549e-05, -4.3635e-05,  5.3862e-05, -2.1293e-02,
         8.5369e-05, -3.6613e-05,  1.1770e-05,  6.5208e-05, -2.3443e-02,
         2.7992e-05, -1.4422e-04, -1.2664e-05,  6.1602e-05, -1.0623e-03,
         5.8978e-05, -3.7938e-05, -1.6923e-02,  1.4917e-05, -7.1038e-06,
         2.2925e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6583e-04,  9.9036e-03,  1.2581e-04,  2.2673e-06,  1.8784e-04,
         1.2086e-04, -4.5133e-05, -9.5399e-05,  2.3645e-04,  7.4745e-03,
        -5.2657e-05, -3.9214e-05,  4.2287e-05,  1.2630e-01, -7.1003e-05,
         4.4174e-04,  2.4432e-04,  6.1623e-05,  2.5672e-04, -1.4611e-05,
         2.0842e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1837e-04,  7.4271e-03, -4.0639e-05,  3.9100e-05,  2.3981e-04,
         9.3189e-05, -6.8902e-05, -1.2453e-06,  2.0073e-04,  1.1859e-02,
        -8.3457e-05,  1.5046e-05, -1.0847e-04,  1.3931e-01, -7.0476e-05,
         9.9343e-04, -4.2093e-05,  9.3961e-05,  1.9179e-05,  1.2945e-04,
        -3.5002e-05, -2.0701e-05, -5.8241e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5328e-04,  9.7383e-03,  1.3783e-04,  6.9179e-05,  1.4661e-04,
         9.6118e-05,  6.0651e-05,  1.1483e-04,  1.9681e-04,  1.8809e-02,
        -2.1084e-04, -3.7219e-05,  3.6724e-06,  9.8940e-02,  2.1678e-04,
         5.4574e-04,  1.2211e-04, -1.6593e-04, -4.7955e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.7830e-04,  3.3291e-05, -3.2179e-06,  5.7207e-05, -8.3592e-05,
         9.8419e-02, -4.4120e-05,  1.0576e-05,  1.4850e-06, -8.5291e-05,
         3.1800e-03,  5.1433e-06,  3.7720e-05,  2.4033e-02, -4.2832e-06,
        -2.4233e-05,  3.9406e-05, -3.2351e-04,  1.8246e-02,  6.0985e-06,
         3.6128e-05, -9.8928e-05,  2.0419e-05, -5.1414e-05, -5.5465e-06,
         5.5670e-05,  7.0863e-05,  6.1811e-03, -2.0006e-05,  1.3325e-05,
         1.9453e-05,  9.0484e-03, -9.7401e-05, -2.4375e-05, -4.1249e-05,
         2.7252e-04, -6.2094e-05, -6.8587e-06,  5.6888e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8163e-03, -4.3314e-06,  1.1194e-06, -1.1937e-05,  9.0552e-05,
        -6.7835e-02,  2.4683e-05, -6.6364e-05, -2.2831e-05,  2.9526e-05,
        -2.3671e-03,  3.8315e-05,  7.9867e-06, -2.5129e-02,  1.0097e-04,
         2.9163e-05, -5.1880e-06,  3.0396e-04, -2.8212e-02, -5.8429e-06,
        -7.4847e-05,  1.0823e-04,  1.2966e-05, -4.1237e-05, -3.2141e-05,
         3.3546e-05,  1.1890e-05, -7.8295e-03,  2.5731e-05,  5.2635e-05,
         3.5135e-05, -2.6469e-05, -1.5627e-05, -1.4722e-02,  8.2962e-06,
         7.6840e-05, -6.0866e-02,  4.8020e-05, -4.1251e-05, -1.8451e-04,
         2.6656e-05, -1.5962e-05, -4.9787e-05,  2.5208e-05, -2.5738e-05,
         2.6390e-06, -5.4317e-04, -4.4885e-05, -5.7229e-05, -3.4033e-05,
         8.0761e-07,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.4528e-04, -4.9686e-05, -2.1339e-05,  4.9787e-05,  4.0046e-05,
        -5.8321e-02,  6.1945e-05, -1.3137e-05, -3.4520e-05,  2.0170e-05,
        -3.1787e-03,  2.9964e-05, -1.2416e-05, -2.2256e-02,  7.2224e-05,
         3.1185e-05,  3.6051e-06,  2.2788e-04, -2.5790e-02,  1.5529e-05,
        -2.3955e-06,  8.0397e-05,  7.1127e-05, -2.9102e-05,  1.1149e-05,
        -2.5015e-05,  1.2658e-05, -7.2677e-03,  8.3122e-05,  2.3814e-05,
         8.9884e-05, -4.3201e-05, -4.9579e-05, -5.9502e-03, -5.0536e-05,
         6.1457e-05, -3.7494e-02,  4.6845e-05,  2.1657e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9934e-04,  1.3987e-05, -1.6095e-05, -8.3921e-06, -1.0683e-04,
         5.7994e-02, -3.1783e-05,  4.0860e-06,  1.3710e-05, -4.0315e-05,
         2.2472e-03, -2.0835e-05,  5.1983e-05,  2.4369e-02, -1.3506e-04,
        -4.8892e-05, -1.4652e-05, -3.6285e-04,  1.1831e-02, -1.1481e-06,
         4.3326e-05, -8.2626e-05,  1.6720e-05,  6.1661e-06, -6.4576e-07,
         5.7982e-05, -2.1183e-05,  8.0410e-03, -6.2647e-05,  3.7157e-05,
         1.0370e-05, -1.5897e-05,  7.4621e-04,  6.0466e-02, -8.8321e-06,
        -1.9276e-05,  5.9413e-06, -9.0413e-05,  1.0201e-02, -2.0038e-05,
         7.3181e-06, -2.6097e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1553e-04, -1.2981e-05,  1.2140e-05, -5.3165e-05,  7.3414e-06,
        -5.2203e-02,  1.3976e-06, -7.3679e-06, -4.4297e-05, -1.5171e-05,
        -6.9457e-04,  3.0049e-05, -1.9557e-05, -2.2218e-02,  2.0721e-05,
         2.8856e-06,  4.3716e-06,  2.5137e-04, -8.7323e-03,  1.7035e-08,
        -1.5783e-05,  4.1276e-05, -3.1553e-05, -3.7500e-05,  3.8710e-07,
        -5.1216e-05, -2.8180e-05, -2.8951e-03,  2.3738e-05, -3.7141e-06,
         1.1667e-05, -3.1724e-02,  1.4956e-06, -2.0132e-04, -2.4348e-05,
        -1.0523e-04, -6.1747e-05,  5.4435e-05, -1.3594e-04, -5.8968e-03,
         2.4823e-05,  3.8213e-06, -1.9699e-05, -2.0978e-05, -1.4259e-04,
        -2.4068e-05,  1.8657e-05, -3.7352e-02, -6.5751e-06, -1.5217e-04,
        -3.5300e-05,  1.7770e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.5276e-03, -3.3346e-02, -1.2512e-05, -7.3188e-06,  1.7734e-05,
         5.6549e-05, -4.7709e-05,  2.6499e-05, -2.5004e-05,  8.9412e-05,
        -1.0536e-02,  2.3218e-05, -3.7531e-05,  2.8532e-04,  1.7325e-05,
        -4.5285e-05, -4.7911e-02,  1.2172e-05,  2.0792e-05,  7.0849e-05,
        -2.2168e-02,  4.4397e-05,  1.6163e-05,  2.3528e-05, -2.3646e-02,
        -3.3538e-05,  2.7597e-04,  1.9906e-05,  4.6966e-05,  6.9560e-05,
        -9.8928e-03,  1.9281e-05, -3.6318e-06, -6.1685e-05,  3.6508e-05,
         1.7781e-05, -2.0644e-02,  5.0890e-05,  1.2570e-05,  5.9734e-06,
         3.7303e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8316e-03, -6.0859e-02, -4.0343e-05, -5.8277e-05, -4.2251e-05,
         1.4182e-05, -3.4033e-05, -9.6046e-05, -2.9254e-06,  7.5575e-05,
        -2.2937e-02, -1.9245e-05, -1.0873e-05,  1.9673e-04, -6.4736e-06,
         1.6397e-05, -2.9901e-02,  9.6242e-06, -4.7430e-06,  7.5852e-05,
        -3.0000e-02, -2.4160e-05,  4.8668e-05, -2.5383e-05, -2.6112e-02,
        -2.8865e-05, -2.4794e-05,  2.4147e-06, -2.1184e-03, -6.8038e-05,
         1.2531e-05,  1.5268e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0252e-03, -2.2255e-02,  8.1048e-05, -4.7279e-05,  9.9392e-06,
         6.4797e-05, -8.1885e-06, -5.9961e-06, -9.7075e-06,  8.4482e-05,
        -3.2680e-02,  5.6770e-06, -4.3425e-05,  1.9380e-04, -7.7507e-06,
        -2.1676e-05, -4.3130e-02,  7.1290e-05,  3.9755e-05,  1.5473e-05,
        -4.3864e-02, -2.5890e-05,  2.7000e-05,  2.5437e-05, -4.0692e-02,
         7.9841e-05, -2.3586e-05,  1.8085e-04, -4.4727e-05, -6.3696e-06,
        -1.7666e-02,  4.5422e-05, -2.7096e-06,  1.8557e-04, -5.3024e-06,
         1.1974e-05, -1.0954e-05,  5.1992e-05, -2.3352e-05,  9.1137e-06,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4823e-05, -9.1789e-02,  1.5555e-05, -3.0756e-05,  1.6482e-05,
         4.3699e-05,  4.7530e-05,  7.0734e-05,  8.4148e-05,  7.4249e-05,
        -2.7273e-02,  4.3048e-05, -2.1035e-05,  3.4380e-04,  1.2142e-05,
         6.3297e-05, -4.2856e-02,  1.5089e-05, -9.7607e-06,  6.6010e-05,
        -2.8412e-02, -8.2374e-05, -1.6597e-05,  3.7423e-05, -2.0511e-02,
         5.0565e-05, -4.2051e-04,  4.5391e-05, -1.2459e-04, -5.3783e-05,
         6.4902e-05, -1.0991e-05, -1.5063e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3665e-04, -2.9558e-02,  9.7271e-06, -3.5953e-05,  3.9395e-05,
        -3.8392e-02,  1.7852e-05, -2.5748e-05, -4.0075e-05, -1.5870e-02,
         1.2899e-05,  3.1344e-05,  8.6214e-05, -1.1129e-03,  4.9985e-05,
         1.3736e-05, -3.5728e-05, -2.1094e-02, -4.7586e-05, -2.5033e-05,
         1.6530e-05,  5.7347e-06, -6.9553e-05,  3.2093e-06, -1.6067e-05,
        -6.2874e-05, -1.6692e-05,  4.0479e-07, -1.0583e-02,  2.1902e-05,
        -2.9291e-08,  1.0869e-06,  4.5607e-05,  1.5768e-04, -1.3978e-02,
         2.4608e-05, -5.4718e-06, -5.5515e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1640e-03, -1.8604e-02,  3.5285e-06, -4.0176e-05,  4.8068e-05,
        -2.9608e-02,  1.2186e-05, -2.8007e-05,  8.0523e-05, -4.4991e-02,
        -1.3348e-07,  9.8007e-06,  3.1736e-04, -5.9510e-04,  5.4707e-05,
         6.6056e-06,  3.9124e-05, -2.9766e-02,  2.3449e-05, -1.7193e-05,
         3.3503e-05,  2.7303e-05, -6.4091e-03,  8.5642e-05,  3.2998e-05,
         2.6171e-05,  6.3959e-05,  1.0921e-04, -5.8602e-03, -2.2318e-05,
         3.3106e-07,  2.0562e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9649e-05, -2.3212e-02,  3.8906e-05, -2.9629e-05,  7.4310e-05,
        -3.3643e-02,  5.9554e-06, -7.6661e-05,  7.9935e-06, -2.6896e-02,
        -1.9881e-05,  8.4215e-05,  1.4397e-04, -5.8827e-04,  5.2648e-05,
         3.7478e-05,  2.8565e-05, -1.8659e-02,  1.5192e-06,  4.5848e-05,
         3.3931e-06,  2.8973e-05, -2.4330e-02, -5.0161e-05,  4.0781e-05,
         3.0026e-04, -1.8014e-03, -1.8333e-05,  5.9368e-05, -1.4762e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3035e-04, -9.2151e-02, -9.2721e-06, -9.2161e-05, -1.0557e-06,
        -9.2846e-05, -7.6404e-02,  9.2220e-06,  1.1260e-04, -7.6231e-05,
         1.6984e-04, -6.2033e-05, -6.8750e-05, -3.9970e-05, -3.3418e-02,
        -3.6987e-05,  4.0938e-06, -3.8002e-05, -2.6570e-05, -1.0237e-04,
         1.4098e-04, -9.8312e-07, -2.6987e-02, -7.9835e-06,  1.1229e-04,
         2.0360e-05, -7.9796e-05,  7.6072e-07,  1.2217e-04,  5.4084e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2728e-03, -8.9654e-02,  1.3630e-04,  3.7047e-05, -3.1551e-05,
        -7.3395e-05, -5.4458e-02,  6.9968e-05,  9.1401e-05, -1.0144e-04,
         1.7325e-04, -5.0313e-05,  4.4626e-05, -4.5360e-05, -3.2944e-02,
        -1.4583e-05, -8.6259e-05,  4.6387e-05,  6.5423e-05,  6.3299e-05,
        -3.1828e-05,  9.0188e-05, -3.6690e-02,  9.3253e-05,  1.4217e-04,
        -9.0828e-05, -1.0447e-04, -2.6836e-05,  9.3216e-06,  4.8532e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.7541e-04, -4.9225e-02, -3.0859e-05,  6.2104e-05, -1.1206e-04,
        -1.2224e-04, -4.5269e-02,  1.8485e-04,  6.4349e-05,  8.6200e-07,
         9.0294e-05, -7.1670e-05, -1.6574e-05, -2.4396e-05, -8.2915e-02,
         2.4488e-05, -5.3715e-05,  3.2230e-05,  7.3416e-05,  1.9163e-05,
         4.4535e-05, -5.3240e-05, -5.1217e-02,  1.2800e-04,  1.3991e-04,
        -1.1952e-04, -1.3549e-05,  1.0944e-04,  4.6044e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2855e-04,  2.1479e-01,  1.6901e-04,  2.3903e-04, -8.3557e-05,
         1.4779e-05, -6.9519e-05,  4.5736e-04, -7.2291e-04,  1.3004e-02,
         2.3569e-06,  2.4063e-04,  5.7189e-05,  1.9560e-02, -9.8094e-05,
        -2.3597e-04,  9.7632e-04,  2.0617e-04, -1.4701e-05, -2.1315e-04,
        -1.3495e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1134e-03, -8.0919e-02, -1.1972e-04, -8.0383e-05,  8.0207e-06,
        -3.4196e-05, -8.9555e-06, -4.6296e-04,  4.6414e-04, -1.0826e-02,
         1.0783e-05, -5.1621e-06, -1.7716e-06, -1.0461e-01,  4.5086e-05,
         2.1379e-05, -8.3104e-04,  7.4638e-05,  4.5000e-04, -2.2292e-03,
         4.3172e-05,  7.8752e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 2.2034e-04,  6.3567e-02,  2.6221e-04, -6.6156e-05,  9.0547e-02,
         3.1308e-05,  2.4548e-04,  5.3010e-05,  3.9427e-05, -9.0270e-06,
        -4.4151e-05, -6.6778e-05,  5.6276e-05, -8.5733e-06,  3.9518e-06,
         9.6090e-02, -7.5425e-05,  1.8462e-05,  1.3378e-04, -2.3320e-05,
        -2.7483e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.9475e-04,  1.4591e-01, -3.7059e-05, -3.5659e-05,  6.8695e-02,
         1.4884e-04, -2.5978e-05, -1.1888e-04, -5.6182e-05,  6.1725e-04,
        -6.6191e-05, -1.8597e-04, -2.4006e-05, -5.3318e-05, -2.1854e-04,
         3.7293e-02, -2.5329e-05,  8.2178e-05,  2.8617e-05,  5.7468e-04,
         1.1557e-06,  4.0554e-05, -2.4670e-04,  3.3420e-05,  1.7370e-05,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1945e-05, -2.6801e-05, -1.2468e-05,  1.8889e-05, -1.1082e-04,
         1.8791e-04, -4.4482e-06, -5.1671e-05,  1.2950e-01,  3.7621e-05,
         2.1677e-04,  6.0254e-05, -3.6892e-05,  3.7441e-05,  2.0702e-03,
        -2.1424e-04, -7.6519e-05,  2.4704e-05,  2.1500e-06, -7.0752e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0727e-05, -4.5467e-05, -1.2405e-04, -1.7428e-04, -2.0801e-04,
        -9.2185e-05, -1.3950e-04,  2.1793e-04,  1.8191e-01, -1.6258e-04,
         7.3186e-05, -1.2720e-04, -1.8191e-04,  6.4906e-05,  1.6274e-03,
         6.6453e-05, -1.2630e-04, -5.9288e-05, -2.9444e-05, -6.9207e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8912e-04, -5.4743e-05, -3.9408e-05, -6.4328e-05, -3.3452e-04,
         1.8696e-04, -2.7814e-05, -6.7247e-05,  3.1549e-02, -1.3232e-04,
         1.3522e-04,  3.8432e-05, -1.3754e-04, -1.4681e-04,  6.1373e-04,
        -3.5945e-05,  1.6013e-01, -2.2027e-04,  6.2396e-05,  3.5389e-05,
        -1.3106e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7412e-05, -3.6112e-02,  1.8503e-05, -8.0807e-05, -1.5545e-02,
        -9.8123e-05, -5.7792e-05, -1.5355e-04, -2.7471e-06, -6.2852e-05,
         6.9235e-05, -6.4251e-02,  8.7537e-05, -1.2514e-04, -7.3853e-02,
        -6.4248e-05,  6.3138e-05, -7.9333e-05,  8.0175e-05,  8.6874e-06,
        -7.9204e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4860e-04, -5.0888e-02,  4.4160e-05, -1.4217e-04, -7.1847e-02,
        -1.3144e-04,  8.4450e-05, -1.5784e-04, -2.1402e-04, -1.6334e-04,
         1.3301e-06, -9.0585e-02, -1.6723e-05, -1.9995e-04,  1.8980e-04,
         1.4616e-06, -1.0385e-04, -6.2072e-05, -7.9394e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5741e-05, -8.8575e-02,  1.1380e-04, -4.2917e-05, -5.4662e-02,
        -2.8574e-04, -1.1980e-04, -7.8453e-05, -5.0462e-05,  6.4677e-06,
        -9.8924e-06, -1.0217e-01, -2.5533e-05, -1.1037e-04, -4.5742e-05,
        -1.4179e-04, -2.6945e-05, -1.3937e-05, -8.3895e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1751e-04,  1.8146e-01,  2.0831e-05,  1.3289e-04, -9.9950e-05,
        -1.6617e-04, -1.5607e-04,  1.2150e-04, -2.1152e-05, -4.1323e-05,
         1.7342e-05, -3.2028e-05, -1.9064e-05,  2.0497e-04, -6.0785e-05,
         4.3730e-05,  5.4468e-05, -6.2170e-05,  3.2883e-02, -1.0627e-04,
         4.9695e-04,  1.1207e-04,  5.5847e-06,  1.3271e-04, -2.0709e-05,
         4.2744e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4147e-04,  1.2343e-01,  1.1735e-05,  1.8192e-04, -1.0901e-04,
        -4.2697e-05, -9.1207e-06,  2.1455e-04, -1.5658e-04, -4.5508e-05,
         1.3747e-04, -6.8539e-05, -2.0135e-05,  1.4827e-04, -8.2082e-05,
         7.4056e-05,  5.3653e-05, -1.4780e-05,  4.7272e-02, -2.7687e-05,
         1.0912e-03, -6.9456e-05,  9.1977e-05,  6.7072e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.0606e-04,  1.5236e-01,  2.1359e-05,  3.0616e-04,  4.0519e-05,
        -8.8851e-06, -2.2457e-04, -5.1236e-05, -1.7049e-04, -1.3040e-04,
         1.4637e-04, -1.6798e-04,  1.1675e-04,  3.4454e-04, -2.0198e-05,
        -1.7185e-04, -4.5817e-08,  1.2269e-04,  1.4613e-01,  4.6058e-05,
         2.6476e-04, -4.9169e-05, -1.7868e-04, -2.5867e-04, -8.6197e-05,
         5.6130e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.5728e-04, -5.9366e-02, -4.3285e-05,  8.0636e-06,  1.3111e-04,
        -1.3404e-05,  8.3614e-05, -1.0947e-04, -1.2798e-01, -1.0863e-04,
         1.8816e-04, -8.9544e-06, -4.2522e-05, -1.0223e-02, -1.8373e-04,
         7.7655e-05,  1.1294e-04,  1.3669e-04,  5.1095e-04, -8.5718e-03,
        -9.9590e-05, -3.4753e-05, -6.8345e-05,  2.0019e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([-4.6225e-05,  2.0995e-06,  2.8942e-04, -1.5662e-05,  3.7701e-05,
        -8.0600e-05, -3.0935e-05, -4.8696e-06, -8.5212e-05,  6.6184e-06,
        -2.1451e-05,  3.0078e-05,  3.7963e-02, -7.3474e-06,  1.9916e-03,
         1.0569e-04, -8.9660e-05, -1.3754e-04, -5.0433e-05, -1.3227e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3558e-04,  6.4971e-05,  1.4196e-04, -1.1719e-04, -1.1937e-04,
        -4.1080e-04, -3.2056e-05, -1.7785e-04,  2.2451e-05,  4.7160e-05,
        -3.4944e-05, -1.1195e-04, -2.6589e-04,  8.3881e-04,  1.4720e-01,
        -9.0351e-05,  4.1004e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0707e-03, -1.5469e-01, -7.1484e-05, -2.0444e-05,  4.1927e-05,
         6.7836e-06, -8.3052e-05, -3.6307e-02,  2.0615e-04, -1.4141e-04,
        -4.7146e-06,  5.6035e-06,  2.6128e-05, -1.0182e-04,  1.5342e-04,
         2.5931e-06,  7.7474e-06, -3.3913e-02,  4.2364e-05,  1.0771e-04,
         5.0264e-05,  1.0875e-04,  3.8188e-05, -2.8043e-05,  3.3099e-05,
         5.2950e-05, -1.4351e-02,  3.8559e-05, -1.9242e-04,  2.1399e-04,
        -4.8373e-03,  2.2567e-05,  3.2809e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4017e-04,  4.6409e-02, -8.9364e-05,  2.8251e-04, -3.8311e-05,
         5.3453e-05,  2.4710e-05,  9.5484e-02, -1.4041e-04,  4.5127e-05,
        -2.7391e-05,  7.9139e-04, -1.1102e-04,  5.8277e-05, -8.9660e-05,
         1.4115e-05, -3.3070e-05,  6.8960e-02, -1.0269e-05, -1.5075e-04,
        -4.3484e-05, -1.2876e-04, -1.2854e-04, -1.1248e-05, -2.2548e-05,
        -6.5010e-05,  1.7240e-02, -4.9378e-05, -1.7188e-05, -3.8007e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.1444e-04,  1.6428e-01,  5.4536e-05,  2.9152e-04, -1.4765e-04,
         1.6313e-05,  5.9317e-05,  2.5798e-02, -1.3022e-04,  1.7568e-04,
         3.6636e-05,  9.1098e-04, -1.3554e-06,  4.2943e-05, -7.0323e-05,
         5.4283e-05, -9.5210e-06,  3.8459e-02,  4.4884e-05, -1.0673e-04,
         2.4595e-05, -1.5312e-04, -6.2502e-05, -4.1959e-05, -1.0820e-05,
        -4.3004e-06,  2.3767e-02,  7.8646e-05,  1.2234e-04, -5.3766e-05,
         8.3205e-03, -2.0192e-04,  2.5800e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.7664e-05, -5.1218e-05,  1.7440e-05, -5.1296e-04, -6.1244e-05,
        -1.0167e-01,  5.0079e-06, -2.0446e-05, -2.5294e-04, -3.8793e-02,
         8.0068e-05, -2.2830e-05, -6.6357e-06, -3.8470e-06,  1.4302e-05,
        -6.1831e-03, -4.3706e-05, -2.4680e-04,  8.0431e-05, -2.2259e-02,
        -5.5936e-05, -8.1940e-05,  1.0614e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0290e-04,  2.5969e-05,  6.1669e-05, -6.3929e-05, -5.1827e-05,
        -3.8160e-02,  6.6049e-05,  4.3721e-05, -3.4702e-06, -4.2479e-02,
         4.0116e-05,  2.1407e-05, -2.9580e-05,  2.8092e-05,  3.8002e-05,
        -1.0867e-02,  5.6411e-05, -5.6355e-06,  3.9143e-05,  1.0093e-04,
         3.8479e-05, -1.2364e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0751e-03, -1.3485e-04, -6.4436e-05, -2.8627e-04, -1.7476e-05,
        -5.7241e-02,  2.5015e-05, -4.7174e-06, -1.0600e-03, -1.0774e-01,
         3.0156e-05, -5.0933e-05, -5.0292e-05, -4.0296e-05, -5.0817e-05,
        -1.2333e-02, -5.7499e-05, -2.2879e-04,  2.5030e-05, -1.9467e-04,
        -6.2744e-05, -8.7815e-05, -3.7576e-05, -4.4350e-06,  7.1625e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.2840e-04, -5.4208e-02,  4.3515e-05, -1.8863e-06, -9.2339e-03,
         1.6683e-04, -6.9383e-05,  1.3087e-05, -6.1588e-05, -8.3960e-02,
         2.3295e-05,  1.5452e-05,  2.1907e-05,  8.7438e-06,  6.4709e-05,
        -4.4605e-03,  2.6679e-05, -3.0113e-05,  8.8034e-05, -1.4027e-04,
         1.5282e-05,  4.5621e-05,  5.6292e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2676e-04, -2.7320e-02,  4.4566e-05, -6.4528e-05, -5.6696e-02,
         3.9227e-05,  2.8101e-05, -3.4322e-05, -2.6777e-05, -6.1242e-02,
        -1.0610e-04,  2.9134e-05,  1.0259e-04, -2.3809e-05,  4.8523e-05,
        -9.8312e-03, -2.3964e-05, -1.5385e-04, -1.5896e-02,  1.7083e-05,
         1.6767e-06, -2.3651e-05,  3.2848e-05,  2.8187e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7472e-05, -6.9395e-02, -8.2367e-05, -1.1032e-04, -2.3881e-02,
         6.1701e-05,  5.8291e-06,  3.8832e-05, -9.2584e-05, -1.2500e-01,
        -1.8439e-05, -7.6316e-05,  1.6133e-04, -5.8509e-05,  1.2428e-04,
        -6.0134e-03, -8.6351e-05, -1.6696e-04,  1.8733e-04, -2.6314e-02,
         1.9354e-04, -7.1305e-05,  4.3537e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8256e-04,  2.1411e-01, -2.2170e-04, -3.6550e-05, -9.9627e-05,
         3.6619e-02, -3.4510e-04, -1.6174e-04,  9.6058e-05,  3.3047e-02,
        -9.7223e-06, -6.9071e-06,  2.6939e-04,  2.0901e-04, -2.6015e-04,
        -8.9850e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #600: [tensor([ 1.4087e-03, -4.2555e-02, -9.3319e-06,  2.5877e-04, -3.8565e-02,
        -8.5831e-05,  7.9816e-06, -3.7154e-05, -1.5081e-04, -6.7298e-02,
        -8.9603e-05, -3.3541e-05, -5.4804e-05, -6.2720e-05, -8.1508e-05,
        -7.4048e-06, -1.0020e-05, -4.9267e-05, -1.2953e-02, -8.1779e-05,
        -7.0065e-05, -8.5817e-05,  7.0230e-05, -3.8362e-02, -1.2164e-05,
         1.9157e-05, -4.3652e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1987e-04,  4.3828e-02,  7.1349e-05, -4.1382e-05, -3.5021e-05,
         1.0149e-05, -2.5450e-05,  1.4779e-04, -6.7222e-05,  8.3579e-05,
         3.6003e-06,  3.2567e-05, -8.2188e-06, -3.9044e-05,  9.5514e-03,
        -1.5021e-05,  4.5572e-05, -1.1976e-05,  1.2440e-04,  5.1251e-05,
         4.0364e-05,  1.5871e-02,  5.3901e-05,  1.7465e-05,  5.3477e-06,
        -1.8493e-05,  3.9969e-02, -4.3929e-05,  8.0213e-05, -1.0587e-04,
        -1.9952e-05,  7.6358e-02,  5.5906e-06, -1.0220e-04,  2.9481e-02,
        -2.4390e-05, -1.0271e-05, -2.0807e-05,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4480e-04, -1.7957e-01, -9.4217e-05, -7.8908e-05,  4.0331e-05,
        -2.4142e-05,  1.1809e-04, -9.7566e-05, -5.1039e-05, -4.0985e-04,
        -9.2445e-05, -1.9803e-05, -9.1281e-06, -1.4516e-04, -5.5405e-03,
        -6.2492e-06, -1.6657e-04,  1.9917e-04, -1.5685e-04, -1.2374e-04,
        -7.3488e-05, -1.2441e-02, -6.5045e-05, -1.8014e-05, -6.0667e-05,
        -2.0367e-05, -9.2531e-02,  3.4236e-05, -1.1489e-04,  3.2171e-05,
        -8.8453e-05, -1.4651e-02,  5.4970e-05, -4.4920e-06,  7.5876e-05,
        -1.7701e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.1956e-04, -4.0780e-02, -1.7034e-05, -1.0070e-05,  1.1781e-05,
        -1.7309e-05,  4.4287e-05, -1.6533e-05, -9.0983e-06, -2.7213e-04,
         1.5726e-05, -4.3512e-05, -7.2095e-06,  5.6748e-05, -1.4675e-02,
         1.1120e-05, -3.4879e-05,  6.3302e-05, -2.9919e-05,  3.5181e-06,
         2.8899e-05, -4.4572e-03,  3.5454e-05,  1.1046e-05, -6.6538e-05,
        -3.5174e-05, -6.2192e-02,  4.2883e-05, -7.6043e-05, -2.2250e-05,
        -6.6527e-05,  2.2281e-05, -6.2911e-02, -3.2209e-05,  6.3753e-05,
         1.1547e-05,  1.6490e-05,  4.1990e-05, -6.5550e-05,  2.6872e-06],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8898e-04, -1.0495e-01, -1.3282e-05,  1.6499e-04, -3.0219e-02,
         3.2430e-05, -1.6614e-05,  8.7957e-05, -1.4553e-04, -8.1519e-05,
        -9.7510e-05, -1.1634e-04, -6.5367e-02, -8.7740e-05,  3.3631e-05,
         7.1713e-05,  5.4983e-05, -4.6206e-02, -7.3576e-05,  1.6817e-04,
        -1.9296e-02, -3.5641e-05, -2.5436e-06, -1.0054e-04, -5.0605e-05,
        -9.4241e-05,  2.1054e-05, -2.2210e-04, -1.9190e-02,  1.5065e-05,
         3.9080e-05,  4.2807e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0553e-03,  1.0196e-01,  2.7093e-05, -3.3568e-04,  3.4188e-02,
         1.0990e-04,  2.2696e-05, -2.0429e-04,  7.4034e-05, -4.6690e-05,
         2.0375e-04, -1.5846e-04,  7.8854e-03,  5.8892e-05,  3.1422e-05,
        -7.0098e-05,  1.3883e-05,  1.0723e-01,  4.9840e-05,  2.0487e-05,
         2.8115e-05, -1.0169e-04, -6.9333e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0825e-03, -1.5916e-01, -9.3292e-05,  1.9948e-04, -4.8403e-02,
        -1.7991e-05, -3.9441e-06,  4.5300e-05, -6.9821e-05, -3.7894e-05,
        -1.7400e-04, -1.0634e-05, -1.3942e-02, -4.6737e-05,  5.0680e-06,
        -4.1276e-05, -3.4999e-05, -3.3650e-02, -1.0932e-04,  1.8145e-04,
        -3.4267e-02, -1.1174e-04, -5.8212e-06,  3.4882e-05, -2.3007e-05,
         1.8222e-05, -2.0121e-04, -4.7820e-05, -6.2142e-05, -2.3537e-04,
        -9.8704e-05, -3.0277e-05,  3.4841e-07, -3.1764e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.5387e-04, -3.0558e-05, -2.2188e-04, -9.8255e-06, -1.7359e-04,
        -7.1400e-02, -9.8367e-05,  8.2742e-05,  7.2353e-05, -6.1708e-02,
        -9.0559e-05, -8.2823e-02,  2.3637e-05,  2.7470e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8138e-04,  1.8452e-05,  2.1247e-04,  1.7958e-04, -1.0333e-04,
         1.0949e-01, -3.0367e-05, -1.0174e-04, -6.3044e-05, -1.2701e-04,
        -4.6932e-05,  5.1073e-02,  3.0845e-06,  5.2484e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8168e-04, -9.8046e-05, -2.5347e-04, -2.9617e-05, -7.7275e-05,
        -3.4154e-02, -9.2680e-05, -1.6599e-04,  2.3412e-05, -6.4347e-02,
        -3.1348e-05, -8.4191e-02,  3.9767e-05,  5.8939e-04, -1.9625e-02,
        -1.1150e-04,  8.1632e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.7573e-04, -1.2402e-04, -5.3857e-05, -1.0277e-04, -1.9294e-04,
        -1.6014e-04, -2.7023e-05, -1.6702e-04,  1.5937e-01,  1.1066e-05,
         5.4897e-05, -1.1669e-04, -1.3912e-04,  1.0453e-04,  2.3497e-05,
         2.9454e-06, -1.5239e-04,  1.7527e-02,  1.1045e-04, -6.7583e-06,
         6.4382e-05, -2.6115e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5978e-04, -6.9397e-05, -7.4351e-05,  4.1695e-05, -7.6731e-05,
        -1.7721e-04, -4.4085e-05, -4.0034e-05,  3.2264e-02, -5.2102e-05,
         5.7008e-05, -8.9782e-05,  1.0614e-05, -6.7402e-06,  6.4854e-06,
        -3.0451e-05, -4.5908e-05,  8.6075e-02, -6.9560e-05,  2.3888e-02,
         7.3980e-05,  2.6907e-05, -3.9990e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<SumBackward1>)]
Iter #650: [tensor([ 5.9225e-04, -1.6754e-04, -1.3567e-04, -2.0683e-04, -6.4860e-05,
        -1.2389e-04, -3.0197e-02,  2.6228e-04, -9.7046e-07,  1.0430e-04,
        -8.0389e-02, -2.7923e-04, -1.2469e-01,  3.9788e-05, -4.8287e-04,
        -4.8699e-05, -8.4036e-05,  1.6766e-06, -3.0646e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7994e-04, -6.6299e-02,  4.5469e-05,  3.8034e-05, -5.0468e-02,
         1.0759e-05,  6.8670e-05,  7.8186e-05, -1.5574e-05,  1.3545e-04,
         2.9554e-04, -4.4525e-03,  3.1184e-05,  1.1449e-04, -6.8091e-02,
         1.0995e-04,  7.1410e-05, -2.4602e-05,  8.7370e-05,  3.1500e-05,
         8.1011e-05, -1.6982e-02, -7.7005e-05, -1.3955e-04, -5.7307e-05,
         2.9776e-05,  1.9114e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.6767e-04, -3.5767e-02, -3.1178e-05, -1.8194e-05, -6.0096e-02,
        -3.0269e-05, -3.5935e-05,  2.7130e-05, -5.0441e-06,  5.6419e-05,
         6.2791e-05, -3.3228e-04,  3.5831e-05,  5.8346e-05, -3.2200e-02,
        -8.4695e-05,  6.4616e-05, -1.5659e-05,  1.3148e-05,  2.9950e-05,
         5.9955e-05, -2.3930e-02, -7.7357e-05, -7.0348e-05, -3.2268e-05,
        -4.0316e-02, -1.1746e-05,  2.7269e-05,  2.3482e-05, -1.9525e-05,
        -2.9835e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.5666e-05, -6.3608e-02,  6.6150e-05, -3.0956e-05, -2.1726e-02,
        -6.2797e-05, -3.2253e-06,  8.9101e-08, -8.9166e-05,  6.4541e-05,
         2.4534e-04, -3.3529e-03, -6.7620e-06,  2.8648e-05, -1.2535e-01,
         4.3327e-05,  3.3769e-05, -6.3023e-05,  4.9098e-05, -6.0303e-05,
        -2.8579e-05, -1.2843e-02, -3.1582e-05, -1.1269e-04, -3.5303e-02,
        -7.0845e-05, -6.9371e-05, -1.2571e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.0392e-04, -8.2684e-06, -1.2580e-05,  3.0272e-02,  5.0619e-05,
        -1.4586e-04,  3.1392e-02,  4.8343e-05,  1.9928e-04,  5.2270e-05,
         6.7398e-05,  2.5141e-02, -1.0088e-04,  1.7313e-05,  1.1436e-04,
         1.1816e-04, -2.9068e-05,  1.2101e-05,  8.8927e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.6476e-04,  4.8910e-05,  8.2809e-05,  2.6809e-02, -6.0046e-05,
        -1.1732e-05,  2.2476e-02, -4.1984e-05,  1.6684e-04,  4.2461e-06,
         1.2643e-05,  8.6149e-02,  7.1623e-05,  7.3784e-05,  7.7031e-02,
         3.6170e-05,  1.0803e-04, -6.3304e-05,  9.8127e-05, -7.3166e-05,
         2.4402e-05,  1.5574e-04,  1.3012e-04,  1.0467e-05, -3.5128e-05,
        -3.3627e-05,  8.4028e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3228e-05,  4.2173e-05,  9.4231e-05,  2.6008e-02, -3.9202e-05,
         1.3183e-04,  4.1337e-02, -3.3171e-05,  6.8694e-05, -5.0114e-06,
        -5.1138e-05,  6.7221e-02, -4.3619e-05,  1.6587e-05,  3.0115e-02,
         4.5345e-05,  2.3937e-05,  1.9352e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.7827e-04, -7.2160e-05, -1.7961e-07,  9.1823e-06, -5.3523e-05,
        -2.6004e-04,  2.1407e-02, -7.4648e-05,  3.4032e-05, -2.9707e-06,
        -8.4857e-06,  1.2574e-05, -1.1028e-05,  1.0974e-04,  1.2991e-02,
         3.1530e-06,  1.4516e-06,  1.2266e-02, -6.0943e-06,  2.5085e-04,
         1.5446e-02, -1.0373e-04,  3.0930e-05, -1.7310e-07,  1.1198e-05,
        -2.5957e-06,  3.9618e-02, -1.8056e-05,  2.4734e-04, -4.7703e-05,
         7.2259e-07, -2.7105e-04,  5.2886e-02, -8.6096e-05,  2.0375e-05,
         9.7422e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.8353e-05,  2.9844e-06, -4.3442e-05,  7.2524e-05, -4.9400e-05,
        -2.0003e-04,  2.2982e-02, -5.9718e-05, -3.4945e-05,  9.0357e-05,
        -2.0833e-05, -9.5187e-06, -1.8295e-05,  5.8789e-05,  2.0425e-02,
         4.0963e-06, -2.3442e-05,  1.3122e-02, -8.4834e-07,  3.5282e-04,
         3.3023e-02,  1.1566e-05,  4.3696e-05,  2.7305e-05,  1.1540e-05,
         1.5963e-04, -3.8881e-06, -7.4091e-08, -3.3150e-05,  3.0483e-05,
        -4.2781e-05,  2.2751e-05,  2.8279e-05, -1.1469e-05, -3.8645e-06,
        -6.2650e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0543e-03,  3.1282e-06,  2.0122e-05,  3.9494e-05,  1.0978e-05,
        -1.7008e-05, -3.6804e-05,  1.5099e-02, -2.9464e-05,  5.5495e-06,
         1.1342e-04,  3.6315e-06, -7.8461e-06, -3.5004e-04,  8.4310e-02,
        -2.0146e-05,  2.0210e-05, -1.0668e-04, -9.6987e-06, -1.2363e-05,
        -1.1997e-04,  1.0345e-04, -7.4015e-05, -1.2453e-05, -1.7113e-05,
         2.2084e-05, -6.2071e-05,  5.3354e-05,  4.3312e-02,  3.7434e-06,
        -3.8866e-05,  1.1010e-05,  5.0286e-02,  4.0968e-05,  5.4979e-05,
         1.8981e-05,  2.0294e-05, -2.6899e-05,  5.5766e-02, -4.6377e-05,
         2.0429e-05,  1.2548e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2245e-03,  6.1956e-05,  1.0309e-04,  4.6464e-05, -2.2413e-06,
         5.2520e-05,  2.0469e-05,  1.4514e-02, -9.8678e-07,  5.8697e-05,
         3.8376e-04,  4.1938e-05,  7.3056e-05, -1.5687e-04,  3.4221e-02,
        -4.6955e-05,  3.3242e-05, -1.8102e-05,  5.0756e-05,  2.5826e-05,
        -1.7371e-05, -2.4557e-05,  1.2241e-05,  3.6723e-05,  8.2704e-06,
         7.0455e-05,  9.4518e-05,  1.2593e-05,  9.5168e-02, -1.2282e-05,
         2.4578e-05,  2.1105e-05, -1.0110e-04,  1.6241e-05,  1.6344e-05,
         2.3307e-02,  4.1581e-05, -5.4720e-06,  2.5762e-02,  8.2176e-05,
         3.5335e-05,  3.8556e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7732e-04, -1.9641e-05,  5.0896e-06, -2.3898e-05, -1.3121e-06,
        -5.7563e-05, -1.3421e-05, -1.1115e-02, -3.3561e-05, -6.7025e-05,
        -3.4040e-04,  2.4255e-05, -2.1732e-05,  2.2320e-04, -3.5845e-02,
        -3.7850e-05,  2.5052e-05,  5.8448e-05,  3.9661e-05, -4.2044e-06,
         1.0421e-05,  1.8186e-05,  8.4183e-05, -1.8633e-05,  6.8557e-06,
         3.8074e-05,  8.3636e-06, -3.8733e-05, -6.2925e-02,  1.9490e-05,
        -2.1926e-05, -2.9315e-05, -3.8001e-05, -9.3756e-05,  3.5346e-05,
        -9.1960e-06,  5.6042e-05, -3.1345e-04, -1.0493e-04,  2.5233e-06,
        -3.2676e-05, -1.4751e-02, -4.5878e-06,  5.2485e-05,  3.0699e-05,
        -1.5568e-01,  6.2013e-05,  5.2301e-05, -2.7711e-05], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #700: [tensor([-9.1662e-04, -1.4431e-05,  1.4227e-04, -3.5416e-05, -2.1867e-05,
         9.5904e-05,  4.1556e-02,  8.0573e-05, -2.7461e-06, -4.1142e-05,
        -5.4749e-04,  3.3051e-02, -3.9826e-05,  9.0065e-05,  6.5739e-02,
        -1.0888e-04,  5.1181e-04, -3.1646e-05, -4.9450e-04, -5.4947e-05,
         3.3021e-05, -3.0221e-05, -7.9731e-05,  1.4221e-01,  3.9895e-05,
        -2.2752e-05, -4.6717e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.2502e-04,  1.2649e-02, -4.5097e-05, -9.4461e-05,  6.7867e-02,
        -6.2677e-05,  1.3244e-04,  1.3588e-05,  1.6035e-01,  1.0066e-05,
         7.2829e-05,  4.5842e-05,  9.2581e-05,  1.7626e-05, -3.1249e-05,
         1.5818e-04,  1.0496e-05, -6.4548e-06,  8.6715e-05, -1.3476e-07,
         7.3821e-05, -1.9479e-05, -9.2995e-05, -2.2284e-05,  1.0936e-05,
         4.9331e-06, -4.1081e-05, -2.6780e-05, -2.3259e-05, -6.4668e-05,
         3.3879e-05,  5.1085e-06,  3.8037e-05,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.0549e-04, -1.1603e-02,  6.5588e-05,  2.4863e-05, -2.5793e-02,
        -1.2433e-04, -1.4173e-04, -6.6825e-05, -2.0297e-02,  1.6302e-05,
        -3.3941e-05, -6.9611e-05,  6.6402e-05, -7.4420e-02,  4.7851e-05,
        -4.0605e-05,  2.5753e-05, -1.1086e-01, -7.6566e-05,  1.6028e-05,
         1.7787e-04, -3.0689e-02, -1.4156e-04,  2.3141e-05,  6.6789e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.3855e-04, -1.6113e-02,  8.5509e-06,  6.0838e-05, -5.4005e-03,
        -6.4065e-05,  8.1795e-05,  7.7960e-06, -1.0610e-01, -2.1386e-05,
         1.0116e-05, -3.8341e-06,  3.5284e-05, -6.8667e-03, -6.5809e-05,
        -3.6007e-05, -3.7069e-06, -3.0892e-02, -1.0830e-04, -4.0192e-05,
        -6.5480e-07, -5.2525e-05,  2.9810e-05, -2.6766e-02, -8.6939e-05,
        -1.9669e-05,  4.9599e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3058e-04, -1.1941e-01,  1.0569e-05, -5.5603e-05,  1.9330e-04,
        -3.0412e-02,  3.1550e-05, -2.6248e-05, -1.8086e-05,  3.0847e-05,
         2.3422e-04, -1.4268e-03, -5.8196e-05, -6.6280e-05, -2.1549e-02,
        -3.2001e-05,  1.3036e-05,  1.5810e-05, -3.5212e-02,  8.7282e-06,
         6.2088e-05,  9.9172e-06, -1.5557e-05,  1.6570e-05,  1.1093e-05,
        -3.8460e-03, -4.1847e-05, -5.5491e-05, -1.4820e-05, -9.2493e-03,
         1.6020e-05, -5.1434e-06,  7.5916e-05,  4.8594e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9082e-04, -5.5268e-02, -3.3215e-05, -9.1323e-05,  2.3258e-04,
        -1.0073e-01, -3.0718e-06, -6.0174e-05, -1.0065e-03, -1.5423e-05,
         5.2247e-05, -1.5214e-02, -1.5428e-04, -8.0950e-05, -9.4663e-02,
        -2.4547e-05, -5.6211e-05, -1.5408e-05, -8.1461e-03, -1.8916e-05,
        -2.2295e-05,  1.4479e-05,  8.3636e-06, -5.9643e-05, -2.7448e-05,
        -2.4572e-02, -2.7444e-05, -2.4706e-04, -4.0834e-05, -2.9034e-02,
        -6.0136e-05, -1.8241e-05, -3.0926e-05,  2.5066e-07], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.9243e-04, -9.2348e-02,  1.9516e-06, -1.2299e-05,  6.6693e-05,
        -3.4750e-02,  3.5944e-05, -3.7241e-06, -3.7245e-05, -1.4555e-05,
         1.8576e-04, -2.2250e-03, -7.8333e-05, -4.8757e-05, -3.2306e-02,
         3.7397e-06,  3.0197e-05,  1.2821e-05, -2.0782e-02, -8.6885e-05,
        -1.2384e-06,  2.6645e-05,  2.0125e-05, -6.7708e-06,  9.3009e-06,
        -8.7105e-03, -3.6190e-05,  4.1746e-05,  6.4996e-05,  6.6287e-05,
         8.0045e-06,  3.9866e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4341e-04,  1.3321e-02,  7.2484e-05, -3.1055e-04,  3.8391e-02,
         1.6959e-04, -1.1017e-04, -3.5389e-05, -3.5542e-04, -3.6121e-05,
        -1.0506e-04, -9.3870e-05, -4.1584e-05, -1.3824e-05, -4.7311e-06,
        -4.9713e-05,  1.6695e-06,  1.1181e-01,  7.0890e-05,  2.0026e-04,
         3.6971e-05, -3.1854e-04, -1.0611e-05, -4.8895e-05,  5.3363e-02,
         5.1646e-06,  1.0650e-04, -4.9697e-05,  1.2650e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.9225e-04,  3.7672e-02, -4.2470e-05, -2.9535e-04,  2.7315e-01,
        -7.1298e-05, -1.7502e-04, -1.6708e-04, -3.0594e-04, -1.1208e-04,
         5.8501e-05, -2.3969e-04, -4.0315e-05, -8.3035e-05,  5.3164e-05,
         1.1126e-04,  3.4349e-05, -3.3324e-04,  6.5941e-04, -3.6069e-06,
        -2.3008e-05, -1.2360e-04,  1.9936e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6259e-05,  3.3751e-02,  6.4930e-05, -4.3828e-04,  2.2248e-01,
         6.7452e-05, -9.1963e-06,  1.0630e-04, -4.6313e-04,  7.3123e-05,
         9.2054e-05, -1.1419e-05, -1.2442e-04,  1.0553e-04,  2.1983e-05,
         5.8438e-05,  5.6783e-05,  7.9864e-02, -2.3277e-04,  1.9148e-04,
         1.7482e-04, -2.2216e-05, -6.7864e-05, -1.0287e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1029e-03,  1.8484e-05, -7.3479e-05, -1.3431e-02, -5.8773e-05,
        -4.0524e-05,  8.1809e-06,  4.3479e-05,  7.9725e-05,  6.4124e-05,
         2.6882e-04,  1.2465e-04, -1.1336e-04, -1.7198e-04, -1.8082e-01,
         3.1073e-04, -1.3797e-04, -1.4060e-05, -2.7225e-04, -1.2515e-04,
         5.4624e-05, -9.4321e-05,  7.0213e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.0839e-04,  8.8176e-05,  2.9076e-05,  3.1513e-02, -7.8318e-05,
         5.7670e-05,  1.5242e-04,  5.1653e-05, -1.6238e-04, -1.2013e-06,
         2.1382e-05,  7.6737e-05, -4.9501e-05, -1.4294e-05,  1.3682e-01,
         2.9550e-05, -5.8868e-06,  3.6005e-05,  2.3153e-04,  1.0441e-04,
        -7.3988e-05,  1.1143e-04, -2.3508e-05,  8.5919e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
