Iter #50: [tensor([ 3.9741e-03, -1.0468e-03,  7.8590e-05,  6.5996e-05, -1.2873e-04,
         4.5744e-05, -1.3086e-04, -8.0310e-05,  5.0515e-06,  1.3404e-04,
         7.2329e-05,  6.8180e-05, -8.7211e-05, -1.9515e-04, -5.7293e-05,
         6.1799e-05,  9.7392e-05,  1.0673e-06,  7.1122e-05,  7.7076e-05,
         3.1589e-05, -3.2714e-05,  1.1308e-04,  3.3628e-05, -3.1217e-05,
         1.1308e-04,  3.3665e-05, -4.3093e-05,  1.6850e-05, -1.0305e-04,
         6.0060e-06, -2.3151e-04, -6.5512e-05, -8.2755e-05,  1.9070e-04,
        -4.4248e-05,  5.1578e-05, -2.7963e-05,  7.5654e-05, -1.0306e-04,
         1.1108e-04, -2.4647e-04,  1.6658e-05,  9.9451e-05,  5.8926e-05,
         2.2290e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1050e-03, -2.6978e-03, -2.7545e-04, -7.9758e-05, -1.1579e-04,
         4.7671e-04, -1.9298e-04, -7.8192e-05, -2.4136e-04,  1.0056e-04,
        -4.0421e-04,  1.0149e-04,  3.7392e-04, -2.5573e-04, -1.8898e-04,
        -1.2118e-04, -1.0935e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7049e-04, -8.0909e-04,  2.3920e-05,  4.0250e-05,  4.8812e-05,
         1.0237e-04, -8.6677e-05, -2.9676e-05,  1.0711e-04,  3.7903e-05,
        -8.1759e-05,  2.1733e-05, -3.6750e-05, -2.1134e-05,  1.1315e-04,
         1.1214e-04,  7.4302e-07, -5.8574e-07,  3.3305e-05,  1.0900e-04,
         6.5211e-05,  1.2859e-06,  7.4648e-05, -5.9190e-05, -1.8653e-05,
        -2.0716e-05, -5.1832e-06,  2.0866e-05,  1.0465e-04, -3.9917e-05,
         1.3392e-06,  5.4384e-05, -2.8981e-05, -1.4216e-04, -5.6343e-05,
         1.1858e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.8735e-04, -2.7513e-03, -9.1948e-05, -1.9337e-04, -3.9998e-05,
         3.9492e-04,  7.0637e-06, -6.1839e-05, -7.3487e-05,  1.2155e-04,
        -5.8384e-05,  9.0396e-06,  4.9512e-05, -3.5939e-04, -1.1872e-04,
        -9.8531e-06,  4.6915e-05,  7.9460e-05,  7.9990e-05,  2.6193e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2638e-03, -1.7295e-03, -9.7458e-05, -6.4063e-05,  4.4317e-05,
        -1.2817e-04,  4.4092e-09, -2.1816e-05, -5.6256e-05,  7.1434e-05,
         2.6021e-05, -2.0624e-04,  2.2217e-04, -1.0665e-04,  7.1906e-05,
         8.1199e-05, -9.9008e-05,  8.1814e-05, -2.0663e-04, -1.1358e-04,
        -4.1342e-05, -5.9855e-06, -3.4681e-04,  1.9603e-05,  1.1056e-04,
        -7.9488e-05,  5.2128e-05, -1.0301e-05, -2.3376e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3941e-03, -1.4411e-03, -6.5798e-05, -4.9827e-06, -9.4563e-05,
        -9.3849e-05,  4.7367e-06, -2.9345e-05, -8.9344e-05, -2.5792e-05,
        -1.1025e-04,  4.0655e-05, -1.0195e-04,  7.8724e-05,  1.4602e-04,
         8.6459e-05,  2.4348e-05, -2.5487e-04, -2.7093e-04, -2.8888e-05,
        -5.2025e-06,  2.9662e-04,  4.7241e-05,  1.5123e-04, -3.4532e-05,
        -1.5945e-04, -9.8729e-05, -1.4476e-05, -2.1954e-05, -8.9961e-05,
        -6.7341e-05, -1.4469e-04, -1.2188e-04, -2.5616e-05, -9.5168e-05,
        -4.6470e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6978e-03, -3.0632e-03, -6.6474e-04,  3.3485e-04,  1.2130e-04,
        -6.7221e-05,  2.5249e-04,  9.9704e-05, -1.1996e-04,  9.8697e-06,
        -6.1757e-04, -4.6203e-05,  9.1319e-05,  1.9902e-04,  2.6064e-05,
        -4.2953e-06,  6.6015e-06,  5.0837e-05, -5.3736e-05, -2.1397e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.1122e-03, -1.3766e-03, -1.7419e-04,  9.7719e-05,  1.0821e-04,
        -2.8287e-05,  1.2782e-04,  6.2481e-06, -1.5549e-04, -1.1434e-05,
         6.2109e-05, -6.2466e-05,  5.2225e-06, -2.3474e-05,  6.7190e-05,
         1.3008e-05, -2.1811e-04, -9.6709e-05, -1.1298e-04, -9.1960e-06,
         1.9630e-05,  4.7626e-05, -2.1418e-04,  3.6290e-05,  7.1796e-05,
         1.9364e-05, -8.2730e-05, -1.6165e-05,  1.7746e-05, -1.1610e-04,
        -4.2289e-05, -1.8348e-05, -5.7533e-05, -8.5779e-05,  5.9659e-07,
         1.3769e-04, -6.8749e-05, -5.0249e-05,  5.8740e-05, -7.5978e-05,
        -1.7902e-04,  8.9711e-05, -1.4821e-05, -7.0031e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.5347e-03, -9.5191e-04,  8.0435e-05, -7.5905e-05,  7.5538e-05,
        -2.6889e-05,  1.7713e-05,  6.9891e-05, -1.7927e-05,  5.0119e-05,
        -3.8529e-05,  4.9554e-05, -5.3626e-05, -1.5781e-04,  7.3653e-06,
        -2.5313e-06, -1.2995e-05,  4.4343e-06,  6.7764e-05,  1.7974e-05,
         1.4639e-04,  8.5414e-06,  1.5592e-05, -1.4086e-04,  1.3571e-04,
         1.6185e-04,  3.4466e-05, -1.7916e-04, -5.9632e-05, -1.0604e-04,
        -2.3643e-05, -2.2407e-05,  1.4375e-04, -9.0060e-05, -2.0647e-04,
         1.7527e-06,  1.6023e-04, -1.7486e-04,  6.7803e-05,  2.6810e-06,
         3.6755e-05, -6.4579e-05,  5.5599e-05,  1.3502e-06,  6.9385e-05,
         6.8428e-05,  2.8415e-06,  1.6080e-05,  1.2406e-04,  4.6431e-05,
         5.1915e-05,  2.6515e-05, -4.9453e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0210e-03, -1.7522e-03,  1.3681e-04, -3.4941e-06, -1.1913e-04,
        -3.5101e-05, -5.2883e-05,  1.0491e-04,  1.2376e-04,  3.9567e-05,
         1.0166e-04,  2.0318e-04,  2.6464e-04,  2.3200e-05, -1.1454e-05,
        -1.1309e-04, -2.9221e-05,  6.4663e-05,  8.3954e-05,  1.4359e-04,
         2.2350e-05,  1.6703e-04,  6.9201e-05,  9.4211e-05,  1.4324e-05,
         1.7272e-04,  1.7506e-04,  5.1471e-05, -1.8488e-04, -1.0615e-04,
         6.6631e-06,  6.3327e-05,  3.1044e-05, -3.4031e-05, -1.5518e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.2039e-04, -1.2314e-03,  5.2822e-05, -5.2283e-05,  9.6836e-05,
        -2.9271e-04,  2.7693e-04,  1.2368e-04,  3.5628e-04,  1.1384e-04,
         1.8516e-04, -3.8516e-05,  1.3391e-04,  8.1865e-06, -9.5508e-05,
         1.1851e-04, -5.7250e-05,  3.5341e-05,  1.4770e-04,  1.2650e-04,
         1.5326e-04,  2.0453e-05,  1.1304e-04, -1.7361e-04,  3.2140e-05,
         2.6453e-04, -3.0931e-05, -4.2247e-05,  1.5363e-04, -5.1515e-05,
         2.9072e-05, -1.7157e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.2116e-03, -1.4413e-03, -1.3181e-04,  5.7990e-05, -1.3530e-04,
        -3.8533e-05,  5.9184e-05, -1.6690e-04, -2.3968e-06,  2.9075e-04,
         7.5432e-05, -5.2172e-05,  3.2870e-05, -2.9609e-04,  1.2642e-04,
         9.4055e-05,  1.0513e-04, -1.5280e-04, -2.8911e-05, -3.4709e-05,
         1.6154e-04,  7.7778e-05,  1.0599e-05, -1.8707e-04, -1.1206e-04,
        -5.6866e-05, -1.0051e-04, -8.9152e-05, -7.9347e-05, -6.1764e-05,
        -5.9986e-05,  6.2464e-06, -5.7368e-05, -6.1213e-05, -2.3824e-04,
         4.8665e-05,  4.0691e-06, -2.2495e-05, -4.8801e-05,  4.3199e-05,
        -1.1370e-05,  5.4219e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([-3.2880e-03,  2.5879e-02,  8.7479e-05,  1.4053e-04, -1.8934e-04,
        -6.3943e-05,  2.4379e-05, -6.8787e-05,  8.9269e-05, -6.6584e-05,
        -2.4628e-04, -2.4738e-04,  3.1134e-05, -3.6893e-04, -4.0307e-05,
         3.8914e-05, -1.2446e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5538e-03, -1.9500e-02,  6.7632e-05, -7.5122e-05,  6.6106e-05,
         1.1664e-04,  8.0960e-06,  5.6173e-05,  1.2889e-04,  1.5922e-04,
         8.6962e-05, -6.6906e-05,  1.2765e-05,  6.8379e-05,  2.9363e-04,
         5.7251e-05, -7.1671e-05,  8.3304e-06, -2.3742e-05,  3.6381e-05,
         6.0483e-05,  7.2203e-05,  9.9582e-05,  1.5484e-04,  2.5304e-05,
         9.6178e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9590e-03,  2.7245e-02, -1.7555e-04, -2.0082e-05,  6.5478e-05,
        -4.4664e-04,  2.1149e-04, -4.3130e-04,  8.9052e-05,  1.0666e-04,
         1.2806e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4763e-03,  1.2388e-02,  3.5840e-05, -4.6941e-06,  3.0605e-04,
        -5.1418e-05, -1.0352e-04,  4.5911e-05, -1.2354e-04, -3.1112e-04,
         6.0606e-05, -3.8773e-05, -2.9668e-04,  1.5041e-04,  8.9155e-05,
         4.7253e-05,  1.8628e-06,  5.3162e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0463e-03,  1.4712e-02, -1.8916e-04, -9.8169e-05, -2.6979e-05,
        -1.2451e-04, -3.7488e-05, -8.5147e-05, -1.5155e-05, -5.5454e-05,
         1.2026e-06, -6.7795e-05, -1.0913e-04,  3.9685e-05, -7.9983e-05,
        -4.6805e-05, -6.5629e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4890e-04, -8.4768e-03,  4.0540e-05, -4.6280e-05, -1.0144e-05,
         2.0251e-05, -7.8559e-08,  1.3803e-05, -8.4886e-05, -1.3359e-05,
         2.5663e-06, -3.5486e-05, -5.1508e-05,  1.1674e-04,  1.8538e-05,
         5.9173e-05, -2.9304e-05,  6.7908e-05, -6.5625e-05, -1.4926e-05,
        -4.5051e-05,  3.2773e-05, -4.5700e-05, -1.6019e-05,  1.1078e-05,
        -3.3792e-05, -7.3787e-05, -6.9402e-05, -3.6413e-05, -2.0591e-05,
        -4.1056e-05,  5.7735e-05, -5.9382e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0553e-03, -1.7243e-02,  4.2947e-05,  3.6485e-05,  1.2790e-04,
         9.3922e-05,  3.9748e-04, -3.4926e-06, -4.5199e-05, -9.6936e-05,
         8.9106e-06,  3.2263e-06,  1.5642e-04,  2.1372e-04, -1.1916e-04,
         3.7333e-04, -6.0820e-05,  1.0157e-04,  2.4612e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.6209e-04,  2.4930e-02,  1.3223e-04, -1.0296e-04, -4.6920e-05,
        -2.6736e-04, -2.3285e-04,  2.1495e-05,  9.8071e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1038e-04, -2.2296e-02, -1.3094e-04, -1.8779e-05,  3.2551e-04,
        -2.6614e-04,  3.5485e-04,  5.9596e-05, -2.8698e-05,  4.1184e-04,
        -8.2558e-06,  1.2298e-04, -3.6516e-05, -5.5590e-05,  2.8829e-04,
         3.5051e-04,  5.8814e-05,  7.9187e-05,  4.7602e-05, -6.7630e-05,
         1.1614e-04, -1.5939e-05, -3.6460e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7270e-04, -8.2910e-03,  1.7552e-05, -1.5991e-05,  3.9816e-06,
         1.2580e-06,  1.0530e-05,  8.3181e-05, -2.0374e-05, -1.2904e-05,
         6.1284e-06,  2.6867e-05, -1.3858e-05, -3.7251e-05, -4.8392e-05,
         7.8235e-05,  2.8081e-05, -2.5346e-05, -5.2414e-05, -1.9311e-05,
        -1.0367e-04,  1.4917e-05, -1.1830e-04, -1.6443e-05,  3.6829e-05,
        -4.2593e-05, -2.7326e-05,  4.8901e-05,  8.2187e-06,  4.8745e-05,
         2.6060e-05,  6.7557e-05, -6.5786e-05, -5.9836e-05,  8.6281e-06,
         4.4602e-05,  2.6002e-05, -6.2955e-05,  4.2707e-05,  4.8631e-05,
        -4.8801e-05,  4.7629e-05, -1.2239e-05, -4.8115e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5701e-03, -1.2649e-02,  8.7090e-05, -8.2406e-05,  3.9435e-04,
         9.9831e-05, -4.4447e-05,  4.3319e-05, -6.6787e-05, -6.8953e-05,
         1.1220e-05,  9.7090e-05,  1.7015e-05,  7.3414e-05,  8.8367e-05,
        -1.5299e-05,  9.5740e-06,  7.7140e-05,  2.4355e-04, -9.4566e-06,
        -1.6050e-05,  3.3946e-04, -9.0314e-05, -1.6701e-04,  1.7319e-04,
        -2.8274e-06,  9.4189e-05,  7.2370e-05, -4.1919e-05,  1.0759e-04,
        -1.3394e-05,  1.3063e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7071e-04, -6.3106e-03, -4.0416e-05, -8.8069e-05, -1.7809e-05,
         1.3985e-04, -4.4960e-05, -1.5596e-07, -9.5354e-05,  1.8031e-04,
        -1.3770e-05,  4.9313e-05,  2.2078e-05, -1.7472e-04,  3.3994e-07,
        -2.8600e-06,  6.6659e-05,  5.5162e-05, -5.5056e-05, -8.8350e-06,
         1.3829e-05,  1.3193e-05,  1.7787e-04, -2.6491e-05, -1.0570e-05,
        -1.8504e-05,  3.6583e-07, -5.3236e-05, -1.3838e-04, -5.0873e-05,
        -6.3144e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-2.4423e-04,  5.3926e-02, -5.1877e-05, -1.0186e-04, -1.4112e-04,
        -2.0265e-05,  9.8580e-05, -3.2239e-04, -1.8632e-05, -2.1744e-04,
        -2.7789e-05, -1.0031e-04, -1.6159e-04, -9.9672e-05, -1.8377e-04,
        -2.9022e-04,  8.9465e-05, -1.0376e-04, -1.2538e-05, -1.6822e-04,
        -1.5871e-04, -6.4622e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5138e-05, -3.4658e-02,  2.7292e-05,  1.1014e-05,  8.2229e-06,
         1.6682e-05, -2.0211e-05, -5.8909e-06,  2.4074e-05, -1.1566e-05,
         4.8237e-05,  2.2030e-05,  8.8625e-07,  3.8188e-05,  4.2481e-05,
        -2.9619e-05,  2.0682e-05, -1.8340e-05,  5.0600e-05, -2.2589e-05,
         2.9207e-05, -1.4633e-05, -5.9634e-05,  4.5638e-05, -2.4185e-07,
         3.2643e-05,  1.1694e-05,  2.0344e-05,  7.9793e-05,  7.7875e-05,
        -3.9186e-05,  8.7021e-05, -2.1817e-05,  3.7961e-05,  7.2647e-06,
         2.0076e-05,  3.1965e-05, -5.6654e-06,  9.8432e-06,  8.9575e-06,
         3.0091e-06, -1.5307e-05,  1.2869e-05, -3.3918e-05,  7.1951e-05,
        -4.4889e-05, -3.0415e-05, -2.9393e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.0450e-04,  2.3991e-02,  1.0301e-05, -4.1516e-05, -7.1269e-05,
         9.1025e-05, -2.0405e-05,  6.5644e-05,  5.1469e-05,  1.1153e-04,
         1.8018e-04,  5.8995e-05,  6.2321e-05,  3.0696e-05, -2.3656e-05,
        -5.5113e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9505e-04, -2.8745e-02,  8.8432e-05,  3.2551e-05,  4.3635e-05,
         1.3655e-05,  2.0639e-05,  5.0190e-05,  5.4355e-05, -1.9325e-05,
        -1.4018e-06,  2.4777e-06,  1.0264e-05, -2.1783e-05,  8.6766e-07,
         4.7585e-06, -3.0658e-06,  3.2322e-05,  5.9958e-06,  1.2564e-05,
         5.2573e-05,  1.3558e-05,  6.3986e-05,  7.4160e-06, -2.4602e-06,
         2.4464e-05,  3.3504e-05,  1.2752e-05,  9.7184e-06,  4.0756e-05,
         4.3686e-06,  1.1179e-05,  2.1781e-06,  2.6115e-05,  9.5428e-06,
         1.8619e-05,  1.2225e-05, -1.3732e-05,  2.8197e-05, -1.9538e-05,
         1.4423e-05,  4.0298e-05,  1.2638e-05,  2.8741e-05,  5.7418e-05,
         4.7910e-06,  9.1901e-05, -1.9911e-05,  2.1124e-05,  5.9621e-05,
         1.5142e-05,  6.1309e-06,  3.6793e-05,  3.3303e-06, -5.1926e-06,
         5.1554e-05,  3.0389e-06,  2.4622e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3212e-04, -2.6489e-02,  4.0779e-05,  2.8681e-05, -2.2001e-06,
         1.1185e-05, -1.0506e-05,  4.1484e-05,  4.8630e-05,  1.0103e-05,
         2.5074e-05,  7.3045e-07,  2.1211e-05,  1.5755e-06,  1.0839e-05,
         1.7699e-05,  1.7138e-06,  5.1087e-05, -1.0225e-06,  1.0366e-04,
         1.0148e-05,  7.8427e-05,  5.9647e-06,  4.6545e-05,  2.0595e-05,
         3.7508e-05,  3.6343e-05, -1.2229e-06, -1.6273e-06, -3.4082e-06,
         6.9924e-07, -3.8015e-06,  6.5640e-05, -8.2209e-06, -7.2807e-06,
        -1.0236e-05, -6.6943e-06,  7.0735e-05, -2.4154e-05,  7.7327e-06,
         3.7732e-06,  1.3615e-05, -1.7277e-05,  5.3270e-05,  1.4070e-05,
        -3.9709e-06, -8.2296e-06,  1.7870e-05,  3.4141e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2222e-04,  2.6555e-02, -1.3119e-05, -4.0616e-05, -9.9422e-05,
        -6.1037e-05, -1.6485e-04, -1.1832e-04, -1.9820e-04, -1.4345e-04,
        -5.7569e-05, -2.0435e-04, -6.0262e-05, -6.8506e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.4284e-04, -4.2297e-02,  1.3226e-04,  2.6588e-04,  2.5669e-04,
         2.4036e-04, -1.7628e-05,  1.7952e-04,  9.4043e-05,  1.5618e-04,
         1.1164e-04,  2.2431e-04, -5.0851e-06,  1.7261e-04,  2.1536e-05,
         1.0052e-04,  4.6801e-05,  9.7329e-05,  1.0335e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.4080e-04, -4.1825e-02,  2.2753e-05,  7.4864e-05,  1.7576e-04,
        -4.8142e-07,  2.2101e-05,  1.7457e-05,  1.2027e-05,  5.2015e-05,
         4.2733e-05,  1.4873e-04,  1.8068e-04, -6.4890e-06, -3.0932e-05,
         2.1117e-05,  9.6929e-05,  1.6364e-05,  1.6388e-04, -1.8238e-05,
        -2.6893e-05,  9.0303e-06,  6.6773e-05, -4.2269e-05,  6.0429e-05,
         6.8677e-05,  9.2687e-05,  5.3695e-05,  3.7952e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-0.0008,  0.0310,  0.0002, -0.0003, -0.0002, -0.0002,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4179e-03, -3.9818e-02,  3.7290e-05,  3.8069e-06, -3.1952e-05,
         1.0030e-04,  3.2229e-05, -9.5703e-06, -1.3733e-06,  5.6079e-05,
        -4.0989e-05,  3.0514e-06,  6.4366e-06,  2.1055e-05,  3.5448e-05,
        -1.1535e-05, -5.3718e-05,  4.6391e-05,  5.6391e-05,  3.8072e-05,
         2.8580e-05, -4.1191e-05, -1.4991e-05, -2.5317e-05, -1.6192e-05,
         9.6894e-05,  3.8429e-05,  5.0100e-05,  6.7867e-05,  1.9553e-05,
         9.9835e-06,  5.3120e-05,  4.6422e-06, -3.4314e-06,  9.3232e-07,
        -8.7854e-07, -3.1015e-05,  3.8082e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5163e-05,  6.0000e-02,  1.1626e-05, -8.3209e-05, -1.7728e-04,
        -2.1312e-05, -5.5625e-05, -1.3850e-05, -7.6419e-05, -7.5903e-05,
        -4.5637e-05,  4.1634e-05, -1.9280e-04, -6.8516e-05, -1.1576e-04,
        -2.2949e-05,  1.7703e-05, -1.0511e-04, -1.5850e-04, -4.5938e-05,
        -3.1984e-05, -2.9750e-05, -3.6501e-05, -5.6724e-05, -1.9455e-04,
        -1.2528e-04, -2.5553e-04, -7.5508e-05, -8.6946e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4982e-04,  3.5279e-02,  2.8157e-05, -6.5248e-06,  8.4054e-05,
        -3.5088e-05, -1.6593e-05, -2.7264e-05, -3.1024e-05,  6.4777e-05,
        -2.5317e-05,  1.0358e-04, -2.6992e-05,  7.3034e-05, -1.6282e-04,
         3.4114e-05, -3.0119e-05, -1.9339e-04, -1.0706e-05, -8.4154e-05,
        -1.8018e-04,  7.2866e-05, -5.4115e-06, -2.6516e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([ 3.1401e-04, -4.7787e-02, -3.2539e-05, -8.4713e-06,  1.8074e-05,
        -4.5743e-06, -3.4454e-06,  7.4631e-06, -4.2223e-05, -6.8483e-06,
        -5.1871e-05,  2.5516e-05, -4.8778e-06, -7.9616e-05, -7.7956e-05,
        -1.8249e-05, -7.6982e-05,  6.5215e-05,  1.0566e-05, -3.0791e-05,
        -1.7225e-05,  3.5397e-06,  3.2953e-05, -2.1123e-06,  3.8429e-05,
        -3.7042e-05,  3.3504e-05, -1.5357e-05,  1.5050e-06, -5.7885e-05,
         2.5672e-05,  2.6146e-05,  6.3387e-05,  1.3404e-05, -6.3976e-05,
        -2.9164e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4226e-03,  3.6672e-02,  1.2218e-05, -2.3324e-04,  5.9369e-05,
         4.7743e-05, -2.8117e-05,  2.7572e-05,  8.5979e-07,  2.8369e-05,
         6.6428e-05, -8.9732e-05,  6.9188e-05, -8.7693e-06,  3.4106e-06,
        -2.1104e-06,  6.5394e-05, -2.6778e-05,  4.3671e-05,  1.3694e-05,
        -3.1825e-05,  4.8262e-05,  1.4535e-04, -5.7696e-05, -1.2606e-05,
         7.5082e-05,  8.8264e-05,  1.2286e-04,  8.3673e-05, -2.8306e-05,
         1.6604e-05, -1.2917e-04,  3.6720e-05,  3.0279e-05,  8.5949e-06,
         8.3013e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7394e-04,  3.6370e-02, -1.6022e-04, -4.9054e-05,  8.4408e-05,
        -4.3611e-05,  4.6612e-05,  2.1345e-05,  3.9055e-05, -1.1523e-04,
        -3.7992e-05,  1.5311e-06,  5.8199e-05, -5.1600e-05, -4.8194e-06,
        -1.3977e-04, -3.1402e-05,  1.6025e-05, -3.1464e-05,  3.2164e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5540e-04,  3.8252e-02,  9.0530e-06, -2.7435e-05, -9.4744e-05,
         8.2639e-05,  1.7753e-05,  3.9758e-05,  6.3934e-05, -4.0540e-05,
         6.3373e-05, -4.9492e-05, -4.2492e-05,  1.3158e-05, -6.0199e-06,
        -3.7553e-05, -2.4127e-05, -6.1931e-05,  3.4171e-05,  1.6480e-05,
         1.2117e-05,  2.3639e-06, -5.4178e-05, -4.4304e-05,  1.4040e-05,
        -2.8128e-05, -1.1862e-05, -1.6956e-05, -7.7355e-06,  6.9084e-06,
        -4.1164e-05,  2.0418e-05,  2.2939e-05,  2.0960e-05,  3.5447e-06,
        -2.2538e-05, -1.6583e-05, -4.5616e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0576e-04,  5.7102e-02, -3.4827e-05,  5.8678e-05, -4.4874e-05,
        -1.2832e-06, -3.5250e-06, -1.7664e-06, -5.2619e-06, -5.4700e-05,
        -2.8484e-05, -3.8030e-05, -1.5359e-05,  8.0191e-06, -8.1117e-05,
         4.0570e-05, -9.2828e-05, -7.5685e-05, -8.0057e-06, -1.8539e-04,
        -2.8873e-05, -6.3848e-05, -7.3692e-05, -7.1299e-06, -2.8681e-05,
         2.6610e-05, -3.0465e-05,  5.8332e-05,  4.4602e-06, -6.0782e-06,
        -5.0485e-05, -7.1750e-05, -3.6518e-05, -4.2366e-05, -6.6676e-05,
         7.7248e-06,  3.3153e-05,  2.6874e-05, -3.6749e-05,  4.0839e-06,
         2.1173e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6216e-04, -8.1175e-02,  8.1002e-05,  1.9879e-04,  1.4856e-04,
         1.4232e-04,  2.1934e-04,  3.5781e-05,  8.1698e-05,  1.7294e-04,
         1.7063e-04,  1.9959e-04,  9.9381e-05, -5.2733e-05,  9.0688e-05,
        -9.9592e-05,  1.0734e-04,  1.1635e-04,  1.2745e-04,  2.8814e-04,
         6.5243e-05, -3.0510e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.6152e-04,  4.9330e-02, -1.2946e-04,  2.4941e-05, -1.6561e-04,
         8.3640e-05,  5.1377e-05, -1.7872e-04, -1.5040e-05, -1.0953e-04,
        -5.6752e-05, -3.4150e-05, -8.7473e-06,  1.3072e-04, -1.1485e-04,
        -3.4133e-05, -5.5354e-05,  7.0588e-05, -2.3566e-05,  7.6337e-06,
        -2.6612e-05, -1.2868e-05, -7.6338e-07, -8.6195e-05,  2.5410e-05,
        -3.7583e-05, -2.3822e-06,  3.6358e-06,  1.5863e-05,  5.6752e-05,
         2.6389e-05, -3.2055e-05, -7.2130e-05, -1.2360e-04, -9.4494e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2447e-04, -3.6326e-02,  4.2813e-06,  5.5815e-06,  4.2511e-05,
        -4.1016e-05, -3.9021e-05,  1.4344e-05,  3.1460e-05,  7.2157e-05,
        -1.0314e-05,  5.1647e-06,  2.9953e-05, -2.8522e-05, -4.2639e-05,
         7.5584e-05,  6.1554e-05, -3.0016e-05,  4.5158e-05, -5.3126e-05,
         4.7812e-05, -4.0357e-05,  6.3516e-05,  6.4477e-05,  6.3870e-05,
         9.9651e-05,  3.7387e-05,  6.6088e-06, -5.1406e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.6156e-04, -5.4976e-02,  9.3817e-05,  2.7755e-05,  8.7884e-05,
        -1.3844e-05, -6.9646e-05,  7.4001e-06,  1.2123e-04,  1.1170e-04,
         3.8275e-05,  7.0835e-05, -6.0519e-07,  4.6960e-05,  1.3086e-04,
        -3.6747e-05,  1.1114e-04,  4.5229e-05,  3.1813e-05,  4.8719e-05,
        -9.6784e-06,  1.1797e-05,  6.4291e-05, -9.4253e-05, -3.0643e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1387e-04,  4.5129e-02, -4.9703e-05, -2.9673e-05, -4.3010e-05,
        -9.7353e-05, -2.9768e-05, -3.1348e-05,  1.6251e-05, -1.5528e-05,
        -8.2351e-05, -3.0007e-05, -2.1899e-04, -2.2987e-04, -6.2605e-05,
        -6.5765e-05, -5.1298e-05, -3.7088e-06, -1.2932e-04, -3.3325e-05,
         4.3587e-06, -2.7618e-05, -1.5297e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9029e-04, -4.9666e-02, -1.3163e-05, -2.7280e-05,  1.7606e-05,
         3.3127e-05,  2.4583e-05, -1.0444e-05,  1.5703e-05,  6.5521e-05,
        -5.0156e-05,  1.2140e-05, -2.1476e-05,  2.9066e-05,  7.8514e-05,
         6.6953e-05,  3.5124e-05, -6.1735e-06, -1.0522e-05, -4.4739e-05,
        -7.2641e-06,  3.3543e-05,  5.6708e-06,  2.7050e-05,  4.4399e-05,
         3.0660e-05,  9.9818e-06,  2.8289e-05,  4.3764e-05,  3.6636e-05,
         1.5403e-05,  5.1178e-05,  7.7530e-06, -1.1879e-05,  1.3219e-05,
        -1.5034e-05, -6.7312e-05, -1.4454e-06, -1.1766e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8731e-04,  2.9572e-02,  8.4879e-05,  7.3739e-06,  4.3475e-05,
         8.5733e-05,  4.6559e-05,  8.0787e-05,  1.0899e-04, -2.4973e-06,
         1.6477e-05,  1.9173e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-1.2263e-03, -8.1634e-02,  2.0761e-05, -2.1572e-05, -1.5934e-05,
         5.3057e-05,  4.6607e-05,  3.6285e-05, -2.4585e-05, -3.5299e-06,
         2.4297e-05, -9.0368e-05, -1.0575e-05, -4.7153e-05, -7.1989e-05,
         3.6635e-05,  5.0920e-06,  1.0898e-04,  9.8160e-05,  2.2188e-05,
         3.7796e-05,  5.2137e-05,  2.0522e-05,  6.6743e-05,  2.0180e-05,
        -7.1268e-05,  9.7629e-05,  6.5577e-05,  3.1495e-05, -1.3119e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.6389e-04,  5.6296e-02,  7.6733e-05, -3.7407e-05, -2.0397e-05,
         1.6381e-05, -2.9746e-05,  7.8075e-05, -5.4411e-05, -1.0965e-04,
        -3.9705e-06, -7.7978e-06,  2.2774e-05, -4.1264e-06,  1.4150e-05,
        -4.2859e-06,  2.4291e-05, -1.7844e-05, -3.7948e-05, -3.1488e-05,
         2.0529e-05, -1.3723e-05, -6.9735e-05, -2.8560e-05, -9.1036e-05,
         2.6704e-07, -1.1324e-05, -4.5541e-05, -4.3484e-05, -3.7906e-07,
         2.2579e-05, -6.3344e-05,  2.7714e-05, -4.7459e-05,  1.5337e-05,
        -4.8216e-05, -4.3550e-05,  1.9992e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4098e-04,  4.2166e-02,  2.4554e-05,  1.4931e-05,  2.5301e-05,
         2.3867e-05,  3.7142e-05,  4.8960e-05,  6.4011e-05,  6.8993e-05,
        -2.9201e-06, -2.4842e-06, -1.4606e-05,  6.0937e-05,  3.4676e-06,
         1.2988e-04,  6.3089e-05,  6.6927e-05,  1.5928e-04, -4.2083e-05,
         2.9391e-05,  7.5592e-05,  1.1684e-04,  4.1232e-05,  1.8325e-05,
         1.1214e-04,  4.1653e-05,  1.5026e-05,  6.2739e-05, -1.7100e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.1037e-05,  5.1916e-02, -2.8946e-05, -1.4650e-04,  3.9319e-05,
         2.5757e-05, -3.2447e-05, -6.3695e-05, -1.3845e-04, -6.4005e-05,
        -3.6935e-05,  3.0848e-05,  3.7811e-05, -2.0393e-06, -6.7487e-05,
         1.7013e-07,  5.8476e-05,  4.9432e-05,  4.3600e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3825e-03,  7.0956e-02,  6.3210e-05, -1.6862e-05, -6.1928e-05,
        -1.0273e-04,  6.6348e-05,  2.4234e-05, -1.2056e-04,  1.1949e-05,
         4.4893e-05,  6.2959e-05, -9.7159e-05, -1.0167e-04, -2.6703e-04,
         2.6753e-05, -5.4865e-05,  1.4467e-05,  7.5764e-05,  7.7861e-05,
         4.7214e-05, -4.0966e-05, -8.5002e-05, -1.0357e-04, -7.7111e-05,
         1.0334e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4224e-04, -5.8159e-02,  2.5397e-05,  1.8346e-05,  1.8290e-04,
        -1.9808e-05,  6.9656e-05,  1.1478e-04,  2.0641e-05,  6.5598e-05,
         1.7654e-06,  5.0843e-05,  3.1601e-05,  7.2905e-06,  4.7078e-05,
        -2.3487e-05, -4.2248e-05,  8.7234e-05,  1.5877e-04,  6.7867e-05,
         1.5910e-04, -4.2136e-05, -6.6917e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1973e-05,  4.8113e-02, -6.4202e-05,  5.9526e-05,  8.5773e-05,
        -1.8357e-04, -1.4790e-05,  8.7369e-05, -9.1545e-05, -5.5755e-05,
        -1.1653e-04, -6.3467e-05, -1.5556e-04, -1.5292e-04, -8.7436e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.7324e-04, -5.1120e-02, -6.9567e-05, -1.0370e-05,  2.5317e-05,
         1.6471e-05, -4.3319e-05, -2.0616e-06,  2.1397e-06, -7.2736e-05,
         1.4345e-04, -1.2761e-04,  1.2234e-05, -1.0471e-04, -2.3940e-04,
        -1.1683e-04,  4.9671e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5176e-06,  7.3059e-02, -8.3629e-06,  4.9284e-05, -4.2959e-06,
         1.4255e-05,  7.5279e-05, -7.5482e-05, -1.2619e-04,  7.7963e-05,
        -1.4485e-04, -1.0054e-04, -8.0224e-05,  6.5426e-05,  6.1157e-05,
        -4.6263e-05, -8.9613e-06,  5.5325e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3759e-04,  8.3351e-02, -8.9874e-05, -9.2524e-06, -3.6679e-05,
         1.2579e-05,  8.3964e-05,  1.9640e-05,  2.8333e-05,  6.4775e-05,
         4.6079e-05, -5.8026e-05, -4.3429e-05, -3.1002e-05, -5.3023e-05,
         4.0076e-05, -6.4779e-05, -5.5543e-06, -7.4198e-05,  4.5744e-06,
         1.1800e-05, -2.8052e-05, -3.4818e-06,  1.0788e-05, -1.9765e-05,
         3.5427e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8742e-05, -5.1763e-02,  2.8142e-05,  4.0764e-05,  1.4251e-05,
        -9.2208e-06,  1.0691e-06,  1.7615e-05, -2.9055e-05,  4.8394e-05,
        -5.3916e-05,  1.5452e-05,  4.1286e-05, -5.7485e-05,  4.9089e-06,
        -6.6704e-06,  2.0838e-05,  4.3088e-05,  6.5717e-06, -6.1453e-05,
         1.6329e-05,  2.4449e-05, -5.3637e-06,  2.5959e-05,  1.5299e-05,
        -1.5900e-05,  2.2423e-06,  2.9831e-05, -9.4895e-05,  2.3065e-05,
        -2.2943e-05,  5.5979e-05, -4.6731e-05,  1.0590e-05,  3.3540e-05,
         3.8394e-05,  1.7448e-05,  8.8755e-07, -3.7518e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9697e-04, -6.2462e-02, -5.0316e-05, -3.2658e-06, -1.7568e-05,
        -6.1455e-05, -5.3041e-05,  2.6429e-05, -1.3489e-05, -1.8078e-05,
        -1.5011e-05, -4.1930e-06, -2.0852e-05, -1.7261e-05,  7.6311e-05,
         1.3914e-05, -1.3409e-05, -1.9281e-05, -4.2233e-05, -3.5732e-05,
         2.2838e-05,  4.5811e-06,  1.2850e-05,  9.5320e-06,  4.3823e-05,
         4.8298e-06,  1.4219e-05,  2.5395e-05, -2.1227e-05, -2.3848e-06,
        -3.2573e-05,  3.3209e-05, -1.0459e-05, -2.0392e-06, -2.7535e-05,
         7.2137e-06,  1.2155e-05, -6.4940e-05,  1.4501e-05, -6.5221e-05,
        -6.1174e-05,  2.2520e-05,  1.6966e-05, -3.6903e-05, -3.1538e-06,
         1.9287e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([-6.9317e-04, -5.0124e-02, -5.1463e-05, -4.6628e-05, -2.3667e-05,
        -6.1837e-05,  4.9469e-05,  4.7198e-05, -9.1739e-07, -1.1411e-05,
        -2.3848e-05, -1.0725e-06, -4.1905e-05, -3.6814e-05, -6.1081e-05,
        -2.2232e-05, -1.1964e-05,  2.9126e-05, -4.7882e-05, -6.5447e-06,
        -4.3559e-05,  5.4212e-05, -9.8282e-06, -5.3638e-05, -3.9075e-05,
        -9.2302e-06,  1.9398e-06,  1.1559e-05,  3.6393e-05,  3.4389e-05,
        -4.2068e-05, -3.9386e-05, -2.4877e-05, -1.7877e-05, -1.8852e-05,
         2.3180e-05, -2.8767e-05, -1.4135e-06,  1.5091e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4101e-04, -3.4306e-02,  1.7267e-05,  9.5225e-06,  6.4857e-05,
         1.9524e-05,  1.6068e-05,  1.4923e-05,  1.5814e-05,  1.2547e-04,
        -5.9497e-05, -2.0026e-06,  2.0442e-05, -6.9445e-06,  1.6421e-05,
         1.8687e-05, -1.3320e-06, -2.5895e-05,  3.1241e-05, -4.5448e-07,
         1.9828e-06,  3.0144e-05, -3.1071e-07,  7.0181e-05,  8.7718e-06,
         6.0193e-06,  9.3731e-07, -5.2926e-05,  2.7907e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5427e-04, -4.4701e-02,  9.9388e-07,  8.2359e-05,  7.2709e-05,
        -1.6306e-04, -2.5639e-05, -6.3281e-05, -8.7309e-05, -8.6724e-06,
         4.1853e-05,  1.0293e-05, -3.2381e-05,  7.9872e-05, -2.2707e-07,
         3.1482e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.7087e-04, -9.3419e-02,  1.4542e-04,  2.4833e-05,  4.0121e-05,
         6.0052e-05,  8.4657e-05,  3.4678e-05,  1.0038e-04, -1.3303e-04,
        -6.4827e-05,  8.9869e-05, -2.3230e-05, -1.0573e-04, -1.5530e-04,
         5.7857e-05,  5.3998e-05,  7.1316e-05,  1.5795e-04,  4.6746e-05,
         5.9842e-05, -1.1488e-04, -3.1838e-05, -3.1123e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3136e-04,  4.4632e-02,  1.2411e-04, -1.3768e-05,  4.3458e-05,
         6.9362e-05,  2.7333e-05,  7.7136e-05,  1.3132e-04,  5.3837e-05,
        -2.6019e-05,  1.9604e-05,  1.1245e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5878e-04,  3.3938e-02, -4.9668e-05,  2.5551e-05,  2.2881e-05,
         5.7744e-05, -2.1360e-05, -3.2620e-06, -2.1077e-05,  2.2652e-05,
         3.9482e-05,  8.1127e-07, -2.7703e-05,  2.6409e-05,  1.8829e-05,
         4.6598e-05, -2.4317e-06,  3.4795e-05, -4.8243e-06, -3.5499e-05,
         7.0736e-06, -5.9467e-05, -1.2168e-05,  2.0993e-05,  2.6578e-05,
        -4.2280e-06, -1.8505e-05,  4.6667e-06,  6.9917e-06,  3.4662e-06,
         6.7862e-06,  1.8480e-05,  6.0618e-05,  1.0123e-05,  4.2187e-05,
         1.5226e-05,  1.5822e-05,  5.5681e-05,  1.0092e-05,  5.8990e-06,
        -8.2223e-06,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1640e-04, -8.4967e-02,  3.1780e-06, -4.7864e-05,  6.2042e-06,
        -5.8476e-06,  3.4346e-06, -1.5750e-05, -3.4778e-05,  5.6579e-05,
        -2.8231e-05,  3.1112e-06, -4.2030e-05,  6.7674e-06,  3.9885e-05,
        -1.4922e-05, -6.2996e-05, -2.0541e-05, -2.5032e-05,  1.1765e-05,
        -4.3685e-06, -5.4302e-06, -6.1189e-05,  2.4313e-08,  1.1295e-05,
        -1.9927e-05, -5.5674e-05,  3.1748e-05,  4.6443e-05, -1.7328e-05,
         1.0954e-05,  1.7328e-05, -1.8175e-05,  3.5381e-05, -1.0194e-04,
        -9.6607e-06, -5.6605e-05, -1.2141e-05, -4.0658e-06, -6.9755e-06,
         7.7990e-07, -4.9159e-06,  1.3913e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9324e-04, -7.8769e-02, -4.2015e-05,  6.0185e-06,  1.1597e-04,
        -5.7424e-05, -1.8800e-06,  5.0537e-06,  1.0365e-04,  1.0030e-04,
         7.3573e-05,  1.4914e-04,  5.7339e-05,  3.8763e-05,  1.3000e-04,
         8.1079e-05,  1.6965e-04,  7.5340e-05, -2.1472e-05,  6.6937e-05,
         2.7546e-05, -3.9701e-05,  2.7827e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.5194e-04,  1.4740e-02,  2.7359e-05,  7.3358e-06,  3.6747e-05,
         1.0146e-04,  8.9286e-05, -6.4592e-05,  2.6409e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3824e-05,  5.9378e-02, -3.5172e-06, -7.4794e-05, -6.3384e-05,
         8.2493e-06, -1.0147e-05,  4.4961e-05, -1.9361e-05, -1.1170e-04,
        -6.7253e-05, -5.4430e-05, -5.6804e-05, -1.9916e-05, -2.2984e-05,
        -2.7184e-05,  8.2879e-07,  1.1886e-05,  6.4311e-05,  1.1683e-05,
        -2.2291e-05, -9.3905e-06, -2.9384e-05, -3.2715e-05, -3.8221e-05,
        -9.9158e-06,  1.9876e-05, -3.5618e-05, -9.5909e-05, -4.5826e-05,
         2.3661e-06, -1.2317e-04,  6.5571e-05,  6.5885e-06, -2.5754e-05,
        -8.3423e-06, -2.6063e-05, -7.1593e-05, -9.1838e-06,  9.7242e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0747e-05,  9.1117e-02, -8.8648e-05,  3.1644e-05, -9.0227e-05,
        -2.8158e-04, -1.9231e-04,  1.3103e-05, -2.0660e-04,  6.2118e-05,
        -1.5494e-05,  6.3265e-05, -1.0265e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9741e-04, -9.2059e-02,  1.0878e-04,  3.8168e-04,  5.4321e-04,
         3.2230e-04, -9.6767e-05,  1.5077e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #350: [tensor([ 0.0001,  0.0523, -0.0001, -0.0003, -0.0003, -0.0005,  0.0001,  0.0003,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3057e-04, -3.0748e-02,  5.0096e-05,  2.8571e-05,  1.8063e-05,
         7.8274e-06, -1.0478e-05, -1.1753e-05, -1.6779e-05, -2.0023e-05,
        -1.9771e-05,  2.6836e-05, -1.2560e-05,  2.1319e-05,  3.1537e-05,
         4.2466e-06, -4.9714e-07,  3.7999e-05, -2.0147e-05,  2.8218e-05,
         1.4612e-05,  1.5302e-05, -1.8441e-05, -1.4727e-05, -4.4534e-06,
        -9.4160e-06,  2.5316e-05,  3.4811e-05,  1.5868e-05,  3.5904e-05,
        -1.3311e-06,  7.9652e-06,  2.6184e-06, -5.3584e-08, -2.2405e-05,
         1.8343e-06, -1.0840e-05, -1.7772e-05,  6.3626e-06, -8.6139e-06,
         1.8512e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.8464e-04, -3.5418e-02,  8.8380e-06,  1.8672e-05, -8.0306e-07,
        -1.0238e-05,  1.8648e-05, -3.9028e-06,  1.2438e-05,  1.0344e-05,
        -1.9751e-05, -2.1360e-05, -6.7510e-06, -1.8465e-05,  8.4778e-06,
        -1.0881e-05, -6.6304e-05,  1.5956e-05,  1.4276e-05,  1.7249e-05,
         5.7606e-06, -8.4117e-06,  1.0668e-05,  2.3282e-05,  3.6120e-05,
        -2.2952e-05, -2.2558e-05,  3.4137e-05, -7.4606e-06, -3.0066e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4531e-05,  5.2136e-02, -6.2081e-05, -1.4361e-04,  3.6729e-05,
        -2.1854e-05, -3.3133e-05, -1.3723e-05, -2.9971e-05, -4.3666e-05,
        -6.8755e-05, -8.9022e-06,  2.6804e-05, -3.7751e-05, -9.7245e-07,
        -7.3708e-05, -9.8361e-05,  4.9879e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.0652e-04, -5.1767e-02,  2.9316e-05, -4.6645e-06, -5.4917e-06,
        -3.5061e-06, -4.5809e-06,  2.6803e-05, -1.0294e-05,  5.5888e-07,
         1.1814e-05, -2.6630e-06,  3.4076e-06,  3.3064e-05,  6.6369e-05,
        -7.5112e-06, -6.7092e-06, -6.8390e-06,  1.7651e-05,  7.5501e-06,
        -1.0571e-05,  2.5361e-05,  3.4848e-06,  7.1997e-05,  5.2195e-06,
         7.5005e-06,  8.9617e-07,  1.6506e-05, -1.1468e-06,  1.0647e-05,
         1.9619e-05,  3.5230e-05,  2.9212e-05,  7.8559e-07, -2.2840e-05,
         2.9212e-05,  4.7790e-06,  2.5852e-05,  3.0874e-05,  2.3251e-05,
        -2.7269e-05,  3.1663e-05, -8.7142e-06,  2.9981e-05,  2.5412e-05,
        -1.8615e-05, -2.7851e-05,  2.0364e-05,  1.8253e-05,  2.1510e-05,
         1.8017e-05,  2.6921e-05,  4.4109e-05, -2.8293e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5585e-04,  3.6646e-02, -5.4112e-05,  1.7043e-05, -6.1882e-05,
        -5.9091e-06,  2.7408e-04, -4.8866e-05,  2.4306e-05, -4.1192e-05,
        -2.3384e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.2290e-05, -9.9043e-02,  6.5780e-05,  1.1992e-05, -1.4128e-05,
         6.4369e-05,  7.0754e-05, -1.9863e-06,  3.6657e-05,  7.8751e-05,
         4.9279e-05, -1.6428e-05, -3.1659e-05,  2.4429e-05,  1.2783e-04,
        -5.3665e-05,  9.0958e-06, -4.9428e-06,  5.9524e-05,  7.4585e-05,
        -5.0702e-05, -1.3141e-04,  1.9897e-05,  1.1474e-05,  2.0500e-05,
        -4.4877e-06,  5.3613e-05, -8.0532e-06,  1.4486e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9734e-04,  6.7503e-02,  7.1068e-05,  1.0426e-04, -1.0328e-04,
         9.3232e-05, -4.1565e-05,  3.8981e-05, -1.2324e-06, -9.9891e-05,
        -4.1313e-05,  1.9473e-04,  6.5431e-05, -6.8494e-05,  1.1216e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2928e-04,  3.8148e-02,  6.6917e-05, -3.2102e-06,  1.7537e-06,
         3.0589e-05,  2.6369e-05,  2.6234e-05,  7.1813e-05,  6.2973e-05,
        -1.6607e-05,  5.3087e-05, -2.8319e-06,  6.4262e-05,  2.9786e-05,
         6.3745e-05, -3.3162e-05,  5.0305e-05,  1.2582e-04,  2.1656e-05,
         5.5070e-05,  2.2079e-05,  3.9891e-05,  6.9048e-05,  4.6551e-05,
         3.7150e-05,  8.2063e-05,  4.1006e-05, -5.6494e-06, -2.0204e-05,
         4.8986e-05,  3.9363e-05, -1.4111e-05, -3.3842e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.6918e-04, -6.8099e-02, -1.0100e-04, -1.8615e-05, -2.6053e-05,
        -4.4830e-05,  5.2260e-05, -1.1750e-05, -1.4556e-05, -8.5093e-05,
         1.9498e-05, -3.6055e-05, -5.0094e-05, -2.3006e-05,  7.6414e-06,
        -9.7949e-06, -2.3682e-05, -5.4173e-05, -6.8681e-05,  5.6423e-05,
         1.8843e-05,  2.5609e-05,  1.8776e-05, -3.9137e-05,  2.1078e-05,
         1.0194e-05,  2.9010e-05, -3.7139e-05, -1.3921e-05,  2.4590e-05,
         8.0186e-05, -5.3063e-05, -5.0454e-05,  1.5895e-05, -9.3085e-05,
        -3.8081e-05, -4.6620e-05, -5.3888e-05, -3.0696e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4780e-04,  4.4238e-02, -6.1832e-07,  8.6232e-05,  7.9352e-05,
         6.2141e-05,  6.2476e-06,  1.0280e-04,  1.0538e-05,  4.4078e-05,
         1.0567e-05, -8.4914e-05,  8.7877e-06,  2.5020e-05,  2.2121e-05,
         7.9041e-05,  1.1397e-04,  6.9099e-06,  1.4091e-04,  2.0744e-05,
         6.4004e-05,  2.9692e-05,  3.2787e-05, -5.3785e-06,  2.8042e-05,
         2.7823e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5294e-04, -6.4941e-02, -1.2081e-04,  7.7913e-05,  1.3360e-04,
        -8.3327e-05, -1.1607e-05, -4.9461e-06, -7.9047e-05, -3.5008e-05,
         5.9462e-05, -5.6140e-05,  4.2438e-05, -1.2349e-04, -3.0887e-05,
         9.9644e-06, -6.1971e-05, -9.3687e-05, -1.5342e-04, -2.3643e-05,
        -1.3982e-05, -5.6760e-05, -2.0130e-04,  2.9612e-05,  4.1600e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 2.2686e-04,  9.2621e-02, -1.1402e-04,  7.5012e-06, -1.3143e-04,
         2.3688e-04, -9.0341e-06, -1.5478e-04,  2.6199e-05, -4.3571e-07,
        -4.8066e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4047e-05, -7.9665e-02, -3.2454e-05, -1.5974e-05, -3.0183e-05,
        -3.9265e-05, -2.5724e-05,  5.6703e-06, -2.9056e-06, -2.8467e-05,
         3.3380e-05, -1.9615e-05, -2.6341e-06,  1.5549e-05, -4.0680e-05,
        -7.3032e-06, -5.1168e-05,  5.4952e-05, -6.2068e-05, -3.7013e-06,
        -1.5138e-05,  5.7298e-06, -1.6938e-05, -2.4116e-05,  1.3273e-05,
        -5.4211e-06, -3.1904e-05, -1.4898e-05, -3.1918e-05, -3.2764e-05,
         9.6845e-06, -4.3502e-05,  5.6583e-06, -3.5156e-06,  9.8457e-06,
        -1.2004e-05, -8.6999e-06, -1.4718e-05,  1.1147e-05, -1.4926e-05,
        -9.7430e-07, -3.9369e-05, -1.2159e-05, -8.5672e-06, -2.2682e-06,
         3.4578e-05, -5.0813e-05, -1.5262e-05, -8.6454e-06, -1.3605e-05,
         4.7262e-05, -1.6401e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1769e-04, -6.4191e-02, -6.3072e-05, -2.9514e-06,  2.4338e-05,
        -7.6997e-06, -1.0119e-05, -7.0627e-05, -8.5300e-05,  7.3642e-06,
        -1.4695e-05, -4.0639e-05,  1.6273e-06,  7.9365e-05, -7.4752e-05,
         1.1333e-04, -2.0830e-05, -1.2251e-05,  9.6808e-05, -4.0539e-05,
        -1.9792e-05,  3.3089e-05, -8.8057e-05, -1.0454e-04,  1.7735e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5957e-05,  5.8483e-02,  1.0286e-06,  3.8473e-05, -4.0576e-05,
         3.5683e-05, -5.9393e-05, -3.0615e-05,  9.6400e-05, -1.0579e-04,
         2.7841e-05,  6.5792e-05,  4.8171e-05, -2.2809e-05,  1.2816e-05,
         8.3445e-05, -2.5100e-05, -9.3135e-05, -6.3657e-05,  3.9166e-05,
         3.7340e-05, -2.7988e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1794e-04,  2.5847e-02, -1.6487e-05,  3.6356e-06,  5.2674e-05,
         1.8151e-05, -6.9462e-06,  5.4429e-05,  9.1981e-06,  2.4622e-05,
         1.3231e-05,  4.4561e-05, -5.9173e-07, -2.9612e-06,  4.0145e-05,
         4.5616e-05,  5.2192e-06,  1.8086e-05,  1.0575e-05,  2.5069e-05,
         4.4770e-05,  1.8469e-05,  1.9040e-05, -1.5000e-05, -6.7326e-06,
         1.5264e-05,  1.8008e-05, -4.5589e-06,  3.9941e-05,  2.7997e-05,
        -4.7663e-06, -2.0064e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.0745e-04,  7.0385e-02, -9.7636e-05, -2.1243e-05, -4.5148e-05,
        -3.5106e-05, -1.5569e-05, -5.2180e-05, -7.7049e-05, -2.6094e-05,
        -2.1467e-06, -2.5446e-05,  2.9513e-05, -4.6583e-05,  4.9537e-05,
        -6.3291e-05,  1.6741e-05, -4.1790e-05, -1.0144e-04,  1.7114e-05,
        -6.7247e-06,  9.3454e-06, -1.8002e-06, -3.3518e-05, -4.1772e-05,
        -1.3674e-05, -5.4438e-05,  9.4981e-06, -6.2147e-05,  3.7251e-05,
         3.1637e-05, -7.9460e-05, -3.3265e-05, -3.4248e-05, -6.6645e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.8557e-04, -7.9492e-02,  6.3580e-05,  1.5875e-05, -1.5925e-05,
        -1.6749e-05, -4.0844e-05, -2.3414e-05, -6.1079e-05, -2.4996e-05,
        -3.6358e-05,  3.6390e-05, -5.5402e-05, -1.8798e-05, -1.9274e-05,
        -6.4897e-05, -4.3689e-05,  1.2110e-04,  5.5154e-05, -4.6120e-05,
         3.0129e-06,  5.9484e-05,  5.9335e-07, -8.1057e-05, -3.9629e-05,
         1.9670e-05, -1.2406e-05, -1.0282e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3149e-04, -1.0304e-01,  4.9039e-05,  1.8384e-05,  8.7827e-05,
         8.0688e-05, -3.9171e-05, -1.8363e-04, -5.7829e-05,  9.0722e-05,
         1.0193e-04, -9.4513e-05, -5.9106e-06, -3.6963e-05, -4.2001e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3579e-04,  7.6775e-02, -1.8959e-05, -2.7246e-05,  5.1408e-05,
         3.6867e-06, -4.9923e-05, -5.0130e-05, -3.7323e-05, -7.5911e-05,
         1.7263e-05, -9.9167e-05, -3.5143e-05,  4.9786e-06,  4.1346e-05,
         2.5535e-06, -1.1639e-04, -3.4983e-05, -2.1404e-05, -5.9523e-05,
        -2.0124e-05, -8.1459e-05, -6.5918e-05, -2.4616e-05,  2.1689e-05,
        -3.4039e-05,  2.8145e-06, -1.9844e-05, -1.5079e-05, -2.0109e-04,
        -1.0443e-04, -6.7282e-05, -6.0855e-05,  2.4338e-05,  1.1265e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1554e-04,  6.1142e-02, -3.3807e-05,  6.7039e-05, -1.8670e-05,
        -8.4334e-05, -1.0349e-04, -3.2714e-05,  6.8048e-05, -1.4217e-04,
        -3.6314e-05,  2.6176e-05, -6.0381e-05, -1.2013e-05, -2.6129e-05,
        -8.5176e-05,  1.1256e-05,  3.0131e-05, -4.1195e-05, -1.0271e-04,
        -6.9082e-05, -5.2168e-05, -6.4245e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.9319e-04, -8.8871e-02,  6.2723e-05,  4.5311e-05,  8.7224e-06,
        -6.6146e-06,  1.7208e-04, -3.4886e-05,  8.1936e-05,  5.8418e-05,
         3.3066e-05,  1.7566e-05,  5.7668e-05, -2.2847e-05,  2.7799e-05,
         9.6178e-05,  5.4614e-05, -9.3872e-05,  1.0345e-04,  5.0121e-05,
         7.2338e-05,  5.1487e-06,  5.0026e-05,  8.7789e-05, -1.7396e-05,
         5.7311e-05, -4.8297e-06, -6.8993e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9051e-05,  6.0898e-02, -9.6339e-06,  9.2160e-06, -2.4854e-05,
        -6.7643e-05, -9.3610e-06,  5.1673e-05,  1.1232e-05, -4.4151e-06,
        -8.9403e-05,  1.9692e-05, -5.1496e-05, -1.1086e-05, -3.1558e-05,
         1.2107e-05,  3.4912e-05, -6.4752e-05,  3.4654e-05, -7.3937e-05,
         6.8812e-09,  2.1124e-06, -3.4083e-05, -3.7850e-05, -2.4401e-06,
        -3.6951e-06,  9.2812e-06,  1.9378e-06, -3.9006e-05, -2.7311e-05,
        -7.2329e-05,  1.0132e-05,  1.9573e-05,  8.5856e-06,  1.0800e-05,
        -5.6821e-05, -2.2913e-05,  3.4592e-05, -3.7206e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([-5.3267e-04,  6.3304e-02, -2.3892e-05, -1.0266e-04, -5.1518e-05,
        -8.2874e-05, -9.4371e-05, -6.2210e-05, -3.2100e-05, -6.5551e-05,
        -4.4768e-05, -6.1351e-05, -8.6484e-05,  2.3708e-06,  1.0309e-04,
        -9.7632e-06,  2.9382e-05,  1.4207e-05, -8.0304e-05, -2.2369e-05,
         1.1623e-05,  1.1740e-05, -3.9172e-06, -9.0571e-05,  4.5429e-05,
        -2.3886e-05, -7.5658e-05, -1.0024e-06, -6.1398e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2181e-05,  4.4154e-02, -3.5408e-05,  6.0002e-05,  2.1298e-05,
         2.1649e-05,  1.1554e-05, -4.2758e-05,  4.4096e-05,  8.5369e-05,
        -2.1256e-05,  3.3209e-05, -1.4851e-05,  1.0651e-04,  3.4419e-05,
         3.3035e-05,  1.7384e-05,  7.6599e-05, -2.9965e-05, -2.8327e-05,
         8.3344e-05, -4.4335e-05,  5.7233e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.3015e-04, -5.9258e-02,  3.1366e-05,  1.6398e-05, -8.0179e-06,
         2.1168e-05,  2.2339e-05, -8.4535e-06, -1.4347e-05, -2.5786e-05,
         6.9018e-07,  6.2300e-05, -3.0590e-05, -2.2566e-05, -1.4912e-05,
         2.7149e-05, -5.5951e-05, -3.5148e-05, -2.8064e-05, -7.3432e-05,
        -1.6449e-05,  2.1066e-05, -6.6273e-06, -7.1977e-05,  1.8973e-05,
         1.0841e-05, -1.7919e-05, -1.3622e-05,  3.9668e-06,  3.5535e-05,
         1.0322e-05,  6.0584e-05, -4.4673e-05,  2.9221e-06, -6.1887e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.1226e-04,  9.1090e-02,  2.6729e-05, -7.4376e-05,  2.2831e-05,
        -1.2951e-04, -4.9973e-05,  5.3394e-06, -4.2672e-05,  7.0708e-06,
         5.5569e-06, -2.5552e-05,  1.4437e-05, -1.1753e-04, -3.6281e-05,
        -1.6691e-05, -2.1777e-05, -2.3286e-05,  2.5744e-05, -1.6621e-05,
         3.3534e-05, -8.0790e-05, -1.3570e-05, -5.2356e-05, -2.7619e-05,
         2.3366e-05, -3.9465e-05, -9.2638e-06,  5.7974e-05,  1.4261e-06,
        -9.1856e-05, -6.7597e-05,  1.0328e-05,  7.0647e-06,  1.4804e-05,
         6.1894e-05, -1.8599e-05, -1.2162e-05, -1.3085e-05,  5.5091e-06,
         1.0024e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6375e-04, -7.3489e-02, -4.3412e-05, -3.9381e-05,  1.4886e-05,
         1.0125e-05,  7.6366e-07,  9.1023e-06,  3.1750e-05,  6.6244e-06,
        -9.8409e-06, -1.0703e-05, -9.4687e-06, -7.2701e-05,  1.9212e-06,
         2.0924e-05, -1.7106e-05,  9.4906e-05,  3.4590e-05, -2.4714e-05,
        -5.2638e-05, -5.7727e-05, -2.1432e-05,  3.3602e-05, -1.5948e-05,
        -2.9352e-05,  1.9970e-05, -1.2477e-05,  1.6860e-05, -7.6626e-05,
         2.8302e-05,  1.4512e-05, -1.2485e-05,  4.9606e-05, -3.7226e-05,
         2.8967e-05,  8.4089e-06, -8.3861e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3889e-04, -6.0365e-02, -3.3870e-05, -3.6583e-05, -2.4226e-05,
        -4.0944e-05, -5.0065e-05, -7.9251e-06, -4.1184e-05, -1.5164e-06,
        -5.3968e-05, -2.8359e-05,  6.9057e-06, -7.0888e-06,  2.4968e-05,
         7.1272e-06, -1.2831e-05, -6.8834e-06, -5.1368e-05, -1.2114e-05,
        -5.3617e-06,  3.1492e-05,  1.7982e-05, -1.8546e-05,  2.9132e-05,
        -7.0759e-05, -5.9651e-05, -3.3877e-05, -3.3548e-06, -9.5303e-06,
         3.2730e-05,  7.8941e-06, -2.5428e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5804e-04,  9.3663e-02,  2.4272e-05,  1.7064e-05, -1.0636e-05,
        -4.6836e-05, -8.2532e-05, -1.7035e-04, -9.0577e-05, -1.8185e-05,
         4.6742e-06,  2.7700e-05,  3.2337e-06,  6.1983e-06,  8.0116e-06,
         5.8411e-05, -1.2707e-04,  4.8559e-06, -1.3924e-04, -1.3781e-04,
        -1.8573e-04, -3.7409e-05,  4.2013e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9059e-04, -9.2732e-02, -2.7192e-04, -1.6172e-05,  8.2292e-05,
        -1.1551e-04, -1.7273e-04, -2.4644e-05,  7.8308e-05, -3.6283e-06,
        -1.0981e-04,  1.3083e-05, -1.2524e-04, -2.3736e-04,  2.1503e-04,
        -6.6016e-05, -3.6951e-05, -2.7799e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0646e-04,  8.3459e-02, -2.9198e-05,  1.5243e-05, -1.5655e-05,
         2.5927e-05,  1.5647e-05,  4.3079e-05,  6.7960e-06,  1.6275e-05,
         1.2670e-04,  4.0932e-05, -9.5664e-08, -6.6143e-05,  3.9546e-05,
         1.6698e-05,  7.7149e-05,  6.2227e-05,  4.4441e-05,  3.0944e-05,
         1.0267e-04,  1.2885e-04, -2.5623e-05, -5.8694e-05,  3.1396e-05,
        -4.1321e-07, -8.1258e-06, -6.7665e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0761e-04,  8.3296e-02,  1.5156e-05, -2.8314e-06,  1.9991e-05,
        -8.6383e-05, -7.1317e-05, -3.7116e-06,  6.4065e-05, -4.7976e-05,
         5.3655e-05,  1.0113e-05,  4.4738e-05, -9.6777e-05, -3.0731e-05,
        -7.2144e-05, -4.6394e-05, -1.0286e-05, -1.4419e-05, -3.8790e-05,
        -6.6675e-06,  3.3439e-06,  6.7746e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.5459e-04, -8.2748e-02,  7.2991e-06, -2.3780e-07,  7.5607e-06,
        -3.7844e-05,  3.7670e-05, -2.2852e-05, -2.2025e-06,  1.0841e-05,
         7.7831e-06,  4.5082e-05,  2.4753e-05,  2.0167e-05,  1.6784e-05,
         4.7697e-05,  2.3371e-05, -2.9648e-05,  1.3437e-05,  4.1226e-06,
        -1.4808e-05,  1.8454e-05, -7.3236e-06,  1.5866e-05,  5.1405e-05,
        -1.0850e-05, -6.3490e-06,  1.2181e-05, -3.2564e-05, -1.9297e-05,
         2.4721e-05, -1.1416e-05,  1.2775e-05,  5.0102e-06, -4.4453e-06,
         6.4236e-06, -9.7533e-06, -6.2605e-05,  1.0587e-05,  2.5347e-05,
         2.8667e-05,  4.3095e-05,  4.4993e-06, -3.7301e-05, -2.6591e-05,
        -2.0512e-05, -6.6512e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2654e-04, -1.1016e-01,  1.8372e-04,  1.6268e-04, -2.1204e-04,
         7.7184e-05,  7.6305e-05,  6.8257e-05, -3.3937e-04, -1.1320e-04,
        -2.3441e-06, -1.2266e-04,  1.2783e-04,  3.7403e-05,  1.9319e-05,
        -1.3622e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 9.2462e-06,  9.1784e-02, -2.4550e-06,  1.9978e-05,  7.7578e-05,
        -5.3973e-05, -6.8207e-06,  4.5520e-05, -4.9909e-06, -5.2166e-05,
        -4.2812e-05, -2.9489e-05, -3.9534e-05,  1.0127e-04,  4.5075e-05,
        -1.4185e-04, -7.6086e-05, -1.2826e-04,  3.3818e-05, -6.6092e-05,
         7.1951e-05,  2.6005e-05, -1.1794e-04, -5.6408e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3458e-04, -8.2034e-02,  1.2855e-05,  2.4595e-05,  1.1317e-04,
         2.6746e-05,  2.2847e-05, -2.3417e-05,  1.7668e-05,  1.9067e-05,
        -3.1683e-05, -3.2793e-08, -5.5945e-05,  1.2625e-04, -8.3350e-05,
        -2.2550e-05,  5.6820e-05, -2.4689e-05,  6.2519e-05,  7.0711e-05,
        -6.9089e-06, -1.4550e-05,  7.6327e-05,  3.2196e-05,  1.9578e-05,
         3.1623e-06,  6.2518e-06,  4.4942e-05,  1.7979e-05, -3.3833e-05,
         3.8417e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.8140e-04, -7.8628e-02, -4.1228e-05, -4.1073e-05, -1.7882e-05,
        -3.1275e-05,  5.4104e-05,  6.7469e-05,  7.9297e-05, -4.6837e-06,
        -1.2628e-04, -3.3008e-05, -5.2510e-05, -7.6497e-05, -6.5799e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1362e-04,  1.2243e-01,  2.0238e-04,  7.6077e-05, -4.6788e-04,
        -4.0230e-04,  1.5343e-04,  2.6482e-04, -2.8522e-04,  2.1658e-04,
         4.1866e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0328e-04,  5.0148e-02, -8.9144e-06, -3.4013e-05, -4.9839e-06,
         5.8807e-06, -8.4001e-05,  9.8258e-06,  5.8200e-05,  4.7839e-05,
         1.1249e-04, -3.3163e-05,  5.2725e-05,  1.1363e-04, -5.0758e-06,
         7.6671e-05,  3.8021e-05, -3.2617e-05,  2.7263e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0443e-04,  5.7779e-02,  4.0481e-05, -1.6298e-04, -4.7612e-05,
        -7.6791e-05, -1.3660e-04, -5.4093e-05,  2.2158e-05, -1.3545e-04,
        -1.4816e-04, -1.5331e-04, -1.0188e-04, -6.2597e-05, -4.0506e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.5602e-04,  1.3047e-01, -2.0957e-05, -1.6222e-04,  7.4025e-05,
         4.2874e-05, -1.6311e-04, -2.0844e-04,  6.2649e-05, -1.3419e-04,
        -4.1974e-05,  6.7620e-06,  1.5798e-04, -2.0578e-04, -2.5815e-05,
         4.9197e-05, -1.8570e-04, -6.8443e-05,  3.1668e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9582e-04,  6.4943e-02,  3.2800e-05,  6.3839e-05,  2.2674e-05,
         7.3225e-05,  5.3665e-05, -5.7116e-05,  5.7368e-05, -2.5756e-05,
        -1.5737e-05,  3.3094e-05, -8.2102e-06, -4.2341e-05,  5.4595e-05,
         2.3074e-06,  4.4412e-06, -2.1872e-05,  7.0610e-05,  1.9432e-05,
         6.4535e-05, -1.8505e-05, -7.6897e-06, -2.7326e-05, -3.9010e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3759e-04, -1.0208e-01,  2.5823e-05,  8.4643e-05, -2.1524e-04,
        -1.0730e-04, -1.8473e-04, -7.5722e-05, -2.3200e-05,  1.1779e-05,
         1.4985e-04,  1.4854e-04, -1.4555e-04, -1.1726e-04,  1.3800e-04,
        -1.5067e-04, -1.7948e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8710e-04,  8.8301e-02, -7.8050e-05, -4.4054e-06, -4.6646e-07,
         3.1644e-05, -8.7802e-06, -6.5003e-06,  5.2731e-06, -9.5336e-05,
         9.8269e-06,  2.0228e-06, -1.7141e-05,  8.6093e-07,  6.9387e-06,
         4.1542e-05,  7.4581e-06, -7.5425e-06, -3.0586e-05,  9.6858e-06,
        -3.0109e-05,  3.0805e-05, -1.5853e-05,  3.0576e-06, -8.7397e-05,
        -4.7654e-05, -1.6048e-05, -2.3346e-05, -5.7320e-06,  7.5184e-06,
         9.8719e-05, -2.3060e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4753e-04,  1.4072e-01,  4.0845e-05, -4.5933e-05,  6.5665e-05,
        -1.0859e-04, -8.7365e-05,  1.2115e-04,  7.7211e-05,  1.1262e-04,
        -2.8203e-05,  1.4126e-05, -1.0846e-04,  4.1047e-05, -3.0008e-05,
        -1.0187e-04, -8.2597e-05,  7.0308e-05, -9.4784e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.2307e-04,  8.4006e-02, -2.7793e-04, -3.4981e-05, -8.1636e-05,
        -1.3618e-04, -1.5751e-04, -1.9647e-04, -5.0084e-06,  4.7800e-05,
        -1.7353e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 4.4871e-04, -9.8686e-02,  5.0954e-06,  2.8199e-05, -1.2358e-05,
         2.1154e-05, -2.8464e-05, -5.4199e-05, -6.3885e-05, -3.1568e-05,
        -3.1728e-05, -1.0057e-05,  7.2127e-05,  6.2816e-06, -4.8210e-05,
        -1.8006e-06, -2.1279e-05, -9.5508e-05,  2.3583e-05,  5.7034e-05,
        -8.3845e-05, -6.7215e-05, -1.3023e-05, -8.0199e-05,  5.8208e-05,
         2.7905e-05, -5.5187e-05, -5.6194e-05, -1.0730e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8160e-04, -6.9933e-02, -2.8261e-05,  1.7676e-05,  2.5381e-05,
        -4.2291e-06, -2.6075e-05, -1.7721e-05, -5.1765e-06, -1.7574e-05,
         2.6534e-06, -9.1484e-06, -8.4345e-06, -4.4891e-05,  3.3160e-05,
         8.2058e-06, -7.7442e-06, -1.3025e-05, -3.6623e-05,  2.5058e-06,
        -4.0114e-05,  3.8768e-06,  6.7627e-06, -1.7609e-05,  8.2475e-06,
         3.2234e-05, -2.9950e-06,  2.3610e-05,  2.9788e-05, -2.8997e-05,
        -3.5597e-05,  3.9657e-05, -3.7140e-05,  1.8975e-05, -6.4793e-05,
        -1.3425e-05, -1.3215e-05,  2.0710e-05, -6.1174e-06, -2.9819e-05,
        -5.2742e-06,  1.9579e-06,  5.2885e-06, -1.6852e-05,  2.6543e-05,
        -1.6948e-06,  2.9219e-05, -4.6195e-05,  1.6564e-05, -3.9676e-05,
         3.2097e-05,  2.9698e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8219e-04, -1.5130e-01,  1.2582e-04, -6.4568e-05,  1.3748e-04,
        -3.2976e-05,  3.9190e-05, -1.0800e-05,  3.0128e-04,  1.0044e-04,
        -1.6750e-05,  9.3850e-06,  1.1185e-04,  9.7037e-05,  3.2748e-05,
         1.4901e-04, -5.7528e-07, -8.6898e-05,  6.7897e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.2496e-04,  9.3680e-02,  2.2986e-05, -9.3697e-06,  5.1434e-05,
        -1.8464e-04, -2.4922e-05, -7.6453e-05,  2.7259e-05,  1.4289e-04,
         1.0231e-06,  2.7202e-05, -5.4975e-05, -4.3616e-05,  2.6574e-05,
         3.4614e-06, -9.4030e-05, -4.6354e-05,  8.0601e-05, -1.0336e-04,
        -6.9150e-05,  3.4085e-06, -2.2901e-05, -2.2128e-05, -4.9602e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6781e-04, -1.3753e-01,  4.7876e-05,  6.4298e-05, -1.8570e-05,
        -6.8343e-05,  2.6936e-05,  2.7180e-05, -4.2532e-05,  1.7087e-05,
         1.1773e-04, -1.4920e-04,  1.5465e-05, -6.6819e-05, -2.6025e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9169e-05,  7.2585e-02, -5.9504e-05, -8.8186e-05, -5.6749e-05,
        -6.9095e-05,  5.6770e-05,  9.8048e-06,  6.4211e-05, -1.2450e-04,
        -3.7944e-06,  5.4643e-05,  2.1618e-05, -2.6649e-05, -1.2886e-04,
        -6.7397e-05,  7.0181e-06, -2.3464e-05,  1.3240e-04, -1.7030e-05,
        -6.2872e-06,  4.8793e-05, -1.7797e-05, -4.8717e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.4122e-04,  3.8226e-02, -6.7394e-05, -5.3741e-05, -1.0875e-04,
        -1.2287e-04,  4.9314e-05,  4.4309e-05, -3.0351e-05,  4.1479e-06,
        -2.2352e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.6287e-04, -9.0772e-02, -5.8756e-05, -2.1691e-05,  1.5140e-05,
         1.9979e-05, -4.1447e-05,  2.5218e-06, -3.6561e-05,  4.4458e-05,
        -5.9226e-05, -5.5731e-05, -2.8917e-06, -4.0543e-06,  1.4825e-05,
         3.0232e-05, -5.4223e-05,  1.4946e-05, -6.7522e-06,  2.7047e-05,
         7.2048e-05,  1.2070e-05, -3.1945e-05, -3.7248e-05,  1.1452e-05,
         2.0238e-05,  6.8273e-06, -4.6547e-05,  1.5133e-05, -3.2831e-05,
        -7.4612e-05, -6.6686e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9678e-05, -1.3132e-01,  3.1588e-04,  2.4583e-04, -5.8263e-05,
         2.4907e-04,  1.0953e-04, -9.8968e-05, -2.0772e-04,  1.8297e-04,
         7.5303e-05,  7.5140e-05,  1.3706e-04,  1.1610e-04, -1.2329e-04,
        -7.6381e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2116e-04,  7.0369e-02,  1.8963e-05, -2.5877e-06,  6.8163e-05,
        -7.8215e-06,  3.5754e-05,  8.6190e-05,  7.4813e-06,  4.3701e-05,
         5.4974e-05, -4.2149e-06,  5.3508e-05,  1.8088e-05, -4.7275e-06,
         8.5852e-05, -2.8295e-05, -1.2133e-05,  3.5729e-06, -1.3298e-05,
         7.3554e-05,  4.0513e-05,  4.9535e-06, -5.9157e-05,  8.1712e-05,
         1.6538e-04, -8.4107e-06,  1.3453e-04, -6.4840e-05,  1.9127e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.3032e-04, -1.0951e-01,  8.5550e-05, -2.8376e-04,  1.9655e-04,
         2.7332e-04,  1.4031e-05,  2.7661e-05,  8.1908e-05, -3.9456e-05,
        -1.3042e-04,  4.6481e-05, -5.3343e-05, -1.8136e-04, -4.4368e-05,
        -1.7211e-04, -2.4350e-05, -4.6155e-05, -6.6925e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2400e-05, -1.5535e-01,  1.1069e-04, -5.0287e-05, -1.4741e-05,
        -1.3156e-04, -1.2605e-04, -1.7520e-05, -6.7130e-05, -6.1432e-05,
        -5.9977e-05, -1.1505e-04,  8.6944e-05, -1.1911e-04,  9.4052e-05,
         7.2482e-05,  1.7717e-04,  9.3476e-05,  7.2200e-05, -1.6623e-05,
        -2.6925e-05, -1.2086e-05, -2.6351e-05, -1.4818e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 3.0046e-04,  4.7579e-02,  8.0297e-06, -1.0832e-05,  1.4652e-06,
         1.6279e-05, -1.7913e-05, -2.0027e-05, -1.0280e-05,  1.6663e-05,
         5.0994e-06, -2.4277e-05,  4.2040e-05,  2.2284e-05,  1.4384e-05,
         3.4746e-05,  4.1722e-06,  1.5914e-05,  4.5633e-05, -1.5536e-05,
         1.4012e-05,  9.0578e-07,  2.7780e-05,  1.3792e-05,  1.6371e-08,
         3.1455e-05,  6.0402e-06,  1.3932e-05,  2.2002e-05,  3.4050e-05,
         4.4100e-05,  6.6298e-06, -1.8572e-05,  3.0923e-05,  5.2459e-05,
         1.1486e-05,  6.2521e-06,  1.6818e-05,  3.9139e-05,  3.5163e-05,
        -1.1984e-05, -1.0290e-05,  4.0231e-05,  1.3788e-05, -1.4682e-05,
         3.8152e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4484e-04,  7.4741e-02,  4.8502e-05,  2.2439e-05,  9.4471e-06,
        -1.2457e-04,  5.1884e-05,  1.1100e-04, -2.4117e-05, -1.4530e-04,
         5.9322e-05, -2.2507e-05,  9.9085e-05,  3.0251e-05, -3.9878e-05,
        -1.0080e-04, -4.5815e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4665e-04,  6.3048e-02, -5.4640e-05,  1.2444e-05,  2.3111e-05,
        -5.7147e-05, -4.3133e-06,  7.5410e-05, -5.1191e-05, -2.1732e-05,
        -9.8892e-06,  2.1765e-05, -3.3899e-05, -2.1102e-05,  3.0201e-05,
         3.0814e-05, -6.5333e-06, -3.3794e-05, -7.9106e-06, -4.7430e-05,
        -1.8319e-05,  3.4106e-05,  1.5748e-05, -4.7495e-05, -2.7043e-05,
         1.7275e-05, -1.1579e-05,  1.7417e-05, -1.0986e-05, -3.2429e-05,
         8.0303e-05, -1.4746e-05,  5.9444e-06,  2.6115e-05, -1.5372e-05,
        -1.7698e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4436e-04,  1.5067e-01, -4.4898e-05, -1.3288e-04, -7.0923e-05,
        -3.2163e-04, -1.1927e-04, -8.1942e-05, -1.4978e-04, -4.2999e-05,
         7.4276e-05, -1.0746e-04, -3.2318e-05, -1.1996e-04, -5.3034e-05,
        -1.2280e-04, -1.1812e-04,  3.1998e-05, -6.8489e-05,  1.1222e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7403e-04,  9.9796e-02,  6.2531e-05, -4.2241e-05,  9.6292e-05,
        -1.8231e-06, -1.5053e-05,  5.3467e-05,  4.1560e-05,  5.6594e-06,
        -3.2657e-05,  2.3551e-05,  8.1696e-05,  5.7568e-05,  1.7904e-05,
        -5.3054e-06,  2.7466e-05, -8.5096e-06, -1.4024e-05, -1.3890e-04,
        -1.8438e-05, -3.9114e-05,  9.0851e-05, -1.3955e-05,  1.3687e-04,
         6.3933e-05, -1.7116e-05,  4.4199e-05, -1.8014e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.5226e-05, -3.7062e-02,  1.2181e-05,  1.3503e-05,  5.6298e-06,
         4.9921e-06, -3.1619e-06,  8.1636e-06, -7.0986e-06,  1.0210e-05,
         2.4579e-06, -1.5834e-05,  1.7789e-05, -6.0070e-06, -2.5813e-05,
        -1.8871e-05, -2.3307e-05,  1.2552e-05, -3.1445e-05, -7.2965e-06,
         1.7063e-06,  3.4110e-05, -8.0562e-06,  1.8136e-05, -3.4422e-06,
        -2.5226e-05, -2.6425e-05, -3.4280e-05,  2.9021e-05,  1.3149e-05,
         2.0449e-05, -7.0463e-06, -2.6656e-05,  2.8255e-05,  1.2002e-05,
        -8.0172e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.3928e-05,  7.5415e-02,  4.0423e-05,  5.8849e-05,  8.6818e-05,
         9.1145e-05, -4.3613e-05, -4.2364e-05, -5.6953e-06,  3.1952e-05,
         6.0608e-05,  8.9282e-06,  1.2070e-04, -2.0841e-05,  4.8862e-05,
         6.6763e-06,  7.4237e-05,  1.4385e-05,  8.6169e-05,  4.0245e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.6010e-05, -1.0216e-01, -8.8063e-06,  7.5941e-06, -3.3211e-05,
        -5.9902e-06,  4.2033e-05, -7.5024e-06,  1.6007e-05, -1.3230e-05,
        -2.2333e-05,  8.9506e-07,  1.0326e-05, -3.1402e-05,  4.1339e-06,
        -1.6025e-05, -9.2933e-06,  2.8310e-05,  4.0375e-05,  2.7492e-05,
        -3.3297e-06, -1.0736e-06, -5.4711e-05,  1.2792e-05, -5.7438e-06,
         1.6580e-05, -1.1545e-05,  1.6761e-05,  4.1004e-05, -1.3628e-05,
        -1.4394e-05, -2.2093e-05, -9.4169e-06,  2.5068e-05, -1.7380e-05,
         2.8670e-05,  2.4660e-05, -5.7278e-06,  6.1622e-05,  2.4806e-05,
         2.2470e-05,  2.7166e-06, -3.8570e-06,  3.3743e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5206e-04, -4.3362e-02, -3.1065e-05, -1.3465e-05, -3.0672e-05,
        -1.3814e-05, -1.7087e-06, -3.9776e-05, -7.9231e-06, -1.8216e-07,
        -2.6017e-06, -1.2867e-05, -2.4507e-05, -2.8848e-05, -4.2262e-05,
         4.9481e-06, -4.0077e-06, -3.0280e-05, -2.8317e-05, -1.8167e-05,
        -1.5588e-05, -1.1414e-05, -3.3453e-05, -1.7698e-05, -6.7741e-06,
        -7.3281e-06, -8.0529e-06, -1.5975e-05,  2.1284e-07, -3.1048e-05,
        -2.5195e-05, -2.8015e-05, -9.5122e-06, -1.9185e-05, -1.9182e-05,
        -1.5194e-05, -3.0809e-05, -2.3560e-05, -4.9096e-06, -2.8965e-05,
        -4.0740e-06,  5.0258e-06, -2.7408e-05, -1.6342e-05,  3.0829e-06,
        -1.0064e-05, -2.7436e-06, -1.9125e-05, -2.8267e-05, -1.6052e-05,
        -1.2494e-05, -4.5666e-06,  1.9238e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1587e-04, -9.8867e-02,  8.0663e-07, -3.0815e-05,  4.2925e-05,
        -1.6841e-05,  4.9983e-05, -7.6257e-05, -5.7173e-05, -1.1540e-05,
         2.5587e-05, -2.5213e-06, -4.9712e-06, -6.4927e-05,  1.2172e-05,
        -6.0817e-06, -2.8825e-05,  2.8663e-05, -8.3313e-06, -5.7194e-05,
         1.6337e-05, -4.8430e-05,  4.3485e-05,  8.6000e-06,  3.7387e-05,
        -1.6293e-05, -1.3450e-05,  7.5897e-06,  2.9569e-05,  4.3501e-05,
        -1.8580e-05, -4.4144e-05, -2.4063e-05, -1.0592e-05,  1.2435e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.1333e-05,  5.8519e-02, -3.4330e-06,  2.6079e-05,  1.7508e-05,
         3.2729e-05,  4.8069e-07, -1.1806e-05, -3.3280e-05, -1.6817e-05,
        -4.4109e-05,  1.4464e-05, -3.3680e-05,  4.8791e-05,  6.1456e-05,
         1.3707e-06, -4.2833e-05, -3.8090e-06,  1.1706e-05, -8.2166e-06,
         1.8120e-05, -3.0149e-05,  1.4359e-05,  1.0175e-05,  5.3201e-06,
         5.9189e-05, -6.0564e-05, -2.0069e-05,  6.6131e-05, -1.9856e-05,
         1.8173e-05, -3.9641e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3564e-04, -6.0012e-02, -2.5656e-06,  4.7564e-05,  1.9064e-06,
        -1.0913e-05, -1.0427e-05,  2.1761e-05,  5.5104e-06,  3.8846e-06,
         9.2033e-06, -1.9208e-06,  1.6054e-05, -1.9584e-05,  2.9820e-05,
        -5.5984e-06, -4.6234e-05,  1.5381e-05, -1.7718e-05,  1.5230e-05,
         1.5544e-05, -5.4955e-06,  1.9194e-05, -4.3878e-05,  6.6885e-06,
        -4.6688e-06, -2.0989e-05, -1.8798e-05, -4.2648e-05, -2.8252e-05,
         1.5068e-05, -6.8282e-06,  9.4918e-06,  2.0640e-05,  1.1964e-06,
        -3.9165e-05,  8.1043e-06, -5.4459e-07, -2.3545e-06,  3.3593e-06,
        -1.1630e-05,  5.1495e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.7863e-04,  1.6810e-01, -3.0783e-05, -1.3168e-05,  1.7710e-05,
        -7.3205e-05, -6.3013e-05, -1.0691e-04, -6.0492e-05,  6.7263e-05,
        -1.2257e-04,  1.6010e-04, -4.8779e-06, -7.6605e-06, -1.8213e-04,
        -4.7795e-05, -5.5810e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.9175e-05, -1.3637e-01,  3.0220e-05,  6.6517e-05,  1.0886e-04,
        -1.5235e-04, -9.1275e-06,  2.0240e-06,  1.5507e-05,  1.2614e-04,
        -1.9356e-05,  1.1170e-04,  6.6422e-05,  1.2325e-04,  1.0958e-04,
         6.7086e-05,  5.8782e-05,  2.9821e-05,  9.4498e-05,  5.3738e-06,
         1.6661e-05,  1.9633e-05,  1.4236e-05, -3.2932e-05, -3.4712e-06,
         5.9505e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1840e-04,  3.0955e-02,  3.8033e-05, -4.2188e-05, -4.9744e-06,
        -6.0643e-05, -5.8611e-05, -4.8639e-05,  4.2036e-05,  5.9410e-05,
         2.9249e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1175e-04,  6.8061e-02,  6.9209e-05, -7.4052e-06, -7.4593e-05,
         2.7372e-05, -2.0833e-05,  2.4896e-05, -7.9848e-05,  7.6423e-05,
        -8.5452e-05,  1.1167e-04,  1.2375e-04,  6.0859e-05, -3.7498e-05,
        -1.1993e-04, -1.0868e-04,  8.6922e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1074e-04,  1.1055e-01,  1.0012e-04,  7.9128e-05, -5.6117e-05,
         1.2247e-05, -8.1934e-05, -8.2653e-06,  1.1976e-05,  1.4606e-04,
        -4.1024e-05, -1.0264e-04,  2.9116e-05, -5.6693e-05, -2.7858e-05,
         6.9965e-05,  1.9106e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.7771e-04, -5.8618e-02, -1.7319e-05, -1.9472e-05,  2.5276e-05,
         2.9967e-06, -2.4379e-05, -6.0302e-05,  3.7671e-06,  1.7084e-05,
         1.5093e-05,  3.7154e-05,  2.8695e-05,  4.4005e-05, -3.4198e-05,
        -4.2021e-06, -1.1153e-06,  4.7236e-05, -1.0358e-05,  8.3304e-06,
        -3.3581e-05,  1.3262e-05,  4.6885e-05, -1.2594e-05,  5.9393e-05,
        -1.5907e-05,  3.3664e-05,  5.3558e-05,  1.6158e-05, -9.0069e-06,
         5.6955e-05,  1.1007e-05, -4.5817e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.0193e-04, -1.2165e-01, -5.3429e-05, -1.2124e-05, -2.6480e-05,
        -4.2161e-06,  3.3247e-05, -2.9967e-06, -1.6770e-04,  8.4493e-06,
        -4.4311e-05, -5.4020e-05,  7.9842e-06,  5.7863e-05, -7.4262e-05,
         1.4415e-04, -1.2393e-05, -2.0352e-07, -2.3545e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5190e-04,  9.6927e-02, -3.0186e-05, -8.0295e-05, -1.2326e-04,
        -1.5252e-04, -1.4700e-05,  4.5595e-05, -6.6054e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.4078e-04,  1.8930e-01, -1.4152e-04, -5.1657e-05, -3.0564e-04,
        -1.3088e-04, -1.7603e-04, -8.9464e-05, -1.0135e-04, -3.3198e-04,
         4.0332e-05, -1.1077e-04,  6.0359e-06,  1.3368e-06, -1.4431e-04,
        -2.3848e-04, -3.2609e-05,  9.4471e-06, -4.2058e-05, -4.2506e-05,
         4.0646e-05,  2.9354e-05, -8.3238e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.8287e-06,  9.9836e-02, -2.6454e-05, -5.0153e-05, -2.5915e-06,
         2.6340e-05, -2.2596e-05, -1.1083e-06, -2.3598e-05, -3.1126e-05,
        -2.7686e-05, -1.3175e-05, -5.8188e-05, -2.2497e-05, -2.2514e-05,
         2.1475e-06, -2.3454e-06, -1.9733e-05, -3.8620e-05, -1.0147e-05,
        -1.3155e-05, -2.7498e-05, -7.9634e-05,  2.6625e-05, -2.3263e-06,
        -6.2577e-05, -2.6137e-05, -1.5892e-05, -8.6440e-05,  5.9145e-05,
        -5.5185e-05,  4.3341e-06, -5.3190e-05, -2.5826e-05, -3.0839e-05,
        -9.0242e-06,  1.4652e-05, -4.1090e-05, -1.2855e-06, -3.7405e-05,
        -8.4747e-05, -2.6267e-05, -1.7932e-05, -4.3572e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.1617e-04, -5.7917e-02, -4.5136e-05, -1.5276e-05, -2.3466e-05,
        -1.8824e-05,  2.1213e-05, -7.1816e-05, -1.6489e-05,  1.2509e-05,
        -2.1593e-05,  8.8636e-06, -1.4778e-05,  1.1654e-06,  4.1657e-05,
        -4.4078e-06, -1.1576e-05,  3.4069e-05, -6.0287e-05, -1.1280e-06,
         1.7848e-05, -6.3672e-06,  1.5775e-05,  6.3476e-06,  4.6823e-05,
         5.8714e-05,  1.4025e-05, -5.3345e-05, -3.5243e-05, -9.5909e-06,
        -1.5816e-05,  8.8980e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1217e-04, -9.3867e-02,  1.1272e-05,  2.1404e-05, -4.7143e-05,
         3.8074e-05, -3.5301e-05, -5.9764e-05,  1.4314e-05,  4.3691e-06,
        -6.5780e-05,  6.5640e-05, -4.4129e-06, -3.5138e-05,  7.2517e-06,
        -1.0117e-06, -5.2709e-05, -2.0116e-05, -3.7787e-05,  1.8848e-06,
         1.1075e-04, -1.1136e-05, -3.3039e-06, -4.2397e-05,  4.2677e-05,
        -1.9702e-05,  4.5974e-05,  2.6000e-05,  2.5471e-05, -1.4193e-05,
        -5.8131e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 2.8574e-04,  1.5158e-01, -1.5499e-04,  2.8847e-05, -6.9225e-05,
         1.3843e-04, -1.7522e-04, -7.1387e-05, -4.7313e-05, -6.9783e-05,
        -1.8755e-04, -1.3361e-05, -8.3847e-06,  1.4047e-04,  1.1011e-04,
        -5.7658e-05, -1.5279e-04, -1.0705e-04,  8.1486e-05,  5.4423e-05,
         8.5329e-05,  6.9685e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.0899e-04, -7.2153e-02,  1.4269e-05, -1.1409e-05, -2.6678e-05,
         2.8695e-06, -2.3054e-05,  2.4596e-06,  3.6389e-05,  2.8401e-05,
        -1.1090e-05, -2.8242e-05, -1.1308e-06, -1.9516e-05, -9.7893e-06,
         3.1353e-05,  3.2163e-06,  8.8507e-06, -1.1291e-05,  8.8536e-06,
         4.5473e-05,  1.7360e-05, -4.5013e-05,  4.8339e-05,  8.8571e-06,
        -1.1521e-05, -1.9866e-05,  8.6405e-06,  6.6503e-05,  2.5605e-05,
         3.2480e-05, -1.6404e-05, -3.8726e-06,  2.7712e-05, -5.2001e-05,
         3.4650e-05, -2.9615e-06, -6.1098e-05, -1.7110e-05, -9.9429e-06,
        -4.6629e-05,  2.6930e-05, -2.0268e-05,  1.8458e-06,  7.4833e-06,
        -1.3149e-05, -8.5976e-06, -2.6186e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.4208e-04,  3.5406e-02,  4.9616e-05,  1.1844e-05, -2.2731e-05,
        -8.8109e-05, -2.7946e-05, -2.0142e-06, -5.6835e-05,  9.8773e-07,
        -2.1124e-05,  2.6155e-05,  4.8621e-05,  1.2864e-05, -1.0759e-04,
         1.0299e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0503e-04, -9.7429e-02,  5.3844e-05, -3.0634e-05,  3.7498e-06,
        -8.0119e-05, -2.9428e-05, -5.2368e-06, -3.8397e-06, -4.8011e-05,
        -3.2654e-05, -1.9503e-05, -2.8216e-05, -3.3079e-05, -3.7164e-05,
        -1.3001e-05, -5.7129e-05, -5.8032e-06, -1.7523e-05, -2.2917e-05,
        -2.2550e-05, -3.7332e-05, -9.4097e-06,  6.2097e-06, -1.2275e-05,
        -2.0976e-05,  3.8071e-06, -1.7495e-05, -1.8786e-05, -1.5918e-05,
        -1.2871e-05, -1.9507e-05, -2.6092e-05,  4.3556e-06, -4.2106e-05,
         2.8064e-05, -4.2058e-05, -1.1301e-05,  9.9906e-06,  1.7578e-05,
         3.0600e-05, -4.7065e-05, -1.5730e-05,  3.0050e-06,  1.4929e-05,
        -2.1030e-05, -4.9230e-05, -1.7760e-05, -1.2654e-05,  2.6186e-05,
        -1.2859e-05,  1.6966e-05, -2.0466e-06, -6.2909e-05, -3.7697e-05,
        -2.0117e-05,  2.2435e-05,  9.5981e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2970e-04, -4.9108e-02, -1.0351e-05,  1.3487e-05, -4.5620e-06,
        -5.0733e-06, -1.8889e-05, -2.0453e-06, -9.3798e-06, -9.0815e-06,
         1.2956e-05,  1.1751e-05, -2.2134e-05,  2.4377e-05, -1.0656e-05,
        -7.4822e-06, -3.2337e-05,  1.9677e-06,  2.3022e-05,  2.5436e-05,
        -2.9105e-07, -1.2064e-05, -5.7045e-06,  9.2073e-06, -9.3393e-06,
        -1.0639e-05, -2.3702e-05,  1.1076e-06, -2.9765e-05,  3.1062e-06,
        -3.7325e-06, -3.7308e-06,  9.1346e-06, -1.5096e-05, -6.7584e-06,
        -2.0064e-06, -2.2354e-05,  3.4108e-05, -1.0527e-06, -2.2527e-05,
         3.8643e-06, -1.2188e-05,  6.6726e-06, -8.5767e-06, -4.6772e-06,
        -2.7523e-07, -1.0976e-05, -2.1073e-05,  1.2523e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.9276e-05,  4.6247e-02, -2.1071e-05,  5.1244e-05, -4.0646e-05,
         7.9871e-05, -5.8322e-05, -4.6714e-05,  4.9401e-05, -3.6000e-05,
         1.1273e-05,  4.8837e-05, -3.5882e-05,  9.3061e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7636e-04, -8.2521e-02, -2.6361e-05, -2.3178e-05, -5.3208e-05,
         9.0323e-05, -5.7848e-05, -1.6627e-05,  1.0527e-04, -4.2261e-05,
         6.5445e-05, -4.7644e-05, -7.4922e-05,  6.3669e-05, -2.1632e-05,
         6.4431e-05,  4.4267e-05,  6.5012e-05,  3.0658e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0569e-05, -6.4174e-02, -1.3561e-05, -5.2012e-05, -3.5933e-06,
         3.1189e-06, -4.0921e-07, -3.0986e-05,  3.7315e-06, -4.2960e-06,
        -1.2846e-05,  4.8072e-07, -4.2956e-06,  6.2954e-05,  1.3129e-05,
        -3.8225e-05, -3.0586e-05, -1.0540e-05,  4.4312e-06, -4.8775e-05,
         2.4419e-05, -4.3849e-05, -3.3078e-05,  2.1485e-05, -3.0049e-06,
        -1.8252e-05,  6.9758e-05,  1.6696e-06,  4.3063e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4878e-04,  2.0268e-02,  6.5668e-05, -1.1890e-05,  6.1965e-06,
        -1.4047e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7287e-05, -1.0424e-01,  4.3641e-05, -2.6729e-06,  4.1663e-05,
         2.4450e-05, -3.1654e-05, -3.6093e-05, -5.2941e-05,  1.8735e-06,
        -6.1604e-05,  8.3568e-06,  4.1226e-05,  4.8867e-06,  2.8509e-05,
         6.3966e-05,  6.2884e-05,  5.4610e-06,  7.9645e-05, -3.6346e-05,
        -1.0852e-05, -2.2339e-05, -4.4319e-05,  1.3288e-05,  4.3738e-05,
         3.5355e-05,  3.7146e-05,  2.6161e-05, -2.4215e-05, -2.1409e-05,
         2.2761e-05,  7.7170e-05, -3.2016e-05,  3.4323e-06, -4.7005e-05,
         1.8442e-05,  3.1137e-05, -2.5050e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.9008e-04,  1.8659e-01,  6.5098e-05, -9.4524e-05,  2.1261e-05,
         3.1543e-05,  8.9449e-05, -9.2042e-06,  5.2067e-05,  1.5218e-04,
         1.4011e-04,  2.8450e-05,  3.3943e-05,  6.4058e-05, -1.0033e-04,
         1.3275e-04, -2.0305e-05, -7.0427e-05,  3.4233e-05,  6.0773e-05,
        -6.6287e-05, -2.6045e-05,  1.3830e-04, -8.3431e-05, -1.3427e-04,
        -1.6628e-05,  3.6855e-05, -2.3647e-05,  8.2334e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.7101e-04, -1.5579e-01, -4.4276e-05,  1.4162e-05, -7.3082e-05,
         9.8137e-06, -4.2713e-05, -1.6359e-04, -5.0457e-05,  4.3881e-05,
        -1.1103e-04,  1.5703e-05, -7.9144e-05,  2.8178e-05, -5.6392e-05,
        -3.0282e-05,  1.5411e-04,  7.1249e-05,  1.8799e-07, -1.0507e-04,
        -8.3929e-05, -8.1440e-05, -6.1205e-05, -1.0658e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-3.5022e-04, -1.0474e-01,  1.5119e-05,  2.5395e-05, -6.4073e-06,
         3.3616e-05,  1.6189e-05, -2.6199e-05, -4.7141e-05,  3.3848e-06,
        -1.3334e-05,  2.0237e-05,  9.8527e-06,  1.1041e-05, -1.0545e-06,
        -1.1656e-05,  2.1485e-05, -4.3117e-05, -6.4828e-07, -1.8178e-05,
         7.6820e-05, -5.1550e-05, -2.5404e-05, -5.1391e-06,  2.5939e-05,
         4.9958e-05, -1.9328e-06, -2.8399e-07,  2.1139e-05,  1.3929e-05,
        -1.2453e-05, -3.7171e-05,  2.0601e-05, -6.4642e-06, -5.0914e-06,
         2.6812e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0217e-04, -1.0908e-01,  6.6433e-05,  7.5740e-05, -2.7680e-05,
         3.9346e-05, -2.0192e-05,  3.0143e-05,  4.1409e-05, -5.8045e-06,
        -3.2953e-05, -1.5493e-05,  4.0194e-06,  4.0014e-06,  8.7756e-05,
        -1.8142e-05, -1.5412e-05, -6.6112e-05,  4.6544e-06, -3.9168e-06,
         4.5947e-05, -6.5423e-06, -2.6366e-06,  4.7523e-05, -7.0779e-07,
        -1.7291e-05,  8.6020e-06, -5.7775e-05, -4.6505e-05, -6.5945e-05,
         4.6363e-05, -5.4478e-06, -2.9035e-05,  4.0752e-06,  3.7406e-05,
        -7.7246e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1899e-04,  7.7036e-02, -1.8250e-04, -3.3670e-05,  2.0138e-05,
        -9.6818e-05,  5.1152e-05, -7.3174e-05, -7.8429e-05, -2.3778e-05,
         1.5316e-05, -1.0110e-04,  1.9671e-05,  2.8912e-05,  9.4003e-05,
         8.1485e-06, -8.9767e-05, -6.4859e-05, -4.0716e-06, -5.7932e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4348e-04,  5.8703e-02, -2.0161e-06,  1.8186e-05, -4.0001e-05,
        -1.5090e-05,  4.0505e-05,  2.2638e-05, -2.5427e-05,  4.9921e-05,
        -2.4666e-05, -7.1092e-06, -1.1477e-05, -3.3714e-05, -2.0537e-05,
        -2.0809e-05, -1.5129e-05,  1.8086e-05, -4.1648e-06,  1.4202e-06,
         1.5666e-05, -9.0964e-05, -6.5074e-06, -4.9327e-05, -3.9320e-06,
        -5.1701e-06,  1.2678e-05,  1.8946e-05, -2.7839e-05,  2.9303e-05,
        -3.0381e-05,  2.5175e-05,  1.8189e-05, -1.7718e-05, -4.0818e-05,
         1.6899e-05,  9.0493e-06, -4.1473e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.6093e-05,  1.3673e-01,  2.1990e-05,  2.2336e-05, -2.1006e-05,
         2.6785e-05,  8.2959e-06,  9.9784e-05,  4.0044e-05, -4.3616e-05,
         1.7512e-05, -3.1927e-05,  1.2761e-05,  7.7209e-05, -1.5364e-05,
         8.2985e-05,  2.2254e-06, -7.8473e-06, -2.3975e-05,  8.4897e-05,
        -3.1449e-05, -7.2088e-06, -9.3560e-05, -1.5601e-05, -5.8685e-06,
        -3.6399e-06,  4.3649e-05,  2.7818e-05,  5.1161e-05, -3.0266e-05,
        -5.1585e-05, -5.6369e-06,  7.5646e-05, -6.1540e-05, -8.2254e-06,
        -1.6392e-05, -2.3719e-05,  7.6193e-06, -4.5048e-05,  2.0219e-05,
         3.4899e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4009e-05, -1.4866e-01,  1.3553e-05,  4.5566e-05, -3.8965e-05,
        -3.7376e-05, -4.5945e-05,  5.7794e-05,  7.6574e-05,  1.5153e-04,
         4.4733e-05, -1.0095e-04,  2.5932e-05,  4.4674e-05,  4.1238e-05,
         2.9030e-05,  1.6956e-05,  2.9837e-05, -2.2208e-05, -3.8044e-05,
        -3.9520e-05,  6.0493e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1219e-05, -9.4430e-02,  6.7322e-05,  1.3560e-05, -5.0337e-05,
         7.9106e-05,  2.8774e-05,  1.1590e-04,  8.1652e-05, -2.8106e-06,
         2.2430e-05,  7.5266e-05, -1.2520e-05,  3.6178e-05, -6.1439e-06,
        -3.3061e-06,  3.9087e-05,  1.3431e-06,  9.5450e-05,  7.6941e-05,
         5.5099e-05, -4.7335e-05,  6.1446e-06,  3.7212e-05,  1.9406e-05,
         5.6293e-05,  3.2948e-05,  2.4751e-05,  7.0148e-05,  1.0547e-04,
         3.7613e-06,  4.8487e-05,  5.6315e-05,  4.3276e-05,  6.8666e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.2625e-04, -1.2794e-01, -4.5750e-05,  9.2292e-05,  1.3038e-04,
         1.2986e-04,  3.1725e-05,  1.3121e-04,  1.2671e-04,  4.8762e-05,
         1.1320e-05,  8.9995e-05,  7.1703e-05,  7.2369e-05, -1.2124e-05,
         1.3054e-04,  8.6225e-05, -5.7417e-05,  6.3517e-05,  3.4352e-05,
         8.0852e-05,  3.2625e-06,  7.1523e-05,  6.0153e-05,  8.7740e-05,
         1.2433e-05,  3.5373e-05,  4.3036e-05, -7.4583e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8042e-05,  1.0290e-01,  4.2069e-05, -6.0829e-05, -6.4362e-05,
         1.4231e-05, -3.9969e-06,  1.6516e-05,  7.6635e-05,  1.1236e-06,
         1.1854e-04,  1.8250e-06,  2.6238e-06,  1.2184e-06, -4.0275e-05,
        -3.7927e-05,  1.4579e-05, -3.5200e-05,  1.2616e-04, -4.1058e-06,
         4.3084e-05, -7.7679e-05, -5.9028e-05, -4.0937e-05, -6.5885e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8247e-04,  1.1165e-01, -2.0102e-04,  3.7021e-05, -1.4468e-04,
        -8.9037e-05,  8.0851e-05, -6.7182e-05, -7.1644e-05, -1.2131e-04,
        -9.8035e-05,  4.5640e-05, -6.4045e-05,  3.9029e-05,  1.6068e-04,
        -1.1895e-04,  7.1900e-06, -1.7358e-04,  1.4464e-04, -7.9619e-05,
         8.7391e-05, -1.5396e-04, -6.8406e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8187e-04, -5.8483e-02, -1.8050e-05, -8.9946e-06, -1.7442e-06,
         1.4587e-05, -2.7059e-05, -1.5068e-05, -6.6518e-06, -1.6150e-07,
        -3.1242e-05, -1.7267e-05, -2.0442e-05, -9.8796e-06, -7.1080e-07,
         6.6463e-06, -3.3771e-06, -4.5687e-05, -5.1009e-06,  8.5178e-06,
        -3.0460e-05,  1.7236e-05, -6.4960e-06, -1.0509e-05,  7.1660e-06,
         2.6044e-05, -9.7006e-06,  1.9213e-05,  2.4986e-05,  3.9071e-06,
        -4.4108e-05,  1.2094e-05,  9.8102e-06, -3.2404e-05,  2.2845e-05,
         1.6913e-05, -1.7573e-05, -1.8728e-05, -1.5919e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9733e-05,  3.7789e-02,  4.6498e-05,  5.0419e-05,  1.9568e-05,
         8.8451e-06,  4.1732e-05, -1.8559e-05,  9.5077e-05,  3.0547e-05,
        -3.1088e-05,  3.7682e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 7.2704e-05, -1.3291e-01, -3.6775e-05,  6.9447e-05,  7.6359e-05,
        -1.2340e-04,  1.9469e-05,  4.6697e-05,  3.4672e-05, -3.6962e-05,
         2.2633e-05, -1.9814e-05, -2.0300e-05, -5.5643e-06,  5.4531e-05,
         1.2330e-04,  7.9793e-07,  9.8758e-06,  1.0883e-04, -7.9158e-05,
         7.3975e-06,  5.5373e-05,  3.7717e-05, -2.0836e-05, -1.2601e-05,
        -7.8933e-06, -7.2638e-07,  3.5556e-05,  3.6780e-05,  3.8471e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6835e-04,  1.4129e-01, -1.3466e-05, -7.4730e-05,  3.0450e-06,
        -7.0511e-05, -6.4605e-06, -5.5869e-05, -4.7655e-05, -2.9930e-05,
         3.5572e-05, -9.3029e-05, -3.9398e-05,  7.8149e-07, -1.1414e-05,
        -6.0777e-05,  1.4659e-05,  4.5130e-06, -1.5022e-05, -1.5529e-05,
        -7.4929e-05, -4.4045e-05, -3.9255e-05, -8.2520e-05,  4.6366e-05,
        -1.2991e-05,  4.3147e-05,  3.9386e-05, -9.7549e-06,  4.8785e-05,
         1.1770e-05, -1.2024e-04,  7.5283e-05, -1.2873e-06, -1.0251e-04,
        -3.1499e-05, -4.0010e-05, -7.4183e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.0896e-04, -1.3453e-01, -8.9033e-05,  3.6517e-05,  1.1669e-04,
         2.7023e-05,  4.1212e-05,  7.3773e-05, -1.7739e-06, -7.1110e-05,
         1.5946e-05,  2.0010e-05,  5.2140e-05, -1.0597e-05, -7.7208e-05,
         7.9943e-05, -5.2068e-06,  1.1227e-06, -7.1547e-05, -2.6162e-05,
         5.6028e-05,  4.2442e-05,  2.9143e-06, -4.6082e-05, -5.5346e-06,
        -4.8615e-06, -1.5311e-05,  4.5648e-06,  6.1639e-05,  4.2032e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9728e-05, -6.1381e-02, -1.0535e-05, -3.5864e-05, -1.7423e-05,
         4.0299e-05, -3.3705e-05, -1.1667e-04,  3.6969e-05, -3.5805e-05,
        -3.9762e-05,  4.1965e-06, -1.2638e-05,  2.0775e-05, -4.4893e-05,
         1.2569e-05, -6.7179e-05,  1.6996e-05,  6.9371e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.1893e-05, -1.4261e-01, -4.1224e-05,  4.3109e-05, -5.2102e-05,
         2.2670e-05, -3.7095e-05, -1.0826e-04, -1.4073e-04, -2.8340e-06,
        -7.4703e-05, -1.5213e-06, -1.9698e-04,  1.6440e-05, -2.7946e-05,
        -2.5938e-05, -9.2403e-05, -7.0621e-05, -1.3075e-04, -1.1904e-04,
        -5.6322e-05, -1.0681e-04,  5.0461e-06,  6.0467e-05,  6.0709e-05,
        -1.8295e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0760e-04, -1.6722e-01,  1.1069e-04,  2.3394e-04,  8.4037e-05,
         2.9442e-04, -7.6060e-05,  3.8577e-05,  9.7331e-05, -4.2380e-05,
         9.5072e-05,  1.4136e-05,  7.2239e-06,  1.0128e-04,  9.7104e-05,
         7.5770e-05,  2.1981e-04, -6.2863e-06,  2.3440e-04,  1.3243e-04,
         3.5509e-04,  1.1016e-04,  6.8299e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1998e-04, -1.8927e-01,  1.9026e-04, -6.5697e-05, -1.3253e-04,
         3.1572e-04, -2.1277e-04, -7.0322e-05,  1.6979e-05, -1.7242e-05,
         1.5775e-05, -1.5271e-04,  1.9032e-05, -2.9750e-05,  1.9713e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5551e-05,  2.2302e-01, -2.7979e-05,  4.7597e-05, -1.3611e-04,
        -4.9647e-05,  5.8565e-05, -1.3518e-05, -1.6154e-04,  2.2003e-04,
        -3.1533e-04,  1.6555e-04, -1.9686e-04,  5.7489e-05,  3.4823e-05,
         1.3971e-05,  1.3651e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.4628e-04, -2.1033e-01,  1.5215e-04, -2.1558e-05,  3.0321e-05,
         8.1977e-05, -6.8858e-05, -6.7567e-06,  1.4078e-04,  1.8165e-04,
         1.5841e-04,  1.9533e-04,  7.1562e-05,  1.2584e-04, -2.3586e-05,
        -1.2009e-04, -4.4329e-05,  9.6436e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5064e-04, -9.9629e-02,  3.0923e-06, -3.3277e-05,  5.1589e-05,
         2.0007e-05, -6.4062e-05,  6.6075e-05,  3.7448e-05,  1.0166e-04,
        -4.0150e-05,  2.9073e-05, -4.8303e-06,  6.9300e-05,  3.3166e-05,
         1.3327e-05,  3.5715e-06,  5.9472e-05,  1.6098e-05,  4.3132e-05,
        -8.5455e-05,  3.1457e-07,  3.4850e-06,  1.3462e-05,  1.4155e-05,
        -1.5359e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.4348e-04, -9.0935e-02,  1.5886e-05,  6.9183e-05,  2.7503e-05,
        -2.7179e-05,  3.9878e-05,  2.4611e-06,  8.6783e-05,  1.2328e-05,
         1.0393e-04, -1.4893e-06,  3.3702e-05, -9.6436e-05,  9.9124e-05,
         4.0939e-05,  4.5413e-05, -4.2983e-05, -5.1166e-05,  2.8433e-05,
         4.8067e-05,  3.3781e-06, -4.1923e-05,  3.3653e-05,  5.5476e-05,
         4.5514e-05,  1.1328e-05,  7.4475e-05, -8.0830e-05,  1.3070e-06,
         2.9454e-05,  6.9951e-05, -3.6232e-05,  4.7297e-05,  2.4962e-05,
        -1.4740e-05, -1.3550e-05,  2.0584e-05, -2.3892e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7884e-04, -7.0555e-02, -1.8128e-05, -1.6835e-05, -2.7253e-05,
        -2.5996e-05, -2.4101e-05,  2.8056e-05,  8.1618e-07, -1.4177e-05,
        -1.9266e-05,  2.9001e-07, -2.1852e-05, -3.3327e-05,  6.9350e-05,
         1.6051e-05, -3.6008e-06, -4.8255e-05, -4.1116e-05, -1.4425e-05,
         6.2807e-06,  2.2168e-05, -5.7469e-06, -3.4124e-05,  3.8517e-06,
        -1.1745e-05,  1.4633e-05,  4.2112e-05, -3.5026e-06, -1.2340e-05,
         1.0198e-05, -2.1468e-06,  3.2104e-05,  1.8428e-05, -1.6113e-05,
         2.1240e-05, -3.6716e-06, -1.3639e-06,  7.9164e-06, -3.7257e-05,
         1.9720e-05,  1.9361e-05,  6.5025e-08, -1.9195e-05,  2.0730e-05,
        -1.0047e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 3.4681e-04,  1.1303e-01, -5.5889e-06,  5.4426e-05,  1.0606e-04,
        -9.5769e-06, -7.4273e-05, -2.4751e-05,  3.7875e-05,  2.2631e-05,
         3.3446e-05,  1.1459e-05,  2.9155e-05, -6.9192e-05,  6.6320e-05,
        -6.3657e-05, -6.1625e-07, -8.1813e-06,  4.6747e-06, -6.1365e-05,
         3.4460e-05, -6.6863e-05, -1.5235e-06,  3.4629e-05,  2.6604e-06,
        -2.9962e-06,  3.4017e-05,  4.4374e-07, -3.3723e-05,  1.7072e-05,
         5.8931e-05,  1.2562e-05,  2.5085e-05, -6.3588e-05,  6.2652e-05,
        -6.6637e-05,  1.5431e-05, -3.7031e-05, -4.7337e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6916e-05, -1.2278e-01,  5.3457e-05,  2.9187e-06,  2.6621e-05,
         1.1267e-04,  2.9635e-05, -5.9938e-06, -3.3706e-05,  1.0406e-04,
         1.7031e-05,  6.7443e-05,  6.5925e-05,  1.4492e-04,  6.3104e-05,
        -2.0693e-05,  1.0672e-04,  5.7000e-05,  9.2017e-05, -2.5033e-05,
         4.3226e-05,  3.3222e-05,  5.2316e-05,  4.3917e-05,  2.7048e-05,
         6.6177e-05, -2.8886e-05, -7.0708e-06,  9.7273e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5645e-05,  1.3413e-01,  6.8596e-05, -1.0515e-04,  4.9705e-05,
        -7.5221e-06, -5.3750e-05, -1.8028e-05,  2.5444e-04, -1.9919e-05,
         1.8589e-04, -8.7279e-05, -1.1699e-04, -9.3387e-05, -1.2763e-04,
        -3.0631e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.6969e-04, -1.5183e-01, -1.0334e-04, -4.8790e-06,  7.4845e-05,
         5.2284e-05,  1.3037e-04, -3.8891e-05,  5.4458e-05,  2.4925e-05,
        -4.6301e-05, -1.1262e-04, -6.0196e-05, -2.0488e-05,  4.3050e-05,
        -1.9561e-05,  3.1307e-05,  7.5437e-05,  7.4565e-06,  2.5166e-05,
        -6.0384e-05, -4.9736e-05, -3.5605e-06,  6.6781e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0996e-04,  6.2916e-02,  6.9269e-05, -1.1255e-05,  5.0125e-05,
        -4.8911e-05, -1.6083e-05, -1.3153e-04,  3.2959e-05,  2.6691e-06,
         5.7044e-05,  3.9427e-05,  3.9828e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.1631e-04, -8.7104e-02, -5.5508e-07, -3.8364e-05,  3.3302e-05,
        -1.8642e-05, -4.1522e-06, -9.7648e-06,  3.2318e-05,  1.4285e-05,
         3.2428e-06, -1.4404e-05,  2.3769e-05, -4.0942e-05,  2.4309e-05,
        -1.3664e-06, -3.2475e-05,  7.5205e-05,  4.1676e-05,  1.6029e-04,
         1.0191e-04,  4.3361e-05, -6.0359e-05,  1.7637e-05, -9.8284e-06,
        -2.1923e-05, -5.0274e-05, -7.7547e-06,  2.4638e-05,  3.7637e-05,
        -2.6124e-05,  1.4607e-05, -8.1174e-05,  8.4672e-07, -2.0993e-05,
        -4.1415e-06,  1.7743e-06,  5.6802e-06,  1.9396e-05,  2.4169e-05,
        -2.1837e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1064e-05, -9.7202e-02,  1.7721e-05,  1.9254e-05,  1.2370e-05,
        -7.9773e-06, -5.1806e-05, -3.2528e-06,  7.8724e-06, -1.0751e-05,
        -5.8584e-05, -2.9217e-05, -1.7303e-05, -3.1246e-05, -5.2882e-06,
        -1.8208e-05,  1.2302e-05,  5.7039e-06, -4.0036e-05,  3.5368e-05,
         5.0415e-05, -5.3354e-06, -1.3730e-05, -1.1501e-05, -1.8093e-06,
         4.4373e-05,  8.6047e-06, -2.2735e-05, -2.1732e-05, -2.2318e-05,
        -1.9988e-07, -1.5110e-05, -2.8419e-06, -3.8017e-06,  1.9231e-06,
         4.1573e-06,  1.9843e-05, -3.7954e-05,  1.3658e-05, -4.5391e-05,
         1.5364e-05,  1.1109e-05,  4.1292e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2417e-04, -1.2522e-01,  4.3926e-05,  7.0384e-05,  2.6064e-04,
        -4.7736e-05, -1.5487e-06, -4.1988e-06, -8.4657e-05, -3.2728e-06,
        -4.1081e-05,  6.0887e-05, -5.8749e-06, -4.0756e-05, -5.6634e-05,
         3.3457e-05, -3.0200e-06,  4.1235e-05,  3.1730e-05, -4.8891e-05,
        -8.4573e-05,  2.0149e-05,  2.3331e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4474e-04,  2.0848e-02,  5.9757e-05, -5.2173e-05,  6.5478e-05,
         1.9215e-04, -2.7654e-05,  1.0801e-04,  5.7444e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2368e-04,  8.2738e-02, -2.4948e-05, -5.1553e-06,  2.0735e-06,
        -1.0178e-05,  8.3844e-06,  4.8170e-06,  1.7100e-05, -2.1011e-06,
         2.4763e-05, -1.2306e-05, -2.6805e-05,  2.9624e-05, -1.4347e-05,
         2.1409e-05,  2.9960e-05, -1.3744e-07, -7.3937e-05,  3.5592e-05,
        -9.1901e-05, -4.4027e-05, -5.1607e-05, -1.4705e-05, -9.9702e-07,
        -3.0144e-05, -5.3094e-05,  3.4632e-06, -5.7326e-05, -5.1577e-05,
         6.8517e-05, -5.1461e-05,  3.1793e-05, -5.5052e-05, -5.4670e-05,
        -5.2860e-06, -2.9566e-05, -4.7524e-06, -8.1692e-06, -4.1470e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.7442e-06,  1.5064e-01, -4.8755e-05, -1.4971e-04, -1.8805e-04,
         2.0171e-04, -9.1356e-05,  3.6861e-05,  3.1158e-04,  1.6556e-04,
         1.0742e-04,  8.6776e-05,  8.3848e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4898e-04, -1.4110e-01,  2.0229e-04, -8.1438e-05,  3.3208e-04,
         9.2376e-05, -2.4502e-04, -2.4871e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #350: [tensor([-6.1868e-05,  5.0517e-02, -3.2592e-05,  1.5213e-04,  2.0824e-05,
         5.4796e-05,  9.8021e-05,  4.2893e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.6203e-04, -1.2744e-01,  5.1765e-05,  7.9261e-06,  4.9909e-05,
        -1.2253e-05, -2.0090e-05, -3.3825e-06, -3.7182e-05,  1.1016e-05,
         1.2724e-05,  1.6809e-05, -6.5017e-05,  3.1215e-05,  6.2984e-06,
         4.4128e-05, -2.0474e-05, -3.4349e-05,  4.9786e-06, -8.6850e-06,
         9.8129e-06, -1.3155e-07,  2.2270e-05,  1.6978e-05, -1.1167e-05,
        -2.7120e-05,  1.5959e-05,  1.0520e-05, -3.1707e-05, -5.2840e-05,
         4.7516e-05, -7.1829e-05, -5.4773e-05,  3.6027e-05, -1.5694e-05,
        -2.2162e-05,  5.7602e-07,  5.4998e-05, -5.7210e-05, -2.5097e-05,
        -8.7385e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.5593e-05,  1.6130e-01,  6.1546e-05, -3.3692e-05, -6.7805e-05,
        -4.9324e-05, -4.2155e-05, -3.5749e-05, -7.9664e-05, -4.1059e-05,
         1.3859e-05, -2.2010e-05, -1.1009e-04,  9.7971e-07, -2.3726e-05,
         3.5293e-05, -4.5376e-05, -9.6003e-05,  1.9357e-05, -3.9593e-05,
        -9.5172e-05, -7.1807e-05, -8.6212e-05, -7.3624e-05, -1.0172e-04,
        -5.3838e-05,  5.1069e-05, -2.9425e-05,  7.5533e-05,  1.8282e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7297e-04,  1.1204e-01, -1.4398e-04, -2.3911e-04, -1.5011e-04,
        -1.0991e-04, -4.8079e-05, -1.0230e-04, -7.8150e-05,  4.5463e-06,
        -9.7916e-05,  5.5895e-05, -2.3540e-04, -1.6448e-04,  5.1950e-05,
        -9.8903e-05, -4.0412e-05,  3.4184e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.7726e-05, -1.1145e-01,  6.6825e-06,  3.8229e-05,  1.6570e-05,
        -6.4735e-06, -2.7515e-05,  2.5572e-05, -3.8511e-05,  9.9126e-06,
        -6.2421e-06, -3.5993e-05, -3.0084e-06, -2.3237e-05, -6.3859e-06,
         1.6125e-06, -8.2894e-06, -6.8873e-05, -2.2716e-05,  3.5404e-06,
        -2.7573e-05, -1.8454e-05, -4.1291e-07,  5.7789e-06,  1.9838e-05,
        -7.7875e-05,  4.6667e-06, -6.6994e-07,  5.0441e-05,  1.2396e-05,
        -1.3445e-05, -2.2448e-05,  8.7309e-05, -4.1145e-05,  1.7772e-05,
        -1.7818e-05,  3.8877e-05,  2.6505e-05,  9.0328e-05, -1.2081e-05,
         5.0398e-06,  5.2800e-05,  2.3852e-05,  4.0202e-05, -6.9904e-06,
        -4.7236e-05, -6.3390e-07,  1.4649e-05, -3.1748e-05, -3.7130e-06,
         2.0768e-06,  4.8302e-06, -5.0090e-06,  1.8523e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0878e-04,  8.0249e-02,  4.4116e-06, -5.5239e-05, -8.1025e-05,
         7.4423e-05,  2.2084e-06,  1.0468e-04,  5.7610e-07, -5.7731e-05,
         2.7842e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2624e-05, -1.4291e-01,  1.1513e-04,  2.9834e-05,  9.6198e-05,
         1.0831e-04,  6.7516e-05,  1.1599e-05,  1.3954e-04,  3.8336e-06,
         9.4225e-05, -8.9023e-05, -3.9103e-05,  1.2251e-05,  8.4026e-05,
        -2.4963e-05, -9.3326e-05,  1.7288e-06, -4.6105e-05,  6.2742e-05,
        -7.1269e-05,  1.6093e-06,  1.8252e-05,  3.4729e-05,  8.0840e-05,
         5.1055e-05,  1.1995e-05, -2.4869e-05, -1.5212e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6372e-04,  7.6524e-02,  5.6375e-05,  4.9730e-05,  2.5140e-05,
         1.1920e-04, -1.1354e-05,  5.5212e-06,  1.1299e-04, -3.3216e-05,
        -4.5485e-05,  3.3704e-05, -7.8065e-05, -2.4022e-05,  7.1546e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.7678e-05,  3.8312e-02,  4.6780e-06,  1.1443e-06, -2.6538e-05,
        -1.4667e-05, -4.0789e-05, -1.0335e-06, -5.6897e-06,  1.8838e-05,
        -2.6192e-05,  4.8517e-06, -9.3342e-06,  2.1930e-05, -5.8780e-05,
         2.7805e-05, -2.3027e-05,  8.0169e-06,  1.1954e-05, -4.9220e-06,
        -2.4466e-05,  1.9561e-06, -1.2934e-05, -1.9524e-05,  4.0324e-06,
        -1.2832e-06,  1.0451e-05, -4.2379e-06, -1.5106e-05, -1.9742e-06,
        -5.8935e-06,  9.5938e-08,  1.7180e-05, -1.2868e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.1977e-04, -9.3405e-02, -3.3924e-05, -1.6274e-05, -2.1000e-05,
        -6.2054e-05,  3.5205e-05, -7.3025e-05, -8.1307e-06, -3.5335e-05,
         4.7394e-05,  7.2497e-06, -1.1825e-05, -2.4500e-05,  6.4220e-05,
         4.5464e-05,  3.2946e-05, -1.1774e-05, -8.4190e-06,  3.1626e-05,
        -4.9962e-05, -6.8690e-06,  1.0166e-05,  1.5936e-05,  2.9453e-05,
         9.7702e-06, -1.7078e-05, -2.1479e-05, -4.3804e-05, -2.2118e-05,
        -4.2352e-06, -8.2890e-05, -7.3004e-06,  5.2258e-05, -3.1027e-05,
        -8.3814e-05,  4.3657e-05, -2.9652e-05,  2.3873e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5081e-04,  8.2913e-02, -5.0019e-05, -1.2015e-06, -5.3675e-05,
        -1.9276e-05,  1.0379e-05,  9.2222e-05, -7.3260e-06, -4.3860e-05,
        -8.3953e-06, -1.0212e-04,  1.6171e-05,  2.8125e-05, -2.3269e-05,
         1.2919e-05,  1.0025e-05,  6.5853e-05,  1.1506e-04, -2.9105e-05,
         1.5404e-05, -2.7986e-05,  2.8967e-05, -3.9714e-05,  3.0557e-05,
         1.1760e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.3060e-04, -1.0429e-01, -4.2260e-05, -7.8627e-06,  7.9164e-05,
        -2.5376e-05, -5.3879e-05,  2.4639e-05, -4.2496e-05,  2.1335e-05,
         9.8196e-05, -6.0080e-06, -5.8695e-06, -3.0843e-05, -3.9452e-05,
        -5.2222e-05, -2.0246e-05, -2.7218e-05,  8.3832e-06, -3.1843e-05,
        -9.6887e-05, -6.5692e-05, -7.8721e-05, -2.3291e-05, -3.9252e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 8.5741e-05, -1.2216e-01, -4.1646e-05, -1.8267e-04,  1.7517e-05,
        -2.8236e-04,  1.2021e-04, -2.3631e-05, -9.3437e-05, -1.3653e-04,
        -6.6179e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4796e-04, -9.4995e-02, -1.6852e-05, -2.2105e-06,  2.8836e-05,
        -2.3392e-05,  4.6004e-06,  1.1841e-05,  2.6407e-05, -7.0444e-06,
         6.1279e-05,  8.8514e-06,  2.3535e-05, -2.2273e-06, -4.7353e-05,
        -9.6192e-06, -5.0255e-05,  3.2708e-05, -2.2768e-05,  1.3424e-05,
        -1.6929e-05,  6.5373e-06,  1.7399e-05,  5.0364e-06, -1.5487e-05,
         2.2552e-07, -2.8801e-05,  5.9329e-06, -8.6339e-06, -1.1668e-05,
         2.9666e-05,  8.0665e-06, -1.5047e-05,  4.3358e-05,  1.1722e-05,
        -2.6069e-05, -1.4093e-05,  8.6541e-06,  1.5873e-05,  1.0932e-05,
         3.5291e-06,  1.4651e-05, -2.3611e-05,  2.9776e-05,  1.2416e-06,
         1.2317e-05, -1.7487e-05, -8.9604e-06, -1.3779e-05,  5.8248e-06,
         6.4822e-06,  4.6165e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.3722e-05, -1.7560e-01, -1.8487e-04,  1.6091e-05,  6.1467e-05,
        -1.9521e-05,  3.0158e-05, -1.5364e-05, -2.8463e-06,  3.3886e-05,
        -1.1559e-05, -3.8363e-05,  3.1962e-05,  1.2108e-04, -2.8161e-05,
        -4.1062e-08, -3.6938e-05,  7.7711e-05, -2.8374e-05,  2.2689e-05,
        -4.1961e-06, -5.5380e-05,  2.0518e-05,  2.2113e-05,  3.3897e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.1476e-05,  1.3326e-01, -2.1714e-04,  1.3917e-05, -1.4014e-04,
         8.2076e-05, -5.0109e-05, -7.2807e-05, -1.6249e-06, -7.7901e-05,
         5.9824e-05, -7.2408e-06, -6.1169e-05, -7.9955e-06,  3.0947e-05,
        -8.4236e-06, -6.0577e-05, -1.6829e-04, -5.2003e-05,  2.9516e-05,
         1.5606e-04, -4.9326e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0036e-04,  3.3244e-02, -2.3884e-05,  2.8494e-05, -1.1790e-06,
         2.5146e-05, -2.1059e-05,  1.7373e-06,  1.3255e-06,  7.3758e-07,
        -2.8919e-05,  1.5090e-05, -2.2594e-05, -1.8989e-06,  1.4420e-05,
        -1.2637e-05, -3.0906e-05, -1.2817e-05, -7.8157e-06,  9.3247e-06,
        -2.1664e-05,  3.2944e-06, -1.3580e-05, -2.6198e-05, -1.9735e-05,
        -9.0213e-06, -3.3514e-05, -4.8660e-05, -1.6650e-05,  1.5337e-06,
        -2.1841e-05, -1.8670e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1722e-04, -7.4394e-02, -4.2244e-06,  9.5027e-06,  8.3338e-05,
         8.9653e-05,  1.2232e-05, -1.1236e-05,  1.5439e-05,  4.3978e-05,
        -2.7094e-05,  7.6939e-06, -4.1631e-05, -4.1612e-05,  2.6521e-05,
        -3.5460e-05,  3.4139e-06, -2.1931e-05,  1.0482e-04, -1.2909e-05,
         5.2895e-05,  1.7705e-05,  1.9044e-05,  3.0757e-05,  3.7479e-05,
        -4.3912e-06,  3.3134e-05, -3.7508e-05, -5.3226e-05, -3.0730e-05,
         1.0503e-06,  4.8595e-05, -3.5997e-06,  8.4164e-06, -6.0324e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.8372e-04,  8.4642e-02, -1.9003e-05,  2.6680e-05, -1.7074e-05,
        -1.1381e-05,  1.1744e-04,  6.3760e-06, -1.9089e-05,  1.0171e-04,
         2.6426e-05, -3.5795e-06,  7.8154e-05,  2.9185e-05, -7.2198e-05,
         3.9656e-05,  5.6027e-05, -6.3820e-05, -1.6344e-05,  5.1652e-05,
        -5.2065e-06,  2.6706e-05,  4.4384e-05,  2.5802e-06,  2.4363e-05,
         8.0198e-06,  3.8491e-05, -6.6230e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8830e-04,  1.9014e-01, -1.5184e-04,  1.6455e-04,  4.4443e-05,
         1.8467e-05,  1.4422e-04, -6.1760e-05,  1.0603e-04, -2.7838e-04,
         8.9371e-05,  1.7132e-04,  3.3173e-04, -9.3670e-05, -1.1680e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3583e-04,  1.3440e-01, -9.3970e-06,  2.9975e-06, -2.0673e-05,
         4.2376e-05, -9.2159e-05, -8.0945e-05, -5.0379e-05, -5.4867e-05,
        -4.9335e-05,  8.8719e-06, -4.4155e-05, -4.7713e-05,  2.2890e-05,
         9.4667e-06, -8.1371e-05,  2.7906e-05,  3.2643e-05, -7.4927e-05,
        -1.8372e-05, -4.6227e-05, -3.6588e-05, -4.3062e-05, -1.1271e-04,
        -1.3336e-04, -9.8131e-05, -3.2016e-05, -4.3720e-05, -8.3771e-05,
        -5.2818e-05, -2.7217e-05, -7.5138e-05,  8.8900e-05, -2.6079e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9108e-04,  9.6351e-02,  8.9245e-06,  7.4176e-08, -2.2622e-05,
        -2.2140e-05, -2.4624e-05, -4.1176e-05, -6.4160e-05, -1.2690e-05,
         3.1276e-05,  6.6011e-06, -1.1399e-04, -8.9780e-05,  1.1460e-04,
        -1.4562e-04,  6.5525e-05, -6.8673e-05,  1.3823e-06, -1.4731e-04,
        -5.8738e-05,  5.7323e-07,  4.0440e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3459e-04, -1.5515e-01, -6.6110e-05, -1.4809e-05, -4.2850e-05,
         8.9134e-05, -6.1057e-06,  3.0910e-05,  1.6475e-05, -1.8115e-05,
        -1.7800e-05,  1.5458e-04, -2.8821e-05, -1.2760e-04,  1.0343e-04,
         4.8065e-05,  6.1776e-05, -4.9156e-07,  4.8108e-06, -3.1788e-05,
         1.5192e-05, -2.8269e-05, -5.4938e-05,  3.9589e-05, -5.5821e-05,
         5.1874e-05,  1.0575e-05,  9.2618e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6808e-06, -1.0452e-01,  7.8334e-05, -7.1725e-05,  5.7019e-05,
        -8.8904e-05,  3.2177e-05, -1.3547e-05, -2.0654e-05, -1.9482e-05,
         9.9518e-05, -5.4242e-05,  1.7045e-05, -3.9779e-06, -3.9995e-05,
         3.6877e-06,  1.5461e-05,  3.2585e-05,  4.9872e-05,  9.1249e-05,
        -3.2223e-05, -8.5628e-05,  2.5938e-05, -1.2750e-05, -5.4518e-05,
         1.6016e-05, -4.3184e-06, -3.4205e-05, -7.8149e-06,  5.7635e-05,
        -5.1172e-05, -1.0356e-05, -1.0780e-06,  5.0874e-05, -8.5988e-05,
         2.9980e-05,  5.4812e-05, -4.6215e-05, -8.3872e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.6024e-04,  7.5011e-02, -2.0096e-05, -1.2497e-04, -6.8712e-05,
         3.2000e-05, -3.2093e-06, -1.9295e-05, -2.4766e-05, -6.1456e-05,
        -2.0405e-05, -3.5052e-05, -7.8486e-05,  9.4038e-06,  7.3577e-05,
        -5.3653e-05,  3.0280e-05,  9.9895e-08, -4.1126e-05, -5.6572e-05,
        -5.7598e-05,  4.0389e-05,  2.5882e-05, -4.9227e-05,  3.2122e-05,
        -8.2173e-06, -2.3769e-05,  1.1569e-05, -4.1954e-07,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.2743e-05,  7.9614e-02, -5.9880e-05,  5.3345e-05,  2.2493e-05,
        -5.8221e-05,  2.9559e-05,  2.4709e-05,  1.3542e-05,  1.1408e-05,
        -4.8748e-05, -9.9406e-05,  6.1846e-05, -1.4513e-05,  3.5434e-05,
         3.4322e-05,  6.1695e-06,  2.1551e-05,  1.1887e-05,  2.0856e-05,
         6.0790e-05,  5.0048e-05, -6.6109e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1703e-05,  1.4950e-01, -3.2307e-05,  1.4246e-05,  2.0346e-05,
        -1.8794e-05,  6.3015e-05, -1.3753e-04, -3.6660e-05, -4.7060e-05,
        -7.5548e-05, -7.6315e-05, -5.7633e-05, -6.7393e-05,  3.5256e-05,
        -4.7053e-06,  7.4339e-05,  5.1310e-05,  6.2443e-06, -2.2776e-05,
        -6.5798e-05, -8.2062e-05, -2.3579e-05, -4.3674e-05, -1.1748e-05,
         6.9768e-05,  7.9598e-05,  2.1742e-05, -2.4847e-05, -3.6145e-05,
         1.5884e-06, -1.2248e-04,  1.0275e-04, -7.7966e-06, -3.0727e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8683e-06,  1.6396e-01,  2.4086e-05,  6.1921e-05, -4.3850e-05,
         6.5863e-05,  1.1680e-05,  4.9580e-05, -1.5784e-05, -1.6256e-05,
         1.2144e-04, -2.4594e-05,  1.7319e-05,  2.4302e-05, -2.0191e-05,
         6.0112e-05,  3.2538e-05,  1.9190e-05, -6.3960e-07, -4.7047e-05,
         1.0596e-04, -1.6816e-05,  3.2767e-06,  7.1052e-05,  6.2784e-05,
         2.7634e-05,  7.8838e-07,  1.9609e-05, -1.9689e-05,  1.2994e-06,
         6.5967e-05,  7.6414e-05,  4.7122e-05, -2.7601e-05,  1.1388e-04,
         2.3892e-05,  1.4659e-05,  3.0647e-05, -7.8000e-05,  9.6888e-06,
         5.4676e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3767e-04, -1.2845e-01, -2.4793e-05,  4.1552e-05, -2.0753e-05,
         9.3273e-06, -4.4826e-05, -6.3842e-05,  1.8250e-05,  1.2879e-05,
         2.3129e-05, -8.7138e-06,  4.5964e-05,  2.7174e-05, -2.1291e-05,
        -6.0671e-06,  2.3410e-05,  1.2390e-05,  6.8350e-06,  2.1463e-05,
         2.1711e-05,  2.0239e-05, -4.5957e-06, -2.4480e-05,  3.6082e-05,
        -3.9103e-06, -1.6277e-05, -4.8718e-05,  3.3593e-06, -1.4162e-05,
         1.5685e-05,  4.8808e-05,  2.0093e-05,  1.8090e-06,  5.0198e-06,
         1.3764e-05,  9.2445e-06, -1.4575e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1277e-04, -6.8234e-02,  1.1584e-05,  6.2438e-06,  1.9553e-05,
         5.3807e-05, -2.3353e-05,  2.3752e-05,  1.3352e-05,  2.4326e-05,
         1.4539e-05, -1.1846e-06, -2.0095e-05,  1.4751e-05, -4.8948e-05,
        -2.9128e-05, -2.2440e-05,  4.3856e-06, -1.9130e-05, -1.6801e-05,
        -3.4063e-05,  2.7378e-05, -1.0091e-05,  2.2843e-05, -2.4788e-05,
         7.4057e-06, -1.2183e-05,  7.0374e-06, -5.3303e-05,  6.3085e-07,
        -1.9456e-05, -9.9803e-06, -4.8373e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.9943e-05,  7.9331e-02,  3.7422e-07, -2.0344e-05, -3.5602e-05,
        -5.0107e-05, -6.5803e-06, -8.0718e-05,  3.1887e-05, -3.9981e-05,
        -3.6782e-05,  2.2701e-05, -1.3407e-05, -1.7862e-05,  4.5484e-06,
         4.7961e-05, -8.9759e-05, -4.6882e-05, -4.1003e-05, -1.1004e-05,
         1.8946e-05, -2.1314e-05,  1.0823e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.1655e-04,  2.0282e-01,  1.7041e-04, -3.0709e-05, -1.3514e-04,
        -4.6315e-05,  1.5271e-04, -1.0134e-04, -2.1403e-04,  7.7202e-05,
         1.7176e-04, -1.6949e-04, -6.3563e-05,  8.7615e-05,  3.0755e-05,
         1.4486e-05,  6.9028e-05,  1.4909e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.3561e-05, -1.2222e-01,  6.5546e-05, -9.2658e-08,  1.0659e-05,
        -2.0710e-05, -2.6254e-05, -1.0567e-04, -1.4910e-05,  3.1391e-05,
        -1.3913e-06, -1.3796e-05, -2.4491e-05,  5.3073e-05, -7.4135e-05,
        -1.7357e-05, -5.4260e-05,  1.2984e-04,  3.5484e-05,  2.6897e-05,
         1.3291e-05,  1.7857e-05,  6.8544e-05,  1.6203e-06,  5.8855e-05,
         4.8497e-05, -4.7056e-05, -6.1556e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0323e-04, -6.7025e-02,  1.3337e-05,  3.8403e-05,  4.2384e-05,
        -7.6684e-06, -5.2084e-05, -2.4108e-05, -5.7219e-05,  4.7852e-06,
        -1.2827e-04,  4.3193e-06, -2.9088e-05,  5.3481e-05, -1.1033e-05,
        -6.1844e-05,  3.7864e-05,  4.0918e-05, -4.9447e-05, -6.6970e-05,
        -2.4397e-06, -3.8101e-05,  2.5401e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4478e-04, -1.1411e-01, -2.5795e-06, -1.4966e-05, -1.2119e-06,
         3.0645e-05, -2.5599e-05, -1.6244e-05, -1.4671e-05,  5.4551e-05,
         6.2269e-06, -3.8859e-06,  4.7536e-05, -3.1163e-05,  1.4258e-05,
        -2.7710e-05, -1.5928e-06, -1.9304e-05,  2.5388e-05, -1.3767e-05,
        -6.1943e-06, -1.8823e-05, -2.4304e-05,  2.2555e-05, -1.3564e-05,
        -3.2447e-06, -2.0927e-05, -3.4376e-06, -2.2056e-05,  3.9047e-05,
        -6.9513e-06, -3.5508e-05,  4.9768e-05,  1.5592e-05, -1.1498e-05,
         2.3792e-05, -3.7795e-05, -3.3730e-06, -2.3344e-05, -5.9728e-06,
        -2.6253e-05,  6.6966e-05, -1.5749e-05, -3.1431e-05,  7.6374e-06,
        -2.6136e-06,  1.0801e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.0837e-05, -1.3402e-01, -1.5434e-04,  8.7919e-05, -1.7156e-04,
        -2.0995e-05,  1.7029e-04,  8.3338e-05,  6.1002e-05,  8.8743e-05,
         1.0310e-04, -5.4823e-05,  8.9050e-05,  7.4394e-06, -1.5286e-04,
         2.8102e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 8.8006e-05, -1.2560e-01,  1.8558e-05,  1.1472e-04,  5.8589e-05,
         1.6719e-07,  3.1119e-05,  3.2182e-07, -1.3547e-04, -9.8991e-05,
         2.6008e-05, -7.0064e-06,  2.4070e-05, -2.9392e-05, -9.9986e-06,
         4.3317e-05,  8.8730e-05,  2.8607e-05,  2.1148e-06,  4.4550e-05,
        -1.8130e-05,  1.0352e-05,  1.5902e-04, -1.4098e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2345e-05,  1.2548e-01,  3.1735e-05, -2.3924e-06, -5.0443e-05,
        -5.6713e-05, -3.4658e-06, -3.8695e-05, -9.8144e-06, -2.2514e-06,
         4.4528e-05,  1.0759e-04, -1.7229e-05,  2.4842e-05,  6.7058e-05,
        -8.6909e-06, -3.2241e-06,  3.4067e-06,  2.7930e-05, -1.0842e-05,
         2.8134e-05,  3.6069e-05,  1.9348e-05, -3.0779e-05,  4.4210e-06,
        -1.4437e-05,  7.0899e-05,  1.0649e-04,  5.6001e-05, -6.8514e-05,
        -9.3681e-06,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.3250e-05, -1.8127e-01, -7.0678e-05, -1.5330e-05,  8.3265e-05,
         1.5521e-04,  1.0596e-04,  1.9545e-05,  2.0852e-04,  1.5099e-04,
         4.8208e-05,  1.6913e-04,  4.7157e-05,  2.0677e-04,  3.7632e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.5008e-04,  1.0978e-01, -4.7874e-05, -1.7096e-04, -1.2990e-05,
        -7.7584e-05, -1.1136e-04, -1.2758e-04,  1.0843e-04, -1.4283e-04,
         4.1749e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-9.8687e-05, -6.2370e-02,  5.3846e-05, -2.0180e-05, -4.6324e-05,
        -7.0468e-05,  4.3722e-05, -5.3116e-06, -1.3170e-05, -1.0786e-05,
        -1.2243e-05,  2.2361e-05, -5.6840e-05,  7.5206e-06, -9.9755e-05,
        -6.8272e-05,  4.8185e-05,  5.3078e-05, -5.6846e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9141e-05,  5.9644e-02,  8.9973e-05,  1.1857e-05,  2.5169e-05,
         7.0558e-05,  4.9821e-05,  1.2900e-05, -1.6843e-05, -5.3174e-05,
        -7.4750e-05, -3.1713e-05, -1.2947e-05,  4.2532e-05,  8.7910e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.8462e-05, -1.7924e-01,  2.0005e-05, -1.7003e-04,  3.2445e-05,
        -3.0530e-05, -3.9372e-05, -9.3959e-06, -1.2960e-04, -4.3557e-05,
        -4.3638e-06, -1.2996e-04, -1.7424e-04,  1.1468e-04,  1.5265e-05,
         6.6968e-05,  1.8513e-04, -1.2433e-04, -6.4533e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.7440e-04,  1.1627e-01, -5.2998e-05, -5.3997e-05,  2.0230e-05,
         5.8353e-06,  1.3760e-04, -2.9163e-05, -3.8232e-05,  2.0790e-05,
        -2.5466e-05,  5.0818e-05,  7.3046e-05, -4.4438e-05,  2.7303e-05,
         1.2802e-05,  1.5219e-05,  3.1675e-05, -6.4999e-05,  5.0388e-05,
         5.9722e-05,  8.3247e-05, -3.0810e-05, -2.5043e-05, -2.9300e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2277e-04,  1.1744e-01, -8.9857e-05,  2.4020e-07,  5.7863e-05,
        -1.9480e-05,  3.6522e-06, -3.6393e-06, -5.9003e-05, -9.4441e-05,
         6.4752e-05, -1.5398e-04, -6.0801e-05,  9.7161e-05,  3.1737e-05,
         4.9905e-05,  3.0518e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0477e-04, -1.2320e-01,  8.2466e-06,  1.0754e-04, -3.1831e-05,
         2.1199e-05, -1.7606e-07,  4.4197e-05, -9.1355e-06,  2.2794e-05,
         1.8979e-05,  7.8339e-06,  4.9420e-05,  1.4335e-05,  1.1176e-05,
         6.0457e-05,  1.9511e-05,  5.5125e-05,  2.3498e-05,  5.6363e-05,
        -7.0191e-05,  2.8967e-05,  1.1823e-05,  3.7929e-05,  8.0245e-05,
        -1.5533e-06,  8.5345e-05, -6.5916e-05,  4.0473e-05,  2.6044e-05,
        -1.7234e-05,  3.4301e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0560e-04, -1.1077e-01, -4.1138e-05, -8.7070e-06, -2.1454e-05,
        -1.3850e-05, -1.6963e-05, -5.8150e-05,  7.2743e-05, -7.3507e-05,
         6.3756e-05, -1.4304e-04,  2.1774e-05, -3.6399e-06, -3.7153e-06,
         3.8884e-06,  6.0876e-06,  8.0385e-05,  6.8531e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.0453e-05,  1.1758e-01, -8.8877e-05, -9.2175e-05, -6.0053e-05,
        -4.5710e-05, -9.4675e-05,  1.3953e-04,  2.0487e-04, -2.2856e-04,
        -4.1554e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 6.3833e-05, -9.9162e-02, -4.9562e-05,  6.7311e-05, -4.7078e-06,
        -4.1224e-05, -1.2061e-05, -2.1950e-05, -1.8409e-05,  4.2520e-05,
        -2.5582e-05, -1.0143e-05, -2.9584e-05, -2.0925e-05, -1.2718e-05,
        -1.0049e-05, -5.7219e-05, -7.9163e-07, -1.5967e-05, -8.3110e-06,
        -1.5491e-05, -1.0313e-05,  6.4050e-06,  7.1272e-06, -2.3735e-05,
         2.4424e-05, -4.3838e-06, -7.1593e-07,  2.1716e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0840e-04, -1.1030e-01,  2.4280e-05,  7.4599e-06, -6.3607e-06,
        -3.5645e-05,  5.6028e-06, -3.8070e-05, -3.1714e-05,  1.2296e-06,
         3.8474e-06, -3.2336e-05, -1.0698e-05, -1.5207e-05,  1.5385e-06,
         1.3815e-05,  8.9881e-07,  3.2716e-05, -1.7800e-05, -2.5001e-05,
         2.5342e-05,  7.9791e-06,  4.0255e-05,  4.7425e-06,  9.6462e-06,
        -1.8320e-06,  4.6930e-05,  3.8737e-05,  3.0258e-05,  3.0381e-06,
        -8.4300e-05, -2.0939e-05,  1.2485e-05,  5.1983e-06, -1.6512e-05,
         3.2151e-05, -5.9741e-05,  1.0373e-06, -3.9258e-05, -1.0016e-05,
         2.4309e-05,  2.2657e-05,  1.6981e-05, -3.0399e-05, -2.5362e-05,
        -1.8199e-05, -3.0074e-05, -5.8295e-06, -2.3770e-05, -3.3469e-05,
        -1.7582e-05,  2.5955e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.9996e-05,  1.8783e-01,  1.8673e-04,  4.6885e-05,  3.2581e-05,
         5.0518e-05,  2.2474e-04, -3.2630e-05, -9.7473e-05,  2.7722e-05,
         4.6043e-05, -3.1798e-07, -5.7933e-05, -2.9221e-05, -1.0673e-05,
        -1.2930e-04, -3.5914e-04,  1.0403e-04, -1.0243e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4835e-05,  6.1739e-02, -3.1537e-05,  6.6879e-05,  4.3238e-05,
         1.1585e-05, -3.1455e-05, -2.2449e-05, -6.2426e-06,  3.9493e-05,
        -5.4953e-05,  3.4898e-06, -8.2705e-06,  3.3408e-05, -1.9201e-06,
         5.1706e-05,  4.1771e-05,  2.2189e-05,  3.2882e-05, -3.2835e-05,
        -1.6743e-05,  1.1778e-05, -4.5691e-05, -3.9147e-05, -1.7716e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9682e-04,  2.0982e-01, -1.2799e-04, -7.2126e-05, -1.5169e-04,
        -1.6506e-04, -1.8859e-04,  8.3568e-05,  2.4738e-05, -2.7572e-05,
        -2.1247e-04, -1.3660e-04, -1.7839e-04, -4.7166e-05, -1.0355e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.6323e-05,  8.5717e-02, -5.9656e-05, -1.0273e-04, -2.6460e-05,
         1.8143e-05,  3.0453e-05, -1.0588e-04,  1.3121e-05, -1.0290e-04,
         1.2891e-06, -9.8236e-06,  2.3577e-06,  5.7835e-05, -1.2289e-04,
        -4.4241e-05, -2.8689e-05,  4.6138e-05,  1.1337e-05, -7.6865e-06,
        -1.1434e-04,  5.8658e-06, -4.0384e-05,  2.4232e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5009e-04,  4.9654e-02,  4.7493e-06, -3.7181e-05, -2.4151e-05,
         5.5230e-05,  5.9361e-05, -3.0676e-05, -5.6675e-06, -2.6084e-05,
         2.5968e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0121e-04, -5.9940e-02, -1.5124e-05,  2.8025e-07,  1.3142e-05,
         1.5987e-05, -4.7577e-05,  4.1184e-05, -4.1427e-05, -2.4901e-05,
        -8.2564e-06, -3.9895e-05, -2.3635e-05,  2.6411e-05,  6.6020e-06,
         1.0906e-05, -6.3384e-05, -8.5141e-06, -9.9150e-06,  7.0605e-06,
        -4.0698e-05, -1.2595e-05, -2.0775e-05, -5.6331e-05,  9.3944e-06,
         2.4439e-05, -2.6191e-06, -1.6617e-05, -4.7409e-05, -5.6600e-05,
        -2.0076e-05, -1.7004e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7249e-04, -1.1357e-01,  1.7828e-04,  2.2576e-05,  2.1998e-05,
         5.2156e-05, -5.2374e-05, -4.1722e-05,  4.5988e-05, -5.6822e-05,
         1.3247e-04,  2.9780e-06,  1.9776e-04, -5.0808e-05, -4.0851e-05,
         4.3107e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.9929e-05,  4.1541e-02,  1.7413e-05, -4.0945e-05, -9.9047e-06,
        -6.0390e-06,  2.4934e-05,  3.8873e-05, -8.3460e-06,  3.9494e-05,
         4.1244e-05, -6.0169e-06,  1.6478e-05, -1.2358e-05,  6.1752e-06,
         3.6468e-05,  2.5467e-08, -9.3688e-06,  2.7650e-07,  2.0148e-05,
         3.0461e-05, -2.6297e-06,  3.4437e-05, -1.4636e-05,  4.1407e-05,
         6.0640e-05,  1.8108e-05,  1.2606e-06, -3.0747e-05,  5.4641e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.9139e-05, -1.5636e-01, -5.0744e-05, -6.8053e-05, -1.1598e-04,
        -8.7246e-08, -6.7829e-05,  1.6005e-04,  1.1492e-04, -8.6219e-05,
        -1.3107e-04, -7.7606e-05, -1.4220e-04, -2.5064e-04, -1.6131e-05,
        -2.0707e-05, -1.5613e-04, -1.8593e-04,  1.7055e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1641e-04, -1.8518e-01,  1.0187e-04, -9.0746e-05,  3.5105e-05,
        -1.6644e-04,  4.6467e-05, -2.5683e-05,  5.8051e-05,  1.2180e-05,
         1.3540e-04,  7.9137e-05, -3.3504e-05, -2.8460e-05,  7.9493e-05,
         8.6148e-05,  1.4030e-04, -4.3576e-05, -5.1644e-05,  9.7320e-05,
         1.2280e-04, -9.3609e-05, -3.1147e-06, -9.0858e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 1.7099e-04, -6.0020e-02,  3.7205e-05,  3.7768e-06, -8.1664e-06,
        -3.3399e-05,  3.6937e-05,  2.7176e-05, -8.8880e-06,  4.2363e-06,
         2.1190e-06,  1.2952e-05,  2.6810e-06, -1.7876e-06,  8.5651e-06,
        -5.3159e-06, -1.0137e-05, -2.1353e-05,  5.3694e-06, -6.9528e-06,
        -2.1478e-05, -2.1476e-05,  2.5333e-05, -1.9776e-05, -1.0577e-05,
        -2.7247e-05,  2.2928e-06, -7.7608e-06, -5.6487e-05, -3.0551e-05,
        -2.4120e-05, -1.5804e-05,  3.7796e-06, -1.1564e-05, -5.0032e-06,
         1.4811e-05,  3.7237e-05,  1.7402e-05, -3.8339e-05,  6.7000e-06,
         1.1947e-05,  1.0560e-05,  3.5440e-06,  2.0626e-07,  8.9177e-06,
        -4.3997e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1430e-04,  1.5583e-01, -7.9608e-05, -5.6052e-05, -6.5321e-05,
         1.5412e-05,  4.2645e-05, -2.5729e-05, -2.1355e-05, -1.6748e-04,
         1.0064e-04,  7.7689e-05, -2.6612e-04, -1.1594e-04, -9.5369e-05,
        -1.4978e-05,  7.7269e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2287e-04, -1.1342e-01,  2.7929e-05,  1.3006e-05, -8.3123e-05,
         4.8967e-05,  7.1403e-06, -4.6365e-05,  2.4018e-05,  1.6925e-04,
        -4.3046e-05, -4.2705e-05, -3.3079e-05, -9.7288e-07,  3.1805e-05,
         4.0298e-05, -2.5801e-05,  6.4340e-05,  7.3735e-06,  2.6016e-05,
        -5.1549e-05,  4.6248e-05,  2.5587e-05, -2.6441e-05, -6.0995e-06,
         3.7721e-05,  3.0238e-05, -5.7473e-06,  1.1161e-05, -2.1193e-06,
        -2.4440e-06,  2.5918e-05,  3.4781e-05,  6.5854e-06, -3.6565e-06,
        -5.6718e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0008e-04, -1.5109e-01,  8.4856e-05, -1.5630e-04, -8.0149e-07,
        -1.3170e-04, -6.7016e-05, -3.1249e-05,  2.6585e-05, -3.0480e-05,
        -6.0191e-05, -7.8255e-05, -1.3697e-04, -6.2683e-05,  1.3711e-05,
         6.7092e-05, -2.5122e-05, -2.0064e-05,  3.6866e-05, -4.6048e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.5753e-04,  8.4800e-02, -4.1223e-05,  2.6395e-05,  2.4793e-05,
         2.5427e-05, -2.3811e-05, -4.0936e-05, -3.1753e-05, -2.3807e-07,
        -4.3779e-05,  2.4599e-05,  5.5818e-05,  8.3915e-06, -4.3085e-05,
         3.7526e-05, -5.9148e-06, -1.7163e-05,  1.7296e-05, -1.9254e-05,
         5.8957e-06,  6.7595e-06,  3.1696e-05,  3.5709e-05,  6.2198e-05,
         2.1818e-05,  9.8465e-06,  3.0904e-06, -1.3369e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.2744e-04, -1.2522e-01,  3.4944e-06,  2.2992e-05,  2.8736e-05,
        -1.4275e-05, -6.5017e-05,  4.1258e-06,  3.6243e-06,  1.5953e-07,
         2.2115e-05, -3.0566e-05,  3.6994e-05, -1.1740e-05,  3.9778e-06,
        -5.2012e-05, -2.9603e-06,  6.3693e-05, -6.6234e-05, -1.7332e-05,
         2.5964e-05, -2.1423e-05, -3.5798e-05, -3.0465e-05,  2.4149e-05,
        -2.9088e-05, -3.6947e-05, -2.1605e-05,  4.3835e-05,  1.4864e-05,
        -1.9050e-05, -1.7384e-05, -3.7755e-05,  3.8124e-05,  1.7101e-05,
        -8.4218e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.3829e-06,  2.0844e-01,  4.6436e-05, -1.4236e-04,  8.1651e-05,
         3.6584e-05, -8.0272e-05, -5.9902e-07,  5.0666e-05, -1.5008e-04,
         1.1765e-05,  5.2289e-06,  1.6668e-05,  5.1958e-07,  1.2797e-04,
        -6.9740e-05,  1.2519e-04, -1.3114e-05, -1.5355e-04,  8.4644e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.8259e-04,  2.2363e-01,  5.1582e-05,  8.6166e-05, -6.8553e-06,
        -5.7806e-05, -6.5441e-05, -7.6849e-06, -1.5691e-05,  2.8024e-05,
         3.0932e-05,  1.6521e-05,  8.9325e-06, -2.5722e-05, -5.8517e-05,
         2.0949e-05, -1.7576e-05,  8.0143e-05,  8.5604e-06,  3.4506e-06,
        -4.0295e-05, -7.5388e-06,  2.1012e-05,  5.4583e-05,  6.3308e-05,
        -6.3271e-06, -5.1970e-05,  8.1613e-05, -5.9154e-07, -1.6483e-05,
         1.1412e-05,  6.2316e-06, -5.8999e-05,  3.4198e-06,  6.5001e-05,
        -8.0526e-05, -2.6626e-05,  9.1441e-06,  3.2313e-05,  9.7768e-05,
         1.3433e-05,  4.8668e-05, -1.1927e-05,  2.1989e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0904e-04, -5.6489e-02,  2.7805e-06, -1.1109e-05,  2.7270e-06,
        -2.6860e-05,  1.4122e-05, -2.4207e-05,  6.8354e-06, -1.7081e-05,
        -8.8488e-06, -8.9079e-06, -4.7708e-06, -9.2759e-07, -2.0552e-05,
        -3.8186e-05, -3.3153e-06, -8.1607e-06, -1.2902e-05, -2.7920e-05,
         1.6709e-05, -6.9399e-06, -4.5983e-05,  1.4948e-06, -1.9466e-05,
        -2.0854e-05, -2.4744e-05, -1.8891e-05, -4.3560e-05, -3.1012e-06,
        -3.4230e-05, -1.5039e-05, -2.1122e-05, -8.0542e-06, -3.3978e-06,
        -2.4627e-05,  7.6975e-06, -1.5495e-06,  1.7636e-05, -1.4860e-05,
         7.3602e-07,  7.6729e-07, -3.6858e-06, -7.6868e-07,  3.2359e-05,
         3.2775e-07,  2.0707e-05, -1.3363e-05,  6.4214e-06, -1.4348e-05,
         1.7805e-05, -3.8827e-05,  6.3922e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.8143e-06, -1.2687e-01,  4.7070e-05,  3.4666e-06,  8.4067e-05,
        -1.4584e-05,  8.2247e-06,  4.4182e-05,  4.2733e-05, -1.0159e-05,
        -3.7267e-05,  2.3813e-05,  2.6121e-05, -1.8946e-05,  3.0156e-05,
        -6.1169e-05,  4.7703e-05, -1.2510e-05,  4.0767e-05,  9.4816e-06,
         7.1394e-05,  1.7840e-05, -5.4420e-05,  7.4953e-05,  3.8685e-05,
         2.9616e-06,  5.4018e-05,  1.7414e-05, -2.6122e-05,  6.4758e-05,
        -2.9606e-06, -9.1378e-06,  4.0457e-05,  2.2799e-05, -3.3477e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.0103e-04,  7.7437e-02,  1.0953e-05,  1.6783e-05, -5.5845e-06,
         6.8852e-05,  2.6942e-05,  4.9119e-05,  1.4715e-05,  1.1012e-04,
        -1.6707e-05, -5.4824e-05, -4.1643e-05,  1.4074e-05, -1.2201e-05,
         4.2621e-05,  5.8407e-05,  4.4155e-05,  3.7563e-05, -2.0213e-05,
        -1.4457e-05, -5.3494e-05,  4.6932e-06, -9.0544e-06,  2.2088e-05,
         6.1487e-05, -3.9507e-05, -1.8738e-05, -3.3017e-06,  5.7923e-05,
         7.0750e-05, -3.1452e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7869e-05, -1.0019e-01, -9.7734e-06,  9.1886e-06,  1.9164e-05,
        -1.5789e-05,  7.9242e-06,  1.3680e-05,  3.6541e-05,  1.7166e-05,
        -1.9501e-05, -4.8786e-05, -2.7471e-05,  9.2015e-06, -5.9069e-05,
         3.9106e-05, -3.0110e-05,  5.0579e-05,  2.0830e-05, -1.4719e-06,
        -2.7813e-05, -4.0610e-05, -1.6677e-05,  5.0332e-06,  3.9468e-05,
        -7.9824e-06, -2.9071e-05, -2.0147e-05,  4.1785e-05, -1.2665e-05,
        -3.5398e-05,  1.5201e-05, -1.6632e-05,  3.4281e-05,  1.6800e-05,
        -2.2771e-05,  7.3037e-06,  4.0792e-05,  2.6213e-05,  9.1169e-07,
        -4.2776e-06, -5.3413e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 2.7005e-04, -2.3525e-01, -1.1050e-07, -7.6764e-05, -6.7883e-05,
        -1.2951e-04,  1.2668e-05, -6.3335e-05,  7.8836e-05, -1.9975e-05,
         6.8194e-07,  9.5264e-05,  5.8734e-05, -2.1089e-04, -9.9039e-05,
        -5.2505e-05, -7.6029e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.1789e-05, -9.6533e-02, -3.9376e-05, -2.3793e-05,  3.3009e-05,
        -1.5153e-05, -1.5537e-05, -8.9116e-06,  5.3809e-05, -2.4511e-05,
         8.1778e-06, -2.8367e-05, -8.0949e-05,  6.1661e-06,  5.8499e-07,
        -4.4272e-05, -6.7314e-05, -1.7375e-05,  9.7081e-06,  1.0102e-05,
        -2.8520e-05, -1.8615e-06,  5.3146e-06,  6.8905e-07, -2.0138e-05,
         1.9578e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.6839e-05,  6.2634e-02,  5.1368e-05, -5.6801e-05,  6.5660e-05,
        -1.9555e-04,  5.1685e-05, -1.6118e-04,  1.4510e-04,  1.4399e-04,
        -7.0852e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7699e-04,  4.5011e-02,  3.3080e-05,  3.4314e-05, -5.9421e-05,
         3.2422e-05, -4.0839e-06, -1.4739e-05, -2.4092e-05,  6.1533e-05,
        -6.4645e-05,  2.1453e-05,  1.8875e-05, -1.9926e-05, -5.1020e-06,
        -3.1229e-05,  2.1524e-05,  2.0757e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3827e-05,  1.1433e-01, -7.7258e-05,  8.3543e-05,  8.8417e-05,
         8.7509e-05, -5.7627e-05, -8.0823e-05,  4.7681e-05,  7.6819e-05,
         2.4161e-05,  4.3132e-05, -4.3733e-05, -3.1830e-05,  6.0728e-05,
        -5.5851e-05,  1.2590e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.0480e-04,  1.8803e-01, -7.3764e-05,  1.9782e-04, -1.4562e-06,
         1.3343e-04,  1.0295e-04,  5.6319e-05,  2.8347e-06,  1.6002e-05,
        -1.1567e-05, -1.1150e-04, -1.2800e-04,  2.6112e-06,  6.0167e-05,
         3.6802e-05, -8.2700e-05, -2.6570e-05,  1.8918e-04,  9.9964e-05,
         9.1813e-05, -9.0184e-05, -9.3722e-06,  2.1117e-04, -3.6227e-05,
         7.3786e-05, -7.7547e-05,  1.1151e-04,  1.1113e-04,  1.4715e-04,
        -6.8983e-05,  2.7617e-05,  1.4512e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2321e-04, -1.8334e-01,  1.7131e-05,  2.6607e-05, -6.1721e-05,
         6.7109e-05, -1.5579e-05, -2.2063e-05, -7.5545e-05, -6.4035e-06,
        -8.5561e-05, -6.6209e-05,  5.3028e-05, -1.1587e-04,  1.1719e-04,
         1.1328e-04,  7.2865e-05, -1.1780e-04, -8.0732e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1870e-04,  7.1870e-02, -6.9180e-05, -1.1541e-05,  1.4247e-04,
         3.0064e-04, -2.2501e-05,  1.2760e-04, -1.6633e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.9405e-04, -1.9765e-01,  7.9877e-06, -3.0203e-05,  2.5970e-04,
        -2.7597e-05,  2.8333e-05,  1.5205e-04,  1.6614e-04,  3.9594e-05,
         2.6035e-05,  6.7299e-05, -1.0564e-04, -1.1955e-05,  5.3435e-05,
         2.8546e-05, -3.0500e-05,  7.8716e-05, -9.6215e-05, -3.5586e-05,
        -1.4057e-05, -7.1078e-05, -8.9315e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.5894e-04, -1.1074e-01,  1.0451e-05,  1.7281e-05,  1.0054e-05,
         4.0502e-05,  3.4580e-05,  1.1504e-04,  1.0494e-05,  2.6737e-06,
        -2.1425e-06,  1.9101e-06,  6.5270e-05, -3.9295e-06,  2.2400e-05,
         5.8526e-05, -3.1256e-05,  3.1991e-05,  3.2432e-05, -1.1325e-05,
         5.3876e-05,  9.2745e-05,  1.0581e-04,  1.5850e-05,  1.0891e-04,
         1.8759e-06,  1.8380e-05,  1.7347e-05,  1.5409e-05, -1.4684e-05,
         3.5211e-05,  1.0255e-06,  1.5461e-05,  8.6257e-05,  8.7428e-06,
         1.8627e-05,  2.7274e-05,  1.6508e-05,  2.7812e-05,  3.4127e-05,
        -8.6563e-06,  4.2736e-05,  2.1888e-05, -1.1928e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.3059e-05,  1.0883e-01,  1.1169e-05, -3.0460e-05, -5.3961e-05,
        -1.1494e-05, -7.3542e-05,  4.9191e-05, -1.9547e-05, -9.3450e-06,
         6.2187e-05,  1.5946e-05,  4.2669e-05,  1.8930e-05, -5.0669e-05,
        -1.4432e-05,  2.6191e-05, -4.6588e-05,  5.4182e-05,  2.2397e-05,
        -9.2949e-05, -1.3518e-05, -4.3285e-05,  4.8057e-05, -8.5594e-05,
        -1.2324e-05,  3.3391e-05,  1.9136e-05,  1.4065e-05, -3.5420e-05,
         3.9493e-05, -7.2432e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.2365e-05,  9.7022e-02, -3.5889e-05,  3.0507e-05,  3.0019e-05,
        -4.8035e-06, -1.5439e-05,  2.6395e-05, -3.7229e-05,  7.0948e-05,
         4.8404e-05, -7.3803e-05,  8.3412e-06,  8.1256e-05, -2.5505e-05,
        -8.0903e-05,  1.4936e-04,  4.6808e-05, -4.4731e-05, -3.4264e-05,
        -9.1175e-05,  7.7225e-06,  2.8243e-05,  2.0826e-05,  4.5365e-05,
        -2.0285e-05,  1.3674e-05, -6.1123e-05, -3.1113e-05, -3.6146e-05,
         3.5165e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([ 6.7387e-05,  6.2975e-02, -1.7984e-05,  4.8623e-05,  2.5538e-05,
         3.2838e-05,  9.3201e-06, -4.1419e-05, -7.3807e-06,  3.6942e-05,
        -1.7211e-06, -3.4767e-05, -2.3238e-05,  1.7508e-05,  6.4583e-05,
        -1.3392e-05, -3.9650e-05, -2.4275e-05,  1.9985e-05,  3.7568e-05,
         2.1955e-05,  2.6185e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1913e-04, -8.4927e-02,  2.2260e-05, -2.0588e-05, -1.2619e-05,
         1.1011e-05,  1.3160e-05,  2.6387e-06,  7.3374e-06,  3.4476e-05,
         8.8745e-06,  4.2263e-05,  1.2114e-05, -9.8935e-06, -3.0785e-05,
         2.2478e-05, -2.0859e-05,  1.2365e-05,  8.6047e-06,  4.1773e-05,
         9.4780e-06,  1.2907e-05, -7.2497e-05,  2.4259e-05, -3.7769e-05,
        -6.0786e-06,  2.2243e-05,  2.1067e-05,  5.1320e-05,  3.5276e-05,
         1.5650e-05, -2.5055e-06, -2.0228e-05, -5.4170e-06,  1.3942e-05,
        -1.4658e-05,  8.0379e-06, -2.1055e-05, -1.5394e-05,  6.2837e-05,
         3.2001e-07, -2.8269e-05, -2.8390e-05, -2.4934e-05,  4.4198e-06,
        -3.7608e-05,  1.3916e-05, -3.8971e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1262e-04,  7.0192e-02,  5.3714e-05,  2.8712e-05, -8.7500e-05,
         2.0144e-05, -4.9410e-05, -9.3674e-05, -4.5681e-05, -1.7306e-05,
         4.7135e-05,  1.7547e-04,  1.8423e-05,  2.3414e-05, -5.5603e-05,
        -2.5197e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5573e-06, -8.6417e-02,  8.1933e-06, -2.2213e-06, -9.1023e-07,
        -2.7157e-06, -3.2443e-05,  2.3096e-05,  6.5632e-06,  1.5488e-05,
         2.5826e-05,  3.2818e-06, -1.3619e-05,  7.3667e-06,  2.6340e-05,
        -1.3983e-05, -2.1683e-05, -1.4107e-05, -2.6766e-06,  3.1733e-06,
         1.6241e-05, -5.1478e-06, -3.3214e-06, -3.9900e-06, -9.0873e-06,
         1.5960e-05, -2.8331e-05,  1.4622e-05, -2.2339e-05,  1.1685e-05,
        -6.7487e-06,  1.5801e-05, -4.5037e-05, -2.7351e-06, -1.3786e-05,
         6.5802e-06, -1.8804e-05,  2.9253e-05, -5.6338e-06, -1.6093e-05,
         3.6981e-07, -3.0961e-06,  1.1449e-05, -9.8130e-06,  4.2873e-06,
         1.8395e-05, -1.8312e-05,  1.7415e-06, -1.0105e-05,  1.7207e-05,
        -4.2846e-06,  7.6381e-06, -1.7719e-06, -3.5484e-05, -7.4525e-06,
         1.5598e-05,  1.7206e-05,  1.2035e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.7667e-05, -5.6358e-02, -1.3001e-05, -7.8537e-06, -4.0519e-06,
        -1.6949e-05, -1.6025e-05,  1.9125e-05, -2.0374e-05,  1.7773e-05,
        -1.4919e-05, -5.5814e-06, -7.5843e-06,  6.3364e-06,  1.1836e-05,
        -2.2490e-06,  6.3730e-06,  1.1140e-05,  1.5410e-05,  1.8700e-05,
        -2.2880e-05, -1.3523e-05,  1.2213e-05,  4.8598e-06,  5.2142e-06,
        -3.7035e-08, -3.2601e-05, -7.0638e-06,  1.0716e-05,  2.7066e-05,
        -1.2414e-06,  8.6928e-07,  2.2246e-05,  8.8897e-07, -1.8877e-05,
        -4.0603e-07, -4.7445e-06,  2.0978e-05,  6.5647e-06,  1.0890e-06,
        -1.0229e-05, -8.7350e-06,  1.5291e-06,  8.4203e-06,  2.7349e-05,
         2.1521e-05,  1.7778e-06,  7.4588e-06,  1.1085e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.6122e-05,  5.4061e-02, -2.0987e-05, -4.7137e-05,  1.6822e-05,
         4.5916e-06, -4.4209e-05, -1.0214e-05,  9.4318e-06, -3.0683e-05,
         1.1327e-05, -4.2377e-06, -2.9576e-05, -2.7258e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.9091e-05, -2.1226e-01, -4.2639e-05, -4.7949e-05, -9.7250e-05,
         1.4699e-06,  1.5007e-04, -1.0227e-04,  1.1796e-04,  5.8749e-05,
        -9.5209e-05,  3.0974e-05, -7.1371e-05,  2.6558e-05,  1.6009e-04,
        -3.9357e-05,  1.1602e-05,  9.3809e-05, -1.2652e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6577e-04, -1.5657e-01, -7.2349e-05, -7.0650e-05,  1.8159e-05,
        -1.6364e-05, -8.6584e-05, -7.3725e-05,  1.1611e-04, -3.2640e-05,
        -9.9546e-05, -4.6725e-05,  8.2382e-06,  3.9048e-05, -6.1242e-05,
         9.8655e-06, -6.8812e-05, -1.2354e-04,  5.2408e-05, -3.2929e-05,
        -3.6820e-05, -6.8774e-05,  2.1785e-06,  9.1409e-05,  1.9371e-05,
        -9.1527e-05,  3.0101e-06, -1.7529e-05,  1.6210e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.7934e-05,  2.6362e-02, -6.7460e-05,  1.8768e-05,  4.7637e-05,
        -2.3596e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.3953e-05, -1.3500e-01,  6.0981e-05,  1.7759e-06, -2.3695e-05,
        -4.3728e-05,  4.9533e-07,  6.6348e-05, -7.5888e-06, -2.5950e-06,
        -6.3718e-05,  9.3327e-07, -2.3724e-05, -3.1832e-05,  4.4954e-05,
         3.8171e-05, -1.0075e-04, -1.9817e-05, -1.7132e-05,  3.1187e-05,
        -9.0762e-06, -3.4872e-05, -4.5849e-05,  1.7461e-05, -6.4686e-06,
         6.3062e-05, -2.0899e-05, -1.3148e-05, -1.1759e-04, -6.4196e-06,
         2.8449e-05,  1.3950e-05, -1.0874e-05, -7.3591e-05, -1.0507e-04,
        -2.6952e-05, -2.7958e-06,  4.9331e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.9087e-04, -2.1753e-01, -8.0176e-05, -5.7575e-07,  5.2001e-05,
        -9.3557e-05, -6.9799e-06, -4.2813e-05,  3.4488e-05, -2.8793e-05,
        -4.1919e-05, -2.1469e-05, -4.1893e-05,  7.3936e-06, -9.9902e-05,
        -8.2524e-05, -4.8446e-05, -1.1017e-04,  9.9555e-06,  1.4918e-05,
        -1.5263e-05,  8.8000e-05,  3.8703e-05,  8.6221e-05,  4.5605e-05,
        -1.9128e-05, -2.1920e-05, -6.3558e-05,  7.4069e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7242e-04, -1.7578e-01,  1.9554e-04, -3.9496e-05, -1.0174e-04,
        -1.3590e-04, -3.9920e-05,  6.8987e-05,  8.7481e-05,  4.3231e-05,
        -7.8848e-06, -1.7597e-04,  3.0183e-05, -3.1258e-05,  1.2000e-04,
         4.0447e-05,  2.2032e-04, -7.2615e-05,  3.5294e-05, -1.0236e-04,
        -1.1137e-04,  2.2366e-06, -5.8317e-05,  1.2040e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-1.3604e-04, -1.6207e-01, -1.8829e-05,  1.5849e-05,  6.8536e-05,
        -5.0430e-05,  1.2558e-05,  9.4111e-05,  1.0838e-05,  3.5581e-05,
         4.6672e-05,  7.1754e-05, -5.8309e-06,  1.8374e-05,  6.1776e-05,
        -4.3809e-06, -4.9398e-06,  4.1023e-05,  8.3019e-05,  1.9640e-05,
        -1.3314e-05,  1.8305e-05, -5.6732e-05,  1.6285e-05,  4.0628e-05,
         1.1670e-05, -7.9495e-06,  1.0863e-05, -2.4468e-05,  1.3735e-05,
         1.6382e-05,  2.9271e-05,  5.2677e-05, -4.7327e-05,  6.1976e-06,
         2.5005e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2764e-04, -1.3774e-01, -4.1627e-05,  8.4212e-05,  2.8962e-05,
         1.5343e-05,  1.0775e-05,  2.7080e-05,  8.0562e-05, -5.0561e-05,
        -6.5846e-05,  1.4027e-05, -2.7693e-06,  4.9944e-06,  2.2531e-05,
        -9.6220e-06,  2.1692e-06, -2.4320e-05, -2.7905e-05, -2.9560e-05,
        -2.6989e-05,  1.4580e-05, -7.7694e-06,  2.8358e-05,  5.8379e-05,
        -8.7227e-06, -1.5521e-05, -2.8565e-05, -1.8147e-05,  4.3633e-05,
        -1.9986e-05,  2.9358e-05,  4.0404e-05, -3.7019e-06,  5.2696e-06,
         3.4298e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9784e-05,  3.6507e-02, -2.8475e-06,  9.5262e-06,  9.9978e-06,
         2.2491e-05, -1.3806e-05,  3.3917e-06, -9.6964e-06, -4.0119e-05,
        -8.7766e-06, -1.5780e-05,  1.6014e-05, -6.2213e-06, -3.4279e-05,
         1.2331e-05, -3.8619e-05,  3.4771e-06,  6.6096e-06, -1.1830e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.3888e-06,  1.1649e-01, -5.6134e-05,  4.4762e-06, -4.7073e-05,
         4.1458e-05,  2.4706e-05,  2.0579e-05, -4.0275e-05,  3.7128e-05,
        -6.9712e-06, -5.1366e-05, -4.6915e-05, -9.5787e-05, -1.2613e-05,
         8.5468e-05, -9.3281e-06,  7.3217e-05, -1.7265e-05, -3.2410e-06,
         1.7910e-05, -3.4919e-05, -3.3100e-05, -6.4366e-05, -2.8767e-07,
        -2.0522e-05,  3.0845e-05,  8.4876e-05,  3.3274e-05, -1.5992e-06,
        -4.5244e-05,  1.0613e-04, -5.7198e-06,  1.4802e-05,  3.5456e-06,
         2.6084e-05,  4.7977e-05, -6.3994e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8187e-05, -1.1727e-01,  2.8391e-05, -5.0727e-05, -1.3412e-05,
         1.7673e-05, -3.2861e-06, -6.3259e-05,  3.7707e-05,  1.7731e-05,
         2.2432e-05,  6.2974e-05, -1.1349e-06, -6.7159e-07, -7.0542e-06,
        -8.2024e-05,  1.0242e-05,  7.1813e-05,  1.8928e-05,  3.9164e-05,
         5.7724e-05,  1.8021e-05,  3.2202e-05, -3.5733e-05, -4.5366e-05,
         2.1581e-05, -2.3979e-05, -8.4221e-06, -5.3950e-05,  3.5208e-05,
         2.0885e-05,  6.6698e-05,  2.3104e-06,  5.4043e-06,  7.1193e-05,
         1.1814e-05,  4.9264e-06,  4.8729e-05, -2.9464e-06, -2.2052e-05,
        -2.1072e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0849e-04, -1.8559e-01, -7.8179e-05,  1.9319e-04, -4.4415e-05,
        -1.6061e-05, -5.2469e-05,  1.1077e-04,  1.3227e-04,  1.0925e-04,
         1.8324e-04,  1.5220e-05, -1.7333e-06,  5.5865e-06, -1.3772e-04,
         4.4501e-05,  5.0648e-05,  9.7816e-05, -5.6988e-05, -8.0194e-06,
        -1.1354e-05, -2.9332e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.8378e-04,  4.0860e-02, -7.4851e-06,  1.0455e-06,  1.2967e-05,
        -1.4978e-05, -2.7070e-05,  1.2805e-05, -3.0119e-05, -3.3043e-06,
        -2.2828e-05, -2.0656e-05,  1.2432e-06, -1.0733e-05,  2.0075e-05,
         1.9785e-05, -1.8390e-05,  1.8891e-05, -3.9816e-05, -2.3510e-05,
         6.5769e-06,  6.7363e-06, -1.6554e-05,  7.4326e-07, -1.7759e-05,
         1.0110e-06,  3.8899e-06,  2.3234e-05,  3.2777e-05,  1.2844e-05,
         8.6085e-06, -8.8725e-06, -1.7202e-06,  6.7106e-06, -1.7741e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7498e-04, -1.2155e-01, -1.1396e-06,  9.5840e-06, -4.6872e-05,
        -5.5044e-05,  2.4689e-05, -1.8687e-05, -1.3065e-05, -1.2890e-07,
        -2.4202e-05,  4.6558e-05, -3.4807e-05,  1.3282e-05, -2.5982e-05,
        -2.9563e-05, -5.9006e-05, -1.6959e-05,  1.0398e-04,  5.2246e-05,
         5.8592e-05,  5.8930e-06, -1.7587e-06, -3.6083e-06,  9.2348e-05,
        -1.1464e-05, -2.3899e-06,  1.2784e-04, -4.4128e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0793e-04,  1.2829e-01, -4.8115e-05,  2.8701e-05, -1.3615e-05,
         7.1289e-05,  1.7718e-04, -3.0811e-05, -5.0883e-05, -6.7894e-06,
        -2.3110e-05, -5.6361e-05,  6.2506e-05, -7.5241e-05, -2.9941e-05,
         7.6480e-05,  4.5337e-05, -2.9540e-05,  6.5341e-05,  5.2865e-06,
         3.0737e-05, -3.6497e-05, -6.0699e-05,  5.4555e-05,  3.5294e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7015e-05,  3.1581e-02, -1.3924e-05, -3.4947e-05,  2.6744e-06,
         2.8362e-05,  1.6857e-05,  5.1366e-06,  1.1441e-05,  2.4982e-05,
        -2.3812e-05,  3.2653e-05, -2.2069e-05,  5.5678e-05,  4.6256e-05,
        -1.0950e-06,  1.6546e-05,  7.2074e-07,  3.5148e-05, -8.2950e-06,
         2.2343e-05, -1.7117e-06, -1.9467e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2176e-05, -1.3940e-01,  2.3052e-05,  1.5467e-07,  4.9496e-05,
         3.3657e-05, -2.6478e-05,  2.5860e-05,  5.6933e-05,  3.9766e-05,
        -8.1436e-07, -6.0324e-05, -5.6127e-06,  1.1715e-05, -2.4705e-06,
         2.3641e-05, -2.5182e-05,  1.2290e-05, -4.7539e-05,  4.9048e-05,
        -2.5049e-05,  1.0588e-05, -1.9928e-05, -1.9570e-05,  2.2621e-05,
         3.6958e-05,  5.3210e-05, -3.0223e-05,  2.0120e-06,  2.2063e-05,
         1.3067e-05,  6.0982e-06,  4.6838e-05, -5.3448e-06, -2.0729e-05,
        -2.1571e-05, -3.3000e-05,  2.7343e-05, -1.4303e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.9850e-06,  4.8325e-02,  1.9600e-05, -7.0747e-06,  4.1770e-05,
         5.2916e-05,  2.9804e-05,  7.4132e-05,  5.2309e-05,  1.3107e-04,
        -4.5019e-05,  3.0256e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([-2.2895e-04, -1.3517e-01,  1.1430e-05, -2.6133e-05, -5.1295e-05,
        -6.8041e-05, -3.0277e-05, -1.4548e-05,  7.8099e-06,  1.9683e-05,
         1.5793e-05,  5.8788e-05, -2.2474e-05, -4.3327e-05, -5.0795e-05,
         3.9500e-05,  7.7452e-06,  1.3270e-05, -4.3855e-05, -4.3854e-05,
        -6.4784e-05,  1.1780e-05, -3.0558e-05,  4.1770e-05, -4.1010e-06,
        -2.8925e-05, -8.6323e-05,  2.6999e-06,  6.4938e-05, -2.6532e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.2242e-06, -5.9591e-02,  1.9254e-06, -6.6268e-06,  1.0447e-05,
         7.9863e-06, -2.1509e-05,  2.0083e-05, -2.8814e-06,  7.1065e-06,
        -6.5281e-06, -3.6611e-06,  3.1000e-06,  3.6325e-05,  3.6636e-05,
        -2.2530e-05,  8.8286e-06, -2.8749e-05,  2.4476e-05,  8.5739e-06,
         9.0059e-06, -1.6913e-05, -6.1817e-06,  4.7195e-06,  1.2117e-05,
        -1.2847e-05,  7.8744e-06,  1.1421e-05, -2.3393e-05,  3.4348e-05,
         3.7468e-05,  1.3507e-05,  1.9348e-05,  1.3327e-05,  1.5282e-05,
         1.1672e-05,  8.6886e-06,  2.1166e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.5869e-05, -7.6545e-02, -9.5229e-07,  1.3594e-05,  1.3608e-05,
        -1.3907e-05,  4.7610e-07,  3.7989e-05, -1.7846e-05,  2.5843e-05,
         4.7303e-05, -1.8633e-05, -7.2125e-05, -1.0374e-06, -1.5352e-05,
        -2.2314e-05, -4.6020e-06, -3.1755e-05,  1.0607e-05,  1.5589e-05,
         8.5389e-06,  1.5861e-06,  4.6553e-06, -3.1595e-05,  2.1353e-05,
        -3.1483e-05, -1.3323e-05,  8.9609e-05,  3.0108e-05,  8.8278e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.9118e-05,  1.9007e-01, -2.3914e-04, -3.6568e-05, -7.2063e-05,
        -1.5147e-04,  2.5631e-05,  5.5932e-05,  9.8066e-05, -1.1741e-04,
         1.7168e-04,  1.4780e-04,  1.6255e-04, -3.7077e-05,  1.7562e-04,
         1.0282e-04, -1.9638e-05,  6.5457e-05,  2.4894e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.2816e-04,  1.7505e-01, -1.7893e-04, -4.5218e-06,  1.4285e-04,
        -2.8886e-06,  1.7081e-04, -4.2663e-05,  3.6827e-05,  6.6462e-05,
        -1.1132e-04,  1.6889e-04, -1.0363e-04,  8.7864e-05, -1.3643e-04,
         1.4780e-04, -7.0623e-05, -3.7178e-05, -1.4290e-05, -4.7805e-05,
         9.7387e-05, -5.1438e-05, -1.7249e-04, -2.0955e-04,  2.5003e-05,
         5.8492e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.4826e-05, -1.2309e-01,  2.7046e-05,  1.2542e-04,  6.0864e-06,
         1.2872e-04,  4.0441e-06,  3.8727e-05, -2.0569e-05,  1.0437e-04,
        -4.1404e-05,  1.3094e-04,  5.2718e-05, -5.0263e-05, -6.5537e-06,
         9.0089e-05,  3.2861e-06, -4.7378e-05,  6.3341e-05,  6.2319e-05,
         7.5736e-05, -2.2468e-05, -3.3599e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7817e-04,  1.0933e-01, -4.4083e-05,  1.8831e-04, -4.6589e-05,
        -3.0033e-05, -3.0873e-05, -7.9622e-05,  1.2448e-04, -1.2829e-05,
        -6.1137e-05,  6.7276e-05, -4.8480e-05,  1.1329e-04, -1.9541e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4656e-04,  1.5113e-01, -4.2869e-05, -9.6524e-05,  4.3171e-05,
        -2.2196e-05, -1.0279e-04, -4.8522e-06,  6.0992e-05, -3.0034e-05,
        -6.4510e-05,  4.5596e-05, -1.0997e-05, -1.1078e-04, -7.0701e-05,
         4.4897e-05, -8.5134e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0086e-04, -1.1164e-01, -1.2904e-05,  3.3601e-05, -6.6317e-05,
         2.2681e-05,  1.2625e-05, -2.9824e-05, -4.3725e-05, -2.5887e-05,
        -4.0774e-05,  3.3312e-05, -2.2909e-05,  2.3934e-05, -8.0341e-06,
         3.2631e-05, -3.6158e-05, -2.9858e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2130e-04,  2.1243e-01,  1.0381e-04,  6.0164e-06,  8.8657e-05,
         4.1444e-06,  8.7072e-05,  7.0188e-05,  1.6769e-04,  3.3278e-05,
         1.3232e-04,  5.4118e-06, -2.2864e-06, -4.8028e-05, -6.7543e-05,
         1.9989e-04,  4.6037e-05, -4.8575e-05,  4.1062e-05,  2.0513e-05,
         6.2182e-05, -1.0600e-05, -2.0330e-05,  1.2237e-04,  4.8103e-05,
         1.3246e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.0908e-05,  1.3177e-01, -1.8693e-06, -7.8125e-05, -4.2308e-05,
        -2.4119e-05, -3.5259e-05,  8.6947e-05, -3.8194e-05, -6.0707e-05,
        -1.6967e-05, -3.4672e-05,  2.4667e-05,  3.5730e-05, -6.0771e-05,
        -6.7005e-05,  1.4369e-05,  3.6748e-05, -2.6393e-05,  4.1988e-05,
        -3.2070e-05, -3.5688e-05,  7.9702e-06, -1.9817e-05, -1.0654e-05,
         2.8544e-05, -3.7860e-05, -8.2017e-07,  1.6434e-05,  3.2383e-05,
        -2.3830e-06,  7.9864e-06,  3.3569e-05, -3.6059e-05, -2.9492e-05,
         7.7794e-05,  7.2247e-05, -5.6380e-06,  3.7298e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.0621e-06, -6.3988e-02, -2.3580e-05, -6.7827e-06, -3.8753e-06,
         8.4611e-06,  6.7960e-06,  1.6947e-05,  3.2484e-06,  8.6886e-06,
        -1.1320e-05,  7.0353e-06, -1.9811e-06,  1.0140e-05,  4.7437e-05,
        -1.0829e-05, -1.4106e-05,  9.6336e-06, -1.7118e-05, -1.0551e-06,
        -1.4101e-05,  5.3299e-06,  1.1038e-05, -6.9396e-06,  1.4590e-05,
        -3.2056e-05, -1.2086e-05, -4.7609e-06, -7.7830e-06, -1.2128e-05,
        -1.8415e-06,  1.5824e-05,  1.1067e-05,  3.0134e-07, -1.1019e-05,
        -7.3391e-06, -3.1852e-05,  1.0135e-05,  2.4704e-05, -1.1674e-05,
        -8.7973e-06,  1.4016e-05, -1.3419e-05,  3.8247e-06, -1.0954e-05,
         2.0273e-05], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #300: [tensor([ 6.2732e-05, -1.1165e-01,  6.2554e-06, -2.2919e-05, -2.8613e-05,
         1.6283e-05, -2.1282e-05,  1.8630e-06, -2.2904e-05, -5.2993e-05,
        -9.7348e-06, -2.6465e-05,  1.8449e-05, -4.2205e-05, -5.6024e-05,
        -2.7036e-06,  3.2698e-05, -1.7361e-05, -5.2462e-05,  1.0410e-05,
         2.7347e-05,  1.8200e-05, -2.8136e-05, -9.1816e-06, -1.9533e-05,
        -2.0144e-05, -2.7599e-05, -2.6211e-05, -1.1654e-05, -3.2006e-06,
         5.2833e-06, -2.8701e-05, -5.6500e-05,  1.3243e-05, -6.5907e-05,
         3.3764e-05, -1.9065e-05, -4.5339e-05, -1.9698e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4252e-05,  2.0874e-01, -2.0608e-05,  4.9845e-05, -9.3169e-05,
        -5.9253e-05, -7.2245e-05, -1.1447e-04,  3.9311e-05,  2.6086e-05,
        -1.1934e-04,  8.0726e-06, -7.5579e-05, -6.4665e-05, -1.4477e-05,
        -5.4861e-05, -6.6738e-05,  5.2972e-05, -1.5683e-04, -4.0187e-05,
        -2.3732e-05,  5.7826e-05, -1.6865e-05, -8.4474e-05, -2.0923e-04,
         5.8269e-06,  1.1111e-04,  5.6390e-05, -1.1311e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5190e-05,  1.1615e-01, -4.3118e-05, -1.5418e-04, -1.1866e-04,
         5.6401e-05, -4.3741e-05, -9.8863e-05, -3.5474e-05, -1.1102e-04,
        -6.7313e-05, -2.2005e-04, -2.0928e-06, -1.4842e-04, -8.0339e-06,
        -8.3150e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.4693e-05,  2.3447e-01, -9.6610e-05, -3.8043e-05, -1.3605e-04,
        -6.9925e-05, -5.5890e-05, -1.4981e-05, -7.6815e-05,  3.7871e-05,
        -1.5210e-05,  3.2793e-06, -7.5986e-05, -3.6306e-05, -4.0945e-07,
         8.1730e-05, -1.7707e-04, -5.1231e-05, -5.7126e-05,  1.2745e-04,
         1.3574e-04, -1.8884e-04, -7.7765e-05, -2.3896e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.3103e-04, -1.6834e-01, -1.0492e-04,  1.3646e-04,  9.9953e-05,
        -1.2176e-04, -3.6133e-06, -1.3812e-04, -1.3587e-04, -1.7677e-04,
        -1.2430e-04,  6.0868e-05, -2.0581e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5255e-05, -1.3147e-01,  5.2313e-05,  2.2414e-05, -1.2657e-05,
        -4.5887e-06,  4.8440e-05,  1.9483e-05, -1.9447e-05,  4.5219e-05,
        -5.7832e-05,  3.2981e-05,  2.9275e-05,  1.7926e-05,  2.3710e-06,
         6.1185e-05,  2.2085e-05, -2.9716e-05,  3.1361e-05, -1.8128e-06,
         6.5433e-05,  6.3168e-05, -1.0969e-05, -1.8244e-05,  1.8137e-06,
        -8.0090e-06,  1.8801e-06, -5.2207e-05,  2.3526e-05,  1.8179e-05,
         9.8949e-05,  3.8069e-05, -4.6046e-05, -2.3673e-05,  3.6465e-05,
        -2.3742e-06,  3.0360e-05,  1.2233e-05, -2.9279e-05, -1.3480e-05,
         2.6322e-05,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.8799e-06, -9.8401e-02, -3.5804e-06, -9.2437e-06,  1.1864e-05,
         1.7147e-06, -1.5771e-05,  6.6140e-08, -2.0343e-05, -1.7587e-05,
        -2.7400e-05, -3.1854e-07,  3.6953e-06,  6.8203e-06,  5.0907e-05,
        -6.3686e-07,  7.4946e-06,  1.5270e-06,  8.3150e-06,  4.2489e-06,
         1.7633e-06,  3.1694e-06, -3.0221e-05,  1.0446e-05,  8.8176e-06,
        -9.3967e-07,  6.3408e-06, -2.8262e-06,  1.9077e-05,  3.0534e-06,
         1.5775e-06, -1.9873e-05,  2.8128e-05, -3.0189e-05, -4.1534e-05,
        -1.6504e-05,  1.0922e-06, -1.8048e-05,  3.7781e-05, -1.1560e-05,
         9.4251e-06,  1.5549e-05,  7.4072e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3657e-04, -1.4386e-01,  1.6579e-05, -4.0306e-05,  5.6944e-05,
        -4.2466e-05,  5.2892e-06, -2.3987e-05, -4.8601e-05,  2.9781e-06,
        -6.6789e-05, -3.5549e-05, -6.8678e-05, -5.3955e-05, -2.6275e-05,
         1.2846e-04,  4.1441e-05,  3.9801e-05,  4.5631e-05,  4.5182e-05,
        -4.0245e-05, -2.9577e-05, -4.6121e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4212e-04,  1.7378e-02,  3.9588e-05, -7.8866e-05, -1.4992e-05,
         3.6314e-05,  5.9906e-05,  1.9416e-05,  1.7707e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.2509e-05,  1.0282e-01, -8.0683e-05,  4.5585e-06, -2.5686e-05,
        -2.0243e-05,  2.0808e-05,  6.2018e-06,  9.3173e-06, -3.9160e-05,
         7.2454e-06,  2.1665e-06,  1.7069e-05, -1.5739e-05, -9.4394e-06,
         5.4669e-05, -5.4462e-05,  3.1987e-06,  7.2207e-05, -4.5221e-06,
        -3.2895e-05, -1.4995e-05, -4.0210e-05,  3.4690e-05,  5.9077e-05,
        -8.3301e-06,  2.1078e-05,  9.2265e-06, -9.8455e-06,  9.3201e-06,
         4.8009e-06, -8.3586e-06,  2.5138e-05, -7.2971e-05,  7.0307e-05,
        -3.0587e-05, -3.4476e-05, -2.0951e-05, -4.7996e-05, -9.4177e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.0780e-05,  1.3099e-01, -1.6931e-04, -6.7315e-05,  3.0499e-05,
        -3.5646e-05,  1.6672e-04, -1.2220e-05,  1.5646e-04,  2.7745e-04,
         1.6786e-04,  6.0015e-05, -7.5553e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.4855e-04, -1.8640e-01, -9.7736e-06,  2.9765e-05, -1.3778e-04,
         3.3639e-05, -5.5380e-04, -1.0295e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #350: [tensor([-1.4251e-04,  7.7649e-02, -7.5403e-05,  5.5825e-05,  1.1391e-04,
        -2.6520e-05,  5.7479e-05,  8.0169e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.6234e-04, -1.6402e-01,  4.3652e-06,  5.6364e-05,  1.9534e-05,
        -7.0241e-06,  1.9857e-06,  7.3542e-06,  3.4210e-05, -2.8955e-05,
        -8.4409e-05, -5.5305e-05, -4.3964e-05, -1.8156e-05, -1.3991e-05,
         6.7772e-05, -2.5611e-05,  1.5530e-05, -2.4078e-05, -9.2162e-06,
         2.3427e-05, -2.0552e-05,  1.2057e-05,  4.0803e-05, -3.3965e-05,
         4.1087e-05, -7.9989e-06, -1.4456e-05, -1.8729e-05, -2.2854e-05,
        -1.3995e-05, -5.5241e-05, -1.5253e-05, -8.1832e-05, -9.8501e-05,
        -3.2438e-05,  1.5947e-05, -5.0994e-05, -6.1337e-05,  3.1400e-06,
         1.8502e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.2737e-06, -1.3260e-01, -4.9272e-05,  1.5238e-05, -1.1773e-06,
         1.0088e-05,  2.4070e-05, -2.0088e-05, -8.3410e-06,  2.2769e-05,
         7.6726e-06,  7.3600e-05,  2.4542e-05, -2.9170e-05,  2.9298e-05,
        -3.2881e-05, -2.1283e-05,  3.6430e-06,  5.1678e-05,  1.5056e-06,
        -2.3026e-05, -3.4407e-05, -3.1781e-05,  1.2932e-05, -1.9988e-05,
        -2.8588e-05,  1.3732e-05, -1.7636e-06,  4.2972e-05, -1.6478e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.6597e-05,  1.0399e-01,  5.1361e-05,  2.0046e-05,  2.1455e-06,
         5.3759e-05, -1.1062e-04, -3.7029e-05, -6.9465e-05,  4.6017e-05,
        -2.9037e-05,  4.3692e-05,  1.8209e-05, -4.8619e-05,  1.1348e-04,
        -3.5066e-05, -2.1403e-05,  1.1962e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4423e-04, -1.3352e-01,  2.9328e-05,  3.0596e-05, -1.6092e-05,
         4.1449e-05,  3.4968e-06,  4.2215e-05,  2.5180e-05, -1.3607e-05,
         4.8297e-05, -1.0563e-05,  3.6280e-06,  6.6264e-06,  2.2015e-05,
        -4.5454e-06,  7.4336e-06, -2.7870e-05,  3.0106e-05, -7.5980e-06,
        -2.4369e-05, -2.9148e-06,  4.2073e-05, -2.2279e-05,  9.3970e-06,
        -5.7799e-07,  3.1893e-05,  1.5411e-05,  9.2579e-06,  5.4847e-05,
         2.0473e-05, -1.2784e-05,  6.0052e-05, -2.6767e-05,  4.2200e-05,
         1.1675e-05,  3.3441e-05,  2.8429e-05,  1.6826e-05, -1.2896e-05,
        -4.7268e-06,  3.1922e-05, -1.1873e-05,  3.3683e-05,  5.1957e-06,
        -1.8983e-05,  3.4325e-05,  9.9179e-06,  4.0528e-05,  4.2856e-05,
         3.7338e-05,  6.9150e-05,  1.5914e-05, -2.7420e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3041e-04,  5.4407e-02, -2.4872e-05, -3.5412e-05,  8.8943e-06,
         1.0609e-05,  5.3549e-05,  4.2527e-05, -2.4249e-05,  1.8489e-05,
         1.7401e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9736e-05, -1.4944e-01,  7.6082e-05,  5.9872e-05, -2.2080e-06,
         9.6428e-05,  8.1567e-06,  1.6120e-05,  2.3703e-05, -7.5638e-05,
         3.0537e-05, -2.4560e-05,  1.2766e-05, -8.8133e-06,  6.2324e-05,
         2.0559e-05, -4.0672e-05, -2.2031e-05, -2.7431e-05,  7.0037e-05,
         3.9397e-05, -9.7843e-05, -1.7503e-06,  2.7404e-05,  3.5821e-06,
         3.8051e-05,  7.0516e-05, -4.7779e-06, -2.1936e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 8.9397e-05,  8.9864e-02, -5.0535e-05, -3.3055e-05,  1.7462e-05,
         1.6011e-05,  2.8468e-05, -5.3418e-06,  1.9604e-06, -4.2829e-05,
        -2.5473e-05, -1.5257e-05, -5.0610e-05, -7.3029e-05, -3.2687e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0190e-04,  6.7603e-02, -3.0308e-05, -1.3191e-05,  1.1587e-05,
        -2.3325e-05, -3.8475e-07, -2.3020e-06,  6.0859e-06, -1.3205e-05,
        -6.4259e-06,  3.0404e-06, -3.9382e-06, -9.5161e-06, -5.2914e-05,
         4.1187e-05, -2.0321e-05,  2.9688e-05, -3.7645e-05, -1.3456e-05,
        -3.8485e-05,  9.6840e-06, -1.6281e-05, -5.7670e-05, -3.7827e-05,
         3.2005e-05,  1.8959e-05, -3.7136e-05, -2.8612e-05,  2.2417e-05,
         4.9113e-06,  1.6247e-05,  1.0606e-05, -2.2561e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.3968e-04,  1.0386e-01, -3.0332e-05, -1.7818e-06,  1.9498e-06,
         2.4601e-05,  3.6156e-06,  6.3840e-05,  2.2965e-05, -5.6255e-06,
        -2.4099e-05,  4.4535e-06,  3.9146e-05,  2.6578e-05,  4.1565e-05,
        -6.3705e-05, -4.7791e-05,  2.1322e-05, -5.2988e-05, -8.6551e-05,
        -1.3178e-05,  4.8640e-05, -6.3080e-06, -3.1536e-06, -5.8972e-06,
        -4.1218e-05, -5.4343e-05,  1.0509e-05,  5.5827e-05, -6.7939e-05,
        -8.7793e-06,  2.7012e-07,  6.0577e-06, -8.3174e-05,  6.1760e-06,
        -5.3799e-06, -5.1546e-05,  2.9499e-05, -6.7821e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.3467e-04,  7.3477e-02, -1.1530e-05,  3.6886e-05,  3.1565e-05,
         5.5462e-05, -4.3332e-05,  7.0995e-05,  5.9321e-06, -5.4175e-05,
        -3.8485e-05,  8.4494e-06, -1.9123e-05,  5.2061e-05, -4.6292e-05,
        -2.0038e-05, -4.5921e-06, -4.8180e-05,  5.8738e-05,  2.8313e-05,
         3.4713e-05,  1.9357e-05,  6.1394e-05,  4.2034e-05, -7.0658e-06,
         5.9081e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2195e-04, -1.1663e-01,  5.0160e-05, -2.6892e-06,  1.5321e-04,
        -4.9306e-05, -9.4974e-06, -7.4057e-05, -5.3307e-05, -7.8429e-05,
         1.0653e-04, -1.2268e-05, -1.0533e-05, -6.7770e-05, -6.6023e-05,
        -4.4280e-05, -1.0002e-05, -1.7904e-05,  4.6978e-05, -3.3523e-05,
        -2.7458e-05, -1.9627e-04, -7.7507e-05, -2.3333e-05, -6.0933e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #400: [tensor([ 1.0125e-04, -2.0070e-01, -1.6417e-04,  2.1044e-04,  9.1369e-05,
         4.6272e-05,  1.6250e-04,  1.2985e-04, -3.2398e-04,  3.2034e-05,
         2.8625e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.1835e-05, -1.1951e-01,  2.2651e-05,  3.1299e-06,  1.6704e-05,
         3.5997e-06, -1.8664e-05, -3.3597e-06,  2.9106e-05,  2.5385e-05,
         6.6834e-06,  1.6067e-07,  5.7284e-06, -3.2389e-06, -2.9636e-05,
         1.8209e-05, -7.6113e-06, -3.2685e-06,  1.7835e-05,  2.5552e-05,
        -1.9449e-05,  3.9047e-06,  3.0767e-05, -5.1501e-06,  2.4036e-05,
        -1.2598e-05,  2.0637e-05,  1.6707e-05,  1.5333e-05,  1.1356e-05,
         4.4485e-05,  2.6884e-06,  8.6308e-07, -1.7729e-05,  4.4146e-07,
        -4.9463e-05, -9.6803e-06,  4.2169e-06,  5.2558e-06,  4.8378e-06,
         8.1471e-06,  2.5799e-06, -1.1113e-05, -2.7738e-05,  2.2622e-05,
         6.1994e-06, -2.7068e-05,  1.2948e-06,  2.1766e-05,  7.3010e-06,
         1.9167e-05,  1.8509e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1975e-06, -1.9080e-01, -4.2445e-05, -9.8582e-05, -4.2511e-05,
         5.4282e-06, -4.5657e-05,  1.7280e-05, -8.8183e-07,  1.2657e-04,
         2.5712e-05,  2.0661e-05, -5.7937e-05, -3.0242e-05, -1.2470e-04,
         7.6141e-05,  1.3743e-05, -9.1251e-05, -9.8415e-06, -7.5337e-05,
         2.5643e-05,  4.7862e-05, -6.2151e-05, -3.0770e-05, -1.6191e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.8506e-05,  8.6323e-02,  4.2714e-06,  3.2589e-05,  3.2179e-05,
         3.1304e-05, -2.8909e-05,  5.5201e-05,  4.2651e-06,  2.4413e-05,
         9.3263e-05, -2.0526e-05,  1.5743e-05, -3.7382e-05, -1.8931e-05,
         1.1253e-05,  6.1385e-06, -1.2333e-04,  8.7561e-06,  9.6983e-06,
         4.5925e-05, -2.4835e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.3357e-05,  5.1442e-02, -1.4709e-05,  1.6982e-05, -1.1336e-06,
         1.5273e-05, -2.8079e-05,  7.9376e-06, -6.4115e-06,  6.5412e-06,
         5.6198e-06,  1.9551e-05,  1.8013e-06, -1.4488e-05,  1.2808e-06,
         5.5517e-06,  1.1665e-05, -2.5456e-05, -1.7711e-05,  2.0154e-05,
        -5.0183e-05, -3.7598e-06,  6.1350e-07, -2.9887e-05, -2.4257e-05,
        -6.2734e-06, -8.9098e-06, -1.6166e-05,  7.0003e-06,  2.3555e-05,
         1.3813e-05, -2.9128e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.8934e-05,  1.0361e-01,  4.8092e-05,  4.5872e-07, -1.8166e-05,
         3.6678e-05,  6.0008e-06,  1.6095e-05,  2.2486e-06, -3.6296e-05,
        -5.4706e-05, -8.2268e-05,  1.2946e-05, -3.9146e-05, -4.0651e-05,
        -6.8872e-06, -2.2465e-05, -1.0864e-05, -3.7654e-05, -1.8612e-07,
        -8.2513e-06, -9.3413e-06, -9.9962e-06, -3.9768e-05,  8.6862e-06,
        -7.8881e-06, -2.1286e-05,  2.8995e-05, -5.3688e-05, -3.6198e-06,
         4.1798e-05, -2.0496e-05,  7.1592e-05,  2.8047e-05,  1.6074e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6903e-05,  1.4326e-01, -8.3661e-05, -1.7598e-05,  4.8045e-05,
        -4.5581e-05,  8.9696e-05,  1.4438e-04,  1.3307e-04, -5.3907e-05,
        -5.7216e-05,  2.8126e-05,  7.4172e-05,  4.7529e-05,  5.9972e-05,
         3.1157e-05, -2.9438e-06, -3.0510e-06, -2.3083e-05,  1.8860e-05,
         6.3301e-06, -1.6379e-05,  2.8644e-05,  5.1159e-05,  1.3262e-04,
        -2.4296e-05,  6.1016e-05,  1.4300e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.0821e-05,  6.7761e-02,  2.5591e-05,  1.6896e-05,  1.0603e-05,
         4.4117e-05, -4.5292e-05,  3.7694e-06,  1.4702e-05, -6.2185e-05,
         5.9404e-05,  1.6646e-05, -5.6045e-07, -4.0384e-05, -2.3832e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.4997e-05, -9.1130e-02,  5.2005e-05,  9.7538e-06, -3.1262e-05,
        -1.1534e-05, -3.7972e-05,  5.4731e-05, -8.8764e-06,  1.2656e-05,
        -4.9455e-06, -5.1216e-05, -1.6516e-05,  1.3908e-05, -1.7969e-05,
         1.2319e-05,  2.6969e-06, -1.0341e-05, -2.6492e-06,  3.5457e-05,
        -7.9859e-06,  1.3570e-05, -1.9521e-06, -2.9794e-05,  8.9675e-07,
         2.8902e-05, -2.5774e-05, -4.2314e-05, -3.2124e-05,  4.0358e-05,
        -3.5216e-05, -2.1132e-06, -2.1400e-05,  1.9374e-06, -4.8085e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.3894e-05,  1.1019e-01, -3.0204e-05,  5.0641e-05, -7.1635e-05,
        -8.1417e-05, -8.4748e-06,  1.2775e-05, -2.0027e-05, -1.2069e-05,
        -4.7596e-05,  1.6667e-05, -6.8713e-05,  1.0353e-04, -3.9109e-06,
        -5.1222e-05,  8.7970e-06, -2.6168e-05,  5.6590e-06,  1.8958e-05,
         8.7093e-06, -2.0952e-05, -3.9194e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-8.2913e-05, -2.2636e-01, -4.1772e-05,  6.6250e-05, -5.1863e-05,
         7.8161e-05,  3.1367e-05,  8.1677e-05, -3.4989e-05, -1.5688e-05,
         1.4980e-05, -3.4606e-05, -1.8494e-04, -7.4228e-05,  3.4435e-05,
         1.0231e-06, -4.3715e-05,  2.8533e-05,  4.7467e-05,  8.3643e-05,
         8.6203e-05,  1.4886e-05,  1.4146e-05,  4.2889e-05, -7.3511e-05,
        -2.8923e-05,  1.3823e-04, -2.1274e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.7247e-05, -1.2955e-01, -1.2501e-05,  1.7160e-05, -5.8460e-05,
        -1.0398e-05,  5.8087e-06, -2.5942e-05, -1.0892e-05, -4.7936e-05,
         3.5098e-05, -2.6766e-05,  4.9057e-05,  1.0414e-05,  1.3125e-05,
         4.3335e-06, -1.1121e-05,  2.7985e-05, -6.8135e-05,  1.2546e-05,
        -3.2660e-05,  4.5059e-06, -2.2834e-05,  1.8377e-05, -1.4813e-05,
        -2.6418e-05, -2.0630e-05, -2.7048e-07,  2.9082e-05, -1.4848e-06,
         1.8149e-05, -7.6681e-05, -2.5959e-05,  2.6168e-05, -5.6842e-06,
        -1.5069e-05,  4.0024e-05,  2.0831e-05,  5.8069e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #450: [tensor([ 1.0845e-04, -1.0430e-01,  3.4092e-05,  6.2937e-05,  2.6432e-05,
        -2.0961e-05,  2.6952e-05, -2.2254e-07,  3.9290e-05,  5.3324e-05,
         1.7949e-05,  1.0582e-05, -4.0106e-05, -1.6873e-05, -8.0780e-05,
         8.2083e-05,  2.2898e-05, -1.1654e-05, -4.7792e-05,  2.4650e-05,
        -1.0636e-05,  7.9414e-06, -3.7997e-05, -3.2025e-05, -5.1536e-05,
        -1.3272e-05,  4.5838e-05, -8.0357e-06,  2.5239e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.9398e-04,  4.2346e-02, -2.8526e-05,  3.8709e-05, -1.0624e-05,
         1.1694e-05, -2.6245e-05, -1.1308e-05,  1.9347e-05,  5.4098e-06,
         1.4999e-05,  6.9708e-06,  1.0709e-05, -4.5343e-07, -1.1211e-05,
         3.5156e-06,  5.5783e-06,  5.1422e-05, -2.8898e-05, -5.1875e-05,
         4.2376e-05,  9.9472e-06, -6.0708e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.5930e-05,  1.0720e-01,  3.0212e-06,  6.5196e-05,  3.6426e-05,
         6.1204e-06, -5.5031e-05, -2.2907e-05,  1.7693e-05, -8.1497e-06,
        -1.6090e-05, -5.2896e-05, -5.7621e-06, -1.3977e-05,  8.6499e-06,
        -1.4371e-05,  3.3581e-05,  5.0447e-05,  2.1723e-05,  1.5187e-05,
        -1.3049e-05, -8.2945e-05,  1.6481e-05,  3.5740e-05, -4.9592e-05,
        -4.1445e-05, -2.8404e-05,  3.8379e-05, -1.4843e-06, -2.4024e-05,
        -8.0983e-06,  2.3590e-05,  3.5585e-05,  1.0899e-05, -5.4191e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6622e-05, -1.2660e-01, -6.0360e-05,  4.1012e-05, -9.1582e-06,
        -8.9784e-06, -4.0742e-05, -3.3978e-05, -1.3597e-05,  1.3011e-05,
        -2.6019e-05, -2.1507e-05, -3.7252e-05,  5.8728e-06, -2.4575e-05,
        -6.1646e-05, -1.1377e-06, -1.9004e-05, -1.3708e-06,  5.2465e-05,
        -1.1632e-05,  6.6098e-05, -2.5237e-05, -6.9676e-05, -5.1860e-05,
        -2.5376e-05, -1.0473e-05,  4.8435e-05, -1.4254e-05, -4.5124e-05,
        -7.2085e-05,  1.0961e-05,  9.6965e-06,  5.7496e-05,  1.2406e-05,
         7.0853e-06, -1.1132e-05, -2.8763e-05,  3.3487e-05, -2.6123e-05,
        -3.0400e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.2955e-05, -1.6118e-01, -9.8801e-06, -2.3873e-06, -8.3907e-05,
         1.4714e-05,  1.4698e-05, -1.5928e-05, -5.8987e-06, -2.0146e-05,
         1.1766e-05, -1.3520e-05, -6.3093e-05, -8.2075e-06,  3.6853e-05,
        -8.8854e-06,  2.7484e-05,  7.2945e-05, -6.7903e-06, -7.1631e-05,
        -9.3269e-07,  3.0756e-05, -3.1401e-05,  1.0073e-06,  4.7249e-06,
        -3.6792e-05, -7.6568e-05, -1.1050e-05,  2.6257e-05, -6.0164e-05,
         1.0105e-05, -2.4771e-05, -1.4062e-05,  6.3559e-06,  1.2873e-05,
        -1.2119e-05, -1.4300e-05, -3.6748e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 5.7806e-05, -1.5078e-01,  8.2576e-05,  3.0043e-05,  2.8462e-05,
         2.9228e-06,  6.4194e-05, -3.3688e-05,  2.9167e-05, -2.6703e-05,
        -2.9392e-05,  3.2552e-05, -3.0735e-05,  3.4840e-06,  1.6762e-05,
        -3.0087e-05,  4.3795e-05,  1.2361e-05, -1.9386e-05,  5.8312e-06,
         1.0509e-05,  4.1904e-05,  3.1039e-05, -1.4674e-06,  1.8017e-05,
         3.3153e-05, -3.5946e-05,  1.0471e-05, -1.1443e-05,  4.6592e-05,
        -1.5202e-05, -3.0044e-06, -6.4293e-07,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-6.9979e-05,  7.9299e-02,  4.4137e-05, -1.7966e-05,  2.0225e-05,
         3.3549e-06,  1.2493e-05, -6.4086e-05, -2.9026e-05,  1.9662e-05,
        -3.6565e-05,  3.7466e-05,  2.2301e-05, -5.3057e-06,  4.9371e-05,
         2.2864e-05,  3.5050e-07, -4.4598e-05, -6.3871e-05, -8.8391e-05,
        -4.6418e-05,  3.2764e-05, -1.4752e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6265e-04, -1.2554e-01, -1.0029e-04,  5.4964e-05,  7.5175e-05,
        -5.3996e-06, -1.0081e-04,  7.3825e-06,  6.4344e-05, -5.8931e-05,
        -2.3566e-05,  8.9361e-05, -1.1508e-04, -7.7431e-05, -4.5210e-05,
        -1.3520e-04,  4.5241e-06, -2.8120e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.7289e-05,  1.9789e-01, -1.1129e-05,  1.1207e-04,  3.9954e-05,
        -2.6802e-06,  4.6298e-05,  1.6319e-05,  1.4051e-04, -6.3704e-06,
        -1.7586e-05,  5.7389e-05,  6.3760e-06, -5.5047e-05,  5.2052e-05,
        -2.8746e-05, -1.0627e-05, -1.0766e-04,  9.8830e-05, -3.5089e-05,
        -1.0113e-05,  5.3491e-05, -5.4033e-05,  1.3108e-05,  2.0329e-05,
        -5.0748e-05,  6.8431e-05,  2.0376e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.2737e-04, -1.5279e-01,  6.2569e-05,  2.2622e-05,  2.3744e-05,
         1.6197e-05,  5.0960e-05, -6.2923e-05,  2.0687e-06, -8.0334e-05,
        -1.1115e-04,  1.2620e-05, -2.6110e-05, -1.8959e-05,  5.4356e-05,
        -7.5699e-05, -3.3340e-05, -3.6777e-06,  2.5344e-05,  3.6430e-05,
         7.8581e-05,  4.5655e-05,  4.9299e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.6180e-05, -1.1363e-01, -1.9958e-05,  1.2258e-05,  3.5783e-05,
         8.9690e-06,  5.5879e-06,  2.4928e-05, -2.8360e-05,  2.7709e-05,
        -7.0508e-06,  4.8037e-05,  1.4486e-05,  8.5546e-06,  3.9934e-05,
         2.9283e-06, -1.1254e-05,  1.3889e-05, -1.2134e-05, -1.8732e-05,
         2.5308e-05, -2.9966e-05,  5.7508e-05,  4.8149e-05, -1.5511e-06,
         3.0826e-05,  1.0031e-05,  1.5251e-05,  4.5588e-06,  6.4778e-05,
        -5.1721e-06,  1.7593e-05,  4.3588e-05, -1.8994e-05, -5.4288e-05,
         4.5271e-06, -2.7004e-05, -1.4109e-06,  1.7708e-05,  3.5454e-05,
         6.2443e-06,  2.4907e-05,  5.1322e-05, -1.4763e-05,  3.0831e-05,
        -4.5637e-05, -3.8062e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.7533e-05,  1.7859e-01,  2.5410e-04, -1.4495e-05,  9.6858e-05,
        -1.2088e-04,  1.3852e-05, -4.6421e-06,  4.7541e-05,  6.5613e-05,
        -1.0221e-04, -6.6027e-05, -1.9423e-05, -1.7103e-05, -1.8514e-04,
        -6.5466e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #500: [tensor([ 5.8854e-05,  8.4359e-02, -1.8191e-05, -2.1837e-06,  1.1506e-04,
        -2.4271e-05,  1.8805e-05,  3.3053e-05,  3.1397e-05, -1.2223e-05,
        -4.0015e-05,  2.1568e-06, -6.8792e-05, -9.0859e-06, -1.5620e-05,
        -1.3158e-05, -2.3814e-05,  2.0995e-05, -2.7167e-05,  7.0916e-07,
        -3.1944e-05, -2.8592e-05,  1.9248e-05,  1.1762e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2718e-05, -9.6497e-02, -2.2404e-06, -1.6327e-05, -2.7037e-06,
        -1.4908e-06,  3.3593e-05, -1.6211e-05,  1.1229e-05, -1.9230e-05,
        -6.6054e-06,  2.1122e-05,  1.8906e-05,  1.3048e-05, -1.6385e-05,
        -4.0438e-05, -5.5628e-06, -2.4814e-05,  9.1689e-06,  9.5550e-06,
        -7.3802e-06, -2.2256e-05,  1.5652e-05, -1.6444e-05, -2.3143e-05,
        -1.3265e-05, -6.1334e-06,  3.4500e-06,  1.3473e-05, -1.4933e-05,
         1.6822e-05,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0186e-04,  1.2483e-01, -8.0339e-05, -1.5540e-04, -1.1239e-04,
        -6.0572e-05, -6.2211e-05,  4.9859e-07, -2.5219e-05, -1.5152e-04,
        -8.5251e-05,  6.2625e-05,  3.5496e-05,  2.2077e-05, -8.6790e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3443e-04,  1.4325e-01,  3.5372e-05, -1.7111e-04, -2.6697e-04,
        -4.2735e-05, -9.6116e-06, -6.8183e-06,  5.0568e-05,  6.6452e-05,
         8.7719e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-2.3288e-05,  4.9460e-02,  2.2984e-05,  9.4071e-07,  3.4981e-05,
         1.6300e-05, -1.3462e-05,  2.4182e-05, -1.0600e-06,  2.1978e-05,
         3.4455e-05, -7.5739e-05,  5.1427e-05,  1.5521e-05,  9.3451e-06,
         2.6783e-05,  1.2485e-05, -1.8891e-05, -1.0584e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.0485e-05,  7.0154e-02,  4.0877e-05, -6.3520e-05, -5.9475e-05,
         3.0012e-06,  3.3494e-05, -4.2015e-05,  3.8797e-05, -2.4378e-05,
        -3.0332e-05, -4.0211e-05, -1.3210e-05,  1.3182e-05, -1.0939e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.1717e-04, -1.6671e-01,  1.0118e-04,  1.2850e-05,  6.1725e-06,
         3.2091e-05, -7.0848e-05,  1.3289e-04, -1.0279e-04, -2.9724e-05,
         1.2299e-05,  3.6414e-05, -1.7827e-05,  7.4056e-05,  7.5940e-05,
        -8.9054e-05,  8.0639e-05, -5.2943e-06,  6.3542e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.5761e-05,  1.3102e-01,  2.5464e-05,  6.5797e-06,  3.4674e-05,
        -6.4350e-05,  1.1824e-04,  5.8380e-06,  6.4961e-05, -2.5597e-05,
         5.9804e-06,  1.2876e-04,  8.0120e-05, -4.9274e-05,  1.0362e-04,
         5.8188e-05, -4.6289e-05,  9.1916e-06, -1.4378e-05, -1.8170e-06,
         5.0641e-05,  2.9166e-05, -6.3436e-05,  9.8009e-05, -3.6017e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.0556e-04,  9.2376e-02, -5.4206e-05,  5.7947e-05, -9.4534e-05,
         7.7076e-05, -2.8893e-05,  1.4574e-06,  3.7892e-05,  2.6782e-05,
         8.4850e-05, -2.2447e-05,  5.0957e-06, -3.1478e-06,  2.2600e-05,
         1.2492e-04,  8.8152e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7754e-05,  1.6182e-01, -6.1370e-05, -6.9872e-05, -5.4740e-05,
        -3.1737e-05,  1.6310e-05, -9.6586e-06,  3.4980e-05, -2.4179e-05,
        -3.5669e-05, -6.6979e-05, -4.2296e-05, -7.9877e-05,  2.0506e-05,
        -2.2351e-05, -1.5867e-04, -3.8006e-05,  4.0054e-05,  3.1819e-05,
        -7.6102e-05,  3.8460e-05,  7.1711e-05, -5.1844e-06, -1.2007e-04,
        -9.6968e-05, -5.7044e-05, -5.5633e-05, -1.5262e-04,  1.6281e-05,
         1.2610e-04, -1.5073e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.9592e-05, -2.0072e-01, -9.5369e-05,  4.8508e-05, -3.4261e-05,
        -6.4839e-05, -1.0808e-05,  8.7604e-05, -1.3353e-04,  6.1464e-05,
         5.2108e-05, -5.9267e-05,  1.1115e-04, -1.0055e-04,  3.3550e-05,
        -4.1367e-05,  2.3813e-04, -4.2119e-05, -3.7897e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.2271e-04, -1.7627e-01,  1.5606e-04, -2.7154e-04, -1.3591e-04,
        -1.8509e-04,  5.2574e-05, -1.6562e-06, -5.5668e-05, -2.1877e-05,
        -9.0976e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #550: [tensor([ 1.2530e-04, -1.8905e-01, -9.9725e-06, -4.9812e-05,  7.1330e-05,
         5.3602e-07,  5.4603e-05,  2.4509e-05,  1.6687e-06, -2.9910e-05,
        -1.0966e-04, -2.3645e-05,  4.5043e-05, -2.7914e-05, -6.1001e-05,
        -6.5958e-06,  1.1575e-04,  4.1035e-05,  3.5962e-05, -5.5900e-05,
         3.2497e-05, -1.9280e-05,  9.5960e-06, -4.8501e-05,  4.4071e-05,
         7.2003e-05, -4.2646e-05, -4.4997e-05,  2.4316e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.8661e-05, -1.3690e-01,  1.4141e-05,  6.1537e-06, -2.0116e-05,
        -2.4063e-05,  8.4946e-07, -6.4765e-06, -6.1412e-07, -1.6560e-05,
         8.8492e-06, -4.3888e-05, -7.7891e-06, -3.2894e-06,  1.3226e-05,
        -7.3107e-07,  2.5441e-05, -2.5821e-08,  1.5056e-06,  3.1889e-05,
         1.0191e-05,  6.6643e-06,  1.0020e-07, -1.2066e-05, -1.6362e-05,
        -8.8130e-06,  2.6287e-05,  2.3140e-05,  4.4561e-06,  7.3954e-06,
         7.0901e-06,  2.5479e-05,  1.7079e-05, -9.2464e-06, -1.5736e-05,
         3.5385e-06, -2.2391e-05,  3.8877e-05,  1.8361e-05, -1.8537e-05,
         9.7298e-06, -1.8849e-05,  3.5995e-06, -1.2150e-05,  1.3592e-05,
         3.9453e-06,  1.4323e-05,  8.7102e-06,  9.5035e-06, -2.5759e-05,
         2.0379e-05,  4.4985e-06], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.1880e-05,  1.5581e-01,  2.3542e-05,  4.6062e-05, -3.8960e-05,
        -7.7348e-05,  2.3275e-06,  7.7017e-06, -3.3798e-05, -6.0994e-07,
        -8.7459e-05, -8.6973e-06, -9.9368e-05, -2.1912e-05, -4.4340e-05,
        -2.4059e-05,  2.3808e-05, -1.5525e-05,  4.2299e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.1202e-04,  1.9863e-01, -1.1315e-04, -1.7978e-04,  7.5086e-05,
         1.9675e-05, -6.1354e-05, -1.5426e-04, -8.3026e-05, -1.3240e-05,
        -6.8298e-06, -3.3039e-05,  2.9780e-06, -2.7724e-05,  4.6465e-05,
        -3.0325e-05, -6.5726e-05, -1.2537e-04,  1.1985e-05, -1.4134e-04,
         3.0543e-05,  1.0933e-04,  1.9302e-05,  3.0158e-06, -9.9537e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-5.1205e-05,  2.0049e-01, -2.0078e-04,  6.8331e-05, -4.2881e-05,
         1.3967e-04, -3.8172e-05,  1.5628e-04,  9.2466e-05, -9.0897e-05,
         7.5617e-05, -4.7206e-05, -6.5021e-05,  1.2413e-05,  1.6341e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.3181e-05, -1.0124e-01,  2.4930e-05,  2.0397e-05,  2.5676e-05,
         2.0925e-05, -9.7042e-05, -6.2285e-05,  7.7068e-07,  6.0627e-05,
        -3.4961e-06, -3.5554e-05, -5.7798e-05, -3.8201e-05,  2.4915e-05,
        -3.6178e-05,  1.9127e-05, -8.3533e-05, -8.3378e-05, -1.1394e-05,
         1.2011e-05, -2.5937e-05,  5.5336e-05,  4.4781e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.4490e-04,  3.9437e-02, -5.5376e-05,  5.4277e-05,  1.0290e-05,
        -5.0571e-06,  7.9727e-05, -1.7339e-05,  1.2529e-05, -1.3617e-05,
         2.2924e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.7091e-05, -1.3224e-01, -1.2582e-05,  1.7874e-05, -5.4256e-05,
         8.5368e-06,  1.0057e-04,  1.0137e-04, -4.4036e-05, -1.3480e-05,
         3.4765e-05,  1.8255e-05,  1.0060e-04,  2.9444e-05,  9.3271e-05,
         9.6714e-05, -3.6765e-05, -1.7391e-05,  1.4404e-04, -1.5478e-05,
         2.1741e-05,  4.6279e-05,  4.6179e-05,  4.5362e-05, -5.3995e-05,
         8.1918e-06,  4.9051e-05,  9.8822e-05,  5.1036e-06,  7.6921e-05,
        -3.7842e-05,  2.2231e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 2.0822e-05, -1.8849e-01,  9.3150e-05,  1.9531e-04,  9.2365e-05,
        -9.4100e-05, -7.2162e-05,  1.7990e-04, -9.8002e-05,  2.0149e-05,
         1.3162e-04, -8.0375e-05,  2.4096e-05, -1.2271e-04, -1.2453e-04,
        -1.6698e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.1092e-05,  6.5556e-02,  2.4262e-05, -3.4485e-05, -1.6685e-05,
         7.1856e-05, -1.0716e-05, -1.6894e-05, -2.0330e-05, -1.5410e-05,
         3.2815e-05, -2.3353e-06,  3.0308e-05,  3.5226e-05, -7.1507e-06,
         1.3966e-05, -2.4518e-05, -1.9475e-06, -3.6687e-05, -7.3283e-07,
         2.0129e-05,  3.5590e-06,  2.2326e-05, -6.5263e-05, -2.3538e-05,
         5.6252e-05,  5.3198e-05,  4.9204e-06, -1.4360e-05,  5.9545e-06,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.5159e-05, -1.5716e-01, -8.9623e-05,  4.3424e-07,  8.6909e-05,
         1.0488e-04,  3.6643e-06, -4.1666e-05, -8.2189e-06,  1.0976e-04,
        -1.1553e-05, -4.1016e-05,  4.3796e-05, -1.6062e-04,  5.3459e-05,
         2.2370e-06, -1.0492e-04, -4.4436e-05, -1.0559e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6412e-04,  1.7658e-01,  1.0810e-05, -8.3127e-06, -6.6079e-05,
         1.7011e-05,  9.1312e-06, -2.8956e-05, -7.1611e-06,  3.9152e-05,
        -7.6752e-05, -4.9466e-05, -1.9225e-05,  1.3340e-04, -3.5742e-05,
        -4.5819e-05, -7.5243e-05, -1.6405e-05, -9.7173e-05,  1.0150e-06,
        -6.5853e-05, -7.0795e-05,  5.8424e-06,  2.7929e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #50: [tensor([ 5.2939e-05,  1.1711e-01, -6.6911e-05, -8.1121e-06, -1.5455e-06,
         4.2838e-05, -4.1086e-05, -1.7493e-05,  5.4584e-06, -2.5003e-05,
        -2.0273e-05,  1.7116e-06,  1.7620e-05,  2.8032e-06, -1.0043e-05,
         2.4376e-05, -4.8121e-05, -1.9848e-05,  9.3029e-06,  1.9129e-05,
         1.3029e-05,  1.9138e-05, -7.6297e-05,  4.2095e-06, -9.2654e-06,
         2.0827e-05,  2.4110e-05, -1.3870e-05,  4.1500e-05,  2.9030e-05,
         1.9872e-05,  1.1734e-05,  3.1878e-05, -1.5411e-05, -1.1951e-05,
        -7.7393e-06,  9.0920e-06, -6.2709e-05,  9.7735e-06,  4.7807e-05,
         2.2919e-05, -5.7660e-05,  6.7510e-06,  5.5952e-05, -1.8340e-05,
        -4.0779e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.6250e-04, -1.6387e-01,  5.0483e-05,  7.8351e-05, -1.4782e-04,
         9.7540e-05, -9.6043e-05, -4.4982e-05,  3.6594e-05,  3.0896e-05,
         4.5202e-05,  6.4800e-05, -1.5672e-04, -7.8479e-05, -5.9026e-05,
        -3.9567e-05, -1.5474e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0978e-04, -8.4598e-02, -9.4387e-05, -3.9790e-05,  1.4696e-05,
        -9.8735e-06, -5.6477e-06, -6.9317e-05,  3.1474e-05, -2.2561e-05,
        -3.5645e-05,  1.3561e-05, -3.7260e-05, -9.2600e-06, -2.3062e-05,
        -3.0748e-05,  9.5184e-06, -1.6484e-05, -2.8702e-05,  4.3958e-06,
        -5.1783e-06, -3.9198e-05,  8.1082e-06,  8.1052e-05,  3.5406e-07,
        -9.7160e-06,  3.0903e-06, -1.0379e-05, -1.0418e-05, -3.2103e-05,
        -2.9241e-05,  4.4665e-06, -2.5008e-05,  1.9038e-05,  4.9154e-06,
        -4.7436e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.6797e-05, -1.3581e-01,  3.3205e-05, -8.1966e-07, -6.1630e-05,
        -1.1562e-04, -1.8055e-05,  2.4524e-05,  7.1288e-05, -6.8030e-05,
        -8.1817e-05, -1.4501e-04, -4.7156e-06, -2.2415e-05,  6.1277e-06,
         9.0285e-05, -1.5934e-04,  1.8315e-05,  7.2820e-05, -3.4286e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.1034e-04,  1.0905e-01, -3.5192e-05,  2.6431e-06,  5.7428e-05,
        -3.3550e-05,  1.0464e-05, -2.3006e-05,  1.4971e-05, -4.0976e-05,
        -1.3760e-05, -6.5175e-05,  1.0102e-06,  3.9148e-05,  7.1480e-06,
        -6.8847e-06,  1.0662e-05, -2.5745e-05,  3.1583e-06, -4.6767e-05,
         8.1893e-06,  1.9740e-05, -4.8169e-07, -5.3248e-05,  4.2229e-05,
         2.1390e-05, -2.2946e-05, -4.0717e-05,  4.0165e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 2.8081e-05, -1.5594e-01, -2.9595e-05,  3.9011e-05,  2.9245e-05,
        -1.5761e-05, -3.9164e-06, -7.6372e-05,  2.9040e-05,  1.8804e-05,
         8.2628e-06, -8.5314e-05,  8.0758e-06, -8.1149e-06, -4.5430e-05,
        -2.3602e-05,  3.1571e-06,  1.4081e-05,  1.7903e-05, -5.6970e-05,
        -1.4836e-05,  4.4723e-05,  1.9436e-05, -5.5421e-05, -8.1750e-05,
         7.1293e-05, -3.4217e-05, -1.5055e-05,  4.0236e-05, -4.7083e-05,
         4.0690e-05,  1.2385e-05, -8.3393e-06, -2.4449e-05, -2.4849e-05,
        -4.2674e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 6.9874e-05, -1.9944e-01,  1.1586e-04,  7.8900e-05, -2.5098e-05,
        -3.4071e-05, -1.4114e-04, -1.0675e-04, -6.2349e-05,  6.5786e-05,
         8.2012e-06, -1.6446e-05,  6.8359e-05, -1.1595e-04, -7.8818e-06,
         4.7440e-06,  9.0866e-05,  1.1876e-04, -4.2167e-05,  1.8940e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.0551e-05, -1.3952e-01,  9.6016e-06,  3.7288e-05,  1.2064e-06,
         6.4534e-06,  5.4582e-05,  3.1604e-05, -4.3188e-05, -3.0098e-05,
         7.6561e-06,  7.5652e-06, -3.2611e-05,  6.9449e-06,  2.4308e-06,
        -1.9570e-06, -4.1145e-05,  9.0400e-06,  8.5645e-07,  1.8719e-05,
         1.5898e-05, -6.0829e-06, -1.3242e-05, -1.0643e-05, -1.4800e-05,
         1.6610e-05, -1.9638e-05, -7.7189e-06,  2.1239e-05,  1.3600e-05,
         3.2950e-05, -1.8983e-05,  3.5855e-05, -5.1477e-05, -3.1555e-06,
        -1.1414e-05,  3.4018e-05, -1.4851e-05,  5.6502e-07,  5.4872e-05,
         2.6043e-05, -4.9447e-06, -1.7611e-06, -9.9665e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 9.7151e-05,  8.3033e-02,  8.7103e-06, -2.4243e-06, -8.8363e-06,
         1.5275e-05,  9.1289e-06,  2.5862e-05,  1.7348e-05, -2.2803e-05,
        -9.1819e-06, -1.2891e-05,  2.0552e-05,  2.3279e-06,  3.4065e-05,
         1.3040e-05,  3.2483e-06,  1.4703e-05, -2.5157e-06, -1.5089e-05,
        -5.8994e-05, -2.1467e-05,  6.3588e-05, -1.0836e-05, -2.1280e-06,
         3.9543e-06,  6.6854e-06,  1.7649e-05, -4.4395e-06,  1.3382e-05,
         2.0966e-05, -4.3466e-06,  6.5752e-06,  1.2439e-07,  1.4654e-05,
         3.0014e-05, -4.1750e-05, -5.8946e-06, -9.3553e-06,  7.1630e-06,
        -1.3544e-05,  1.2079e-05, -4.3747e-05, -1.0566e-05, -4.2433e-05,
        -1.8257e-05, -3.2980e-05, -3.1978e-05,  8.8540e-06, -3.4421e-05,
        -2.2016e-05,  1.5457e-06,  1.2685e-06], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.1834e-04, -1.2768e-01, -5.8954e-06, -3.9757e-05, -3.1303e-05,
         4.4188e-05, -1.0144e-05, -2.5024e-05, -6.6985e-06, -8.5306e-07,
        -1.8326e-05, -4.3232e-05, -4.2521e-05, -3.1242e-05, -1.8102e-05,
        -5.7934e-06,  1.0008e-05,  1.3231e-05,  2.0063e-05,  7.7115e-06,
        -4.4700e-05,  2.3302e-05,  5.3715e-06, -1.3871e-05, -3.3008e-06,
        -3.3754e-05,  4.2850e-05,  9.3522e-06,  4.1373e-05,  3.1406e-05,
        -2.4375e-05, -7.9308e-06,  8.4573e-06,  4.0446e-05,  3.1321e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.2241e-04, -6.4604e-02,  6.1691e-06,  9.0648e-06, -3.7864e-05,
        -5.9419e-06,  2.0932e-05,  2.6709e-05,  1.5236e-05,  2.7407e-06,
        -2.4785e-05,  1.9819e-05,  1.9899e-05,  3.2113e-05,  5.5091e-06,
         9.9057e-06, -4.7012e-07,  1.3319e-05,  1.5154e-06,  3.5861e-05,
        -9.1050e-06,  3.4406e-05,  7.1812e-06, -2.7915e-05,  1.0179e-05,
        -2.4167e-05, -2.5900e-06,  3.1575e-05,  1.6599e-05, -7.7358e-06,
        -1.4187e-05, -2.8621e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.4198e-05, -7.1749e-02, -1.8948e-05, -3.8259e-06, -1.0050e-05,
         5.4728e-07, -1.3915e-06, -1.2301e-05, -7.9425e-06,  4.8951e-06,
        -6.4481e-06,  1.2026e-06,  2.6180e-05, -4.8295e-06, -1.1713e-05,
        -1.2950e-05, -8.2280e-06,  3.1379e-05, -1.3491e-05, -2.2197e-08,
        -1.0155e-05, -2.4993e-05, -8.4297e-06, -3.2060e-05, -8.1578e-06,
        -4.9226e-06, -6.1224e-06, -3.6763e-06, -3.1461e-05, -3.1269e-06,
         5.0949e-06, -1.6507e-05, -7.3712e-06, -7.3013e-06,  8.4400e-07,
        -2.7784e-05,  4.4496e-07,  1.3983e-05, -1.0829e-05, -1.7770e-05,
        -6.6578e-06, -5.6746e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #100: [tensor([ 3.3926e-05,  1.2346e-01, -1.0831e-04, -1.0517e-05, -2.0447e-05,
        -1.8259e-06, -3.1679e-05,  1.8956e-05, -1.9996e-05, -4.8256e-05,
        -1.4095e-05,  3.0763e-06,  1.0228e-05, -2.2737e-05, -4.7130e-05,
         1.7670e-05,  2.3007e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.2456e-05, -1.0848e-01,  1.5331e-05, -8.8390e-06, -6.5616e-06,
         4.7988e-07, -5.1619e-05, -9.6672e-06,  2.2562e-05, -2.7223e-05,
        -1.7654e-05,  1.2738e-05, -1.3407e-05,  7.4249e-07,  1.7601e-05,
         3.4591e-05,  1.3560e-05, -1.6730e-05, -7.3631e-06,  1.3831e-05,
        -1.4800e-05, -3.1498e-05,  2.3293e-05,  2.8993e-05, -1.0744e-05,
         8.5247e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-5.5360e-05,  5.4080e-02,  1.2927e-05, -1.5146e-05,  3.8736e-05,
        -3.1546e-05,  1.7776e-05, -5.2374e-05,  7.2302e-05,  3.2175e-05,
         9.4956e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-3.3704e-05, -1.2732e-01,  8.3987e-05,  3.1570e-05,  1.1426e-04,
         5.7368e-05,  6.5286e-05,  6.8672e-05,  4.6140e-05,  9.6188e-07,
         1.1261e-04,  9.5321e-06, -8.4597e-06,  7.8592e-05, -3.1993e-06,
         1.5026e-04,  6.3469e-05, -1.0591e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.0902e-04, -1.7171e-01, -5.5616e-05, -9.0718e-05, -1.6767e-04,
        -1.3593e-05, -5.1831e-05,  9.4512e-05,  1.2578e-04,  1.3468e-05,
        -1.3786e-04, -8.7048e-05,  1.5804e-06,  6.5828e-05, -4.2614e-05,
         8.2787e-07,  8.6524e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1331e-06, -1.2889e-01, -5.8136e-06, -2.8036e-05, -3.8793e-05,
        -2.1546e-05, -2.3279e-05,  6.2203e-05, -1.5715e-05,  6.6023e-06,
        -5.4285e-06,  4.6414e-05,  5.9132e-05, -3.8334e-06, -5.5042e-06,
        -1.2707e-05,  7.0302e-05,  1.1484e-05,  1.7751e-05,  1.1044e-05,
        -6.1104e-05,  1.0201e-05, -6.1437e-06,  3.1163e-05,  2.4302e-05,
         5.6535e-05,  3.3887e-05, -5.2083e-05, -8.2982e-06,  4.8108e-05,
         1.0636e-04, -2.2920e-05, -9.8673e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-4.2772e-05,  1.4421e-01,  1.2014e-04,  1.1045e-05, -1.7831e-05,
         5.1333e-05,  6.2275e-05, -9.8670e-06,  1.2304e-04, -2.3623e-05,
         1.2749e-05,  1.6936e-05,  5.5557e-05, -6.0518e-05, -5.6191e-05,
        -2.1781e-05,  1.2274e-04, -1.2216e-05, -2.8858e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.3858e-05,  1.6611e-01,  6.5806e-05,  9.0093e-05,  1.9158e-04,
         9.8334e-07,  4.9167e-07, -1.3188e-04, -2.2874e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-8.4582e-05, -1.4679e-01,  1.9293e-05, -9.5563e-05,  1.1932e-05,
        -1.5378e-05, -8.2379e-05,  3.6082e-05,  6.6053e-06,  2.5319e-05,
         1.4622e-05,  7.6637e-05, -6.3844e-05,  2.7187e-05, -2.3088e-05,
        -2.9654e-05,  4.0957e-05,  5.8644e-05, -3.6148e-05,  2.7735e-05,
        -9.1133e-05, -7.3641e-05, -3.2913e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-6.5492e-05, -1.0944e-01,  3.1392e-06,  1.2346e-05, -6.3118e-07,
        -9.4454e-06, -9.0601e-07, -4.2438e-05, -5.1616e-06,  1.0017e-05,
        -2.5755e-05, -7.8990e-06,  3.9562e-05,  3.9609e-06,  2.5549e-05,
         7.6718e-06,  1.3886e-05,  2.2594e-05,  2.8964e-05,  5.2306e-05,
         2.2362e-05, -2.8045e-05,  2.8072e-05, -4.3283e-06,  1.1086e-05,
        -2.2735e-05, -1.4261e-05, -2.4770e-05,  1.7195e-05,  1.0070e-06,
        -1.1266e-06, -9.9750e-06,  2.7574e-05,  3.0910e-05,  1.6106e-05,
        -6.2119e-06,  1.3250e-05, -5.7440e-05, -4.2226e-06,  3.0446e-07,
        -4.6675e-06, -7.6605e-06,  5.2028e-05, -3.9261e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7836e-05,  9.6552e-02,  9.1775e-06, -2.1590e-05,  3.2406e-05,
        -2.5789e-05, -6.1131e-05, -5.7523e-06, -4.9732e-05,  5.2241e-06,
        -2.3687e-05, -1.8647e-05,  2.3280e-05, -2.1269e-05, -1.9527e-05,
         1.0100e-05,  5.2552e-05, -3.8553e-05,  1.8841e-06, -6.7239e-06,
        -2.8996e-05, -8.4297e-06, -2.8435e-05, -1.7324e-05, -1.4395e-05,
         3.4155e-05,  1.8303e-05, -2.6554e-05,  1.8045e-06,  4.6171e-06,
        -1.5916e-05, -4.2854e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.5884e-06, -1.4782e-01, -3.1696e-05,  9.1098e-06, -4.2286e-05,
        -4.9471e-06,  5.4835e-05,  2.2341e-05, -3.2926e-05, -8.2897e-05,
         3.6277e-05, -1.0823e-05,  2.3986e-05,  4.3590e-06,  9.7559e-05,
         4.7579e-06, -2.1462e-05, -1.8286e-05,  2.9839e-05,  8.6250e-05,
         4.1164e-05, -9.4482e-06,  1.0309e-05, -2.1004e-05,  3.8827e-05,
        -4.2634e-06,  1.5801e-05,  1.7973e-05,  9.9774e-05,  2.1866e-05,
         7.9211e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #150: [tensor([-1.5405e-05,  7.5245e-02, -4.1155e-05, -5.2400e-06, -7.9721e-06,
         1.3288e-05,  9.0434e-07,  1.0692e-05,  9.4799e-06, -2.1250e-05,
         8.7809e-06, -4.7310e-05, -1.2563e-05,  1.7942e-05,  1.3939e-05,
         3.2981e-05, -1.6866e-05,  3.1128e-05, -1.4336e-05,  6.7565e-05,
         2.6513e-05, -2.2333e-06,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-9.0696e-05, -1.1926e-01, -5.4091e-06,  1.6816e-05, -2.4352e-05,
         2.3739e-05, -5.1255e-05,  3.3190e-06,  1.0501e-05,  1.0361e-05,
        -2.7854e-06,  1.5760e-05, -1.4994e-06,  3.0856e-05, -2.5967e-05,
         6.1931e-06,  1.0303e-06, -1.1501e-05, -1.7009e-05,  1.4081e-05,
         2.2311e-06,  2.5235e-05, -1.5576e-05,  3.0565e-05, -2.6745e-05,
         2.3024e-05, -1.2571e-07,  4.1014e-06,  6.3415e-06, -1.4600e-06,
        -1.9985e-06, -2.0062e-05,  3.3568e-05,  2.9870e-05, -2.3982e-05,
        -2.2108e-05,  1.3401e-05, -1.6477e-06, -1.1377e-06, -3.8530e-05,
        -4.0640e-05, -8.9552e-06, -2.3003e-05, -1.7141e-05, -1.0865e-05,
        -6.3571e-06, -4.5666e-06, -9.3080e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-2.4696e-05,  3.7043e-02,  4.1566e-05, -2.0164e-06, -3.7822e-05,
         3.4307e-06, -2.4841e-05, -1.3714e-05,  8.9787e-06, -4.1713e-05,
        -2.7276e-05,  3.8104e-05, -1.5380e-05, -5.4679e-06, -1.7353e-05,
         2.6444e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.7988e-05, -1.1388e-01, -3.3409e-06, -1.2429e-05,  5.8491e-06,
         1.2487e-05, -1.8751e-05, -1.2327e-07, -2.1011e-05, -9.0505e-06,
        -1.2744e-05,  1.0161e-05, -4.9789e-06,  4.6204e-06,  1.6118e-05,
        -2.3686e-05, -1.9052e-06,  3.1310e-06, -5.1591e-06, -1.4167e-05,
        -1.6286e-05, -1.4574e-05, -5.2379e-06,  2.8520e-06, -1.8868e-05,
        -1.5793e-05, -6.6397e-06, -6.8295e-06, -1.2714e-05, -4.7511e-06,
         2.4437e-05, -2.1527e-05,  9.9900e-06,  1.0879e-05,  6.7013e-07,
        -7.4444e-06, -1.1854e-05,  5.0253e-06, -1.3758e-05, -1.2883e-05,
         2.0312e-05,  4.4572e-06,  4.4719e-06, -1.8572e-05, -1.3847e-05,
        -1.3964e-05, -3.7073e-05, -1.3151e-05, -1.6588e-05,  7.5149e-07,
         4.7940e-06, -1.1897e-05, -9.6983e-08, -3.3243e-05, -7.0000e-06,
        -3.7412e-05,  2.6305e-05, -1.0475e-05], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 4.5040e-05, -1.5450e-01,  1.6730e-05, -1.1361e-05, -6.0489e-05,
        -8.4744e-05, -2.1069e-05,  4.0501e-05,  2.6205e-05, -1.2058e-05,
         4.9243e-05,  7.0600e-06, -3.6217e-05, -7.7039e-05, -1.0198e-05,
         1.5034e-05, -2.9989e-05,  1.3253e-05,  2.1365e-05,  3.7287e-05,
         1.9858e-06, -2.0598e-05,  1.8512e-05,  7.2727e-05, -1.8161e-05,
        -1.9844e-05, -1.7190e-05, -2.0897e-05,  1.5130e-05, -3.6777e-06,
         3.2551e-05,  5.7659e-06, -1.9967e-06, -4.7323e-06, -1.6982e-05,
         9.5237e-06, -6.0978e-06,  2.8274e-05, -1.5195e-05,  7.3583e-05,
        -4.9329e-05, -5.5856e-06,  2.0380e-05, -8.4896e-05,  3.2085e-05,
         3.1274e-05,  1.9403e-05, -1.4871e-05, -1.5895e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-1.7675e-05,  4.2141e-02,  3.4494e-06, -4.4823e-05, -1.8991e-06,
        -1.0534e-05,  8.1029e-06,  1.4162e-05,  1.2784e-05,  7.6289e-05,
        -1.4480e-05, -5.8795e-05, -3.9483e-05, -4.2857e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([-7.7690e-05, -1.0497e-01, -1.8663e-05,  2.6445e-05,  5.4172e-05,
        -3.4157e-05, -4.6652e-05, -3.2138e-05,  4.8180e-05, -2.1551e-05,
        -9.5651e-05, -1.5619e-05, -7.6737e-05, -5.1239e-07,  7.6228e-05,
         3.2764e-05, -1.0688e-07, -1.0947e-05,  6.2539e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 3.4536e-05,  2.1780e-01, -5.4914e-06,  2.0953e-06,  2.5878e-06,
         1.8674e-05, -2.2597e-05,  2.7594e-05, -4.9106e-05,  3.3392e-05,
        -3.0461e-05,  3.0086e-05,  9.7767e-06, -1.1531e-04, -6.4758e-05,
        -5.8299e-05,  1.7071e-04,  2.0204e-05, -8.3426e-05, -1.2093e-04,
         1.9562e-05, -2.3532e-05, -6.2013e-05, -4.3663e-05,  1.6124e-05,
         3.2017e-05,  1.7480e-05, -4.6380e-05, -2.4670e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.5157e-06,  3.3868e-02,  4.1720e-05, -6.5500e-05,  3.3016e-05,
         6.2970e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 7.1694e-05, -1.4905e-01,  3.2122e-05,  3.4765e-05,  2.0522e-05,
        -7.4580e-05,  2.0486e-05,  1.4375e-05,  3.0582e-05, -2.6043e-05,
         5.1891e-06, -1.1665e-05,  1.8187e-06, -7.2984e-05, -2.9374e-05,
        -2.7055e-05, -5.6273e-06,  3.9166e-06,  7.3118e-06, -2.4758e-05,
        -5.8969e-06,  1.4550e-05, -3.4856e-05, -3.5077e-05, -3.9131e-05,
         1.4225e-05, -2.1147e-05, -1.2065e-05,  3.9287e-05,  2.7847e-05,
        -2.8492e-06,  8.3311e-06,  1.9389e-05, -4.1877e-05,  7.7510e-06,
        -3.1105e-05, -5.2320e-06, -3.3748e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 5.3523e-05, -1.8256e-01, -3.8379e-05,  5.8455e-05,  4.1107e-05,
        -7.9255e-05, -2.9807e-05,  2.4005e-05,  3.3465e-05, -1.7243e-05,
        -7.6540e-05, -3.2914e-05,  3.1674e-05,  4.0103e-06,  5.2347e-05,
        -2.1900e-05,  2.6171e-05, -6.2052e-06,  4.6619e-05, -1.7490e-05,
         7.9862e-06,  2.5552e-05, -4.9317e-06,  8.5833e-07, -1.9673e-06,
        -3.1285e-05, -2.7328e-05,  3.3952e-06, -2.9541e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>), tensor([ 1.4561e-04,  1.2202e-01, -7.0526e-05,  8.0383e-06,  4.3602e-05,
         1.5568e-05, -1.3592e-05,  6.6397e-05, -3.9322e-05,  4.8307e-05,
         1.5037e-05,  6.3652e-06, -2.2793e-06,  4.3234e-05,  7.9603e-05,
         3.7080e-05, -7.2209e-05, -7.9255e-05, -2.3301e-05,  2.6488e-06,
        -5.7985e-05,  6.4382e-05,  3.9019e-05, -4.1822e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<SumBackward1>)]
Iter #200: [tensor([-5.5782e-05, -1.6416e-01,  1.1191e-05,  3.4160e-05, -1.4860e-05,
        -2.1197e-06,  1.3102e-05,  1.0104e-05, -2.6267e-05, -1.4476e-05,
        -1.5945e-05,  5.4746e-05,  3.5295e-05,  7.5799e-06, -1.0775e-05,
        -3.1286e-05,  1.2764e-05,  2.7577e-05,  9.0801e-06,  3.0111e-05,
         8.1820e-06, -2.0543e-05, -2.4488e-05,  2.8991e-05,  3.8759e-05,
        -3.1139e-05,  5.5707e-05,  5.0585e-05,  1.1756e-05,  3.6514e-05,
        -1.1595e-06,  3.4430e-05,  3.0905e-05,  2.1364e-05, -7.4481e-06,
        -3.4231e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.3199e-05, -9.8464e-02,  2.3463e-05,  1.3724e-05,  3.1334e-05,
        -1.9315e-05, -1.5127e-05,  2.9439e-05,  4.5977e-05,  2.4359e-05,
        -8.7892e-06,  4.9945e-06, -2.3844e-05,  1.3915e-05,  5.6193e-05,
         2.2149e-05, -2.4287e-05, -1.2043e-05, -1.9082e-05, -1.1608e-05,
         6.7202e-06,  6.3495e-05, -2.1163e-05,  3.0620e-05,  1.6220e-05,
         1.2718e-05,  2.8905e-05,  6.0610e-06,  6.4228e-06, -1.5903e-06,
         4.2510e-05,  9.9610e-06,  9.6719e-06, -2.0811e-06,  1.4342e-05,
        -4.1264e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7183e-05,  6.0092e-02, -1.4063e-06,  4.9895e-06, -2.8924e-06,
        -2.2583e-05,  6.5298e-06,  1.1368e-05,  1.5942e-06, -2.5404e-05,
        -1.5843e-05, -1.0857e-05,  8.7735e-06,  5.9608e-06,  3.4099e-06,
        -3.2347e-06,  5.6388e-07,  4.1279e-05,  4.3056e-05, -1.8184e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.0460e-04,  1.4446e-01,  1.7143e-05,  5.9932e-05, -7.4939e-05,
         6.1700e-05,  8.6655e-06,  2.3834e-06, -6.7354e-05,  3.3052e-05,
        -1.6722e-05, -1.5703e-04,  1.1393e-04, -7.5414e-05, -5.9193e-05,
         2.7179e-05, -1.0874e-05,  2.6884e-05,  1.4113e-05, -7.1128e-06,
         4.0020e-05, -3.5618e-05, -5.5688e-05, -1.4519e-04, -1.6884e-05,
         9.5906e-05, -3.2355e-05, -1.9462e-06, -7.4499e-05,  4.7961e-05,
        -7.3848e-05, -3.9040e-05, -3.6754e-05, -9.1786e-05,  2.9853e-07,
        -5.6192e-05, -2.2877e-05,  7.6038e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.8312e-05, -1.2081e-01,  2.3779e-05, -6.6333e-06, -6.4623e-05,
        -8.6015e-06,  6.3736e-05, -1.5852e-05,  1.3360e-05,  1.6656e-05,
         9.5790e-06, -2.4176e-05,  3.1002e-05, -2.3827e-05,  5.7263e-05,
        -1.2870e-05, -4.4280e-06,  8.3342e-06,  5.8172e-05, -2.6988e-05,
        -3.8189e-05, -3.7297e-06,  3.8132e-05, -2.1408e-05,  3.3957e-06,
         1.2622e-05,  8.6089e-06, -1.4325e-05, -2.2513e-05, -1.0280e-05,
        -2.1608e-05, -3.7600e-07, -3.0320e-05, -5.4831e-05, -7.8459e-06,
        -1.6213e-05, -1.3644e-06, -4.4469e-05,  6.3760e-05, -1.6803e-05,
        -3.6072e-05], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.8114e-05, -1.3133e-01, -1.5500e-05, -5.7738e-05,  1.3423e-05,
        -4.8460e-05,  1.5421e-05, -5.1220e-05,  5.5314e-05,  1.6435e-05,
        -3.4298e-05, -8.7334e-06, -3.6112e-06,  3.2511e-05, -5.2643e-05,
        -4.9590e-05,  4.6295e-05,  4.3689e-07,  7.9008e-05,  1.9645e-06,
         4.3413e-05,  1.2666e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.6582e-04,  2.9429e-02, -9.4406e-06,  1.5778e-06, -2.6154e-05,
        -2.8972e-06, -3.0579e-06, -2.1730e-05,  1.2803e-06, -1.7759e-05,
        -3.1089e-05,  6.3570e-06,  1.6591e-05,  6.0347e-06, -1.6388e-05,
         9.0838e-06, -1.9182e-05,  2.8280e-05, -1.4612e-06, -1.4096e-06,
         1.4674e-05,  1.7059e-05,  4.5285e-06,  6.3156e-06,  2.1720e-06,
         2.4020e-05,  1.3226e-05, -1.6453e-06, -6.0590e-06,  1.2447e-05,
        -1.1981e-05, -8.8647e-06, -5.9749e-06,  9.5322e-06,  1.2409e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 7.4977e-06, -1.2294e-01,  6.3777e-05, -6.3692e-05,  5.0050e-05,
        -3.5575e-07, -2.1493e-05,  2.4022e-06,  3.8438e-05,  7.4748e-05,
        -2.9476e-05, -1.6044e-05, -4.2065e-06, -2.5230e-05, -6.1887e-05,
         9.5457e-05,  5.4432e-05, -3.4011e-05,  3.7446e-06, -6.3561e-06,
         1.0701e-05,  3.6035e-05, -2.7532e-05,  3.1675e-05,  2.8541e-05,
        -1.3915e-05,  3.9984e-05, -3.3735e-05,  1.9023e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.3494e-06, -9.0743e-02,  2.5157e-05,  7.9418e-06,  1.1226e-06,
         9.9567e-06, -5.7863e-05,  1.0088e-05,  2.0712e-05,  1.6320e-05,
         2.6838e-05,  1.9488e-05, -4.7953e-05,  3.0871e-05,  2.3061e-05,
         2.6461e-05, -2.3070e-05,  1.5513e-05, -2.5750e-05,  6.8518e-05,
         3.8669e-05,  5.1645e-05,  1.4919e-05, -5.7724e-06, -1.5964e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 4.0699e-04, -7.7847e-02,  2.6867e-05,  3.0362e-05,  1.0124e-05,
         3.6197e-05, -4.3881e-05,  1.7410e-05, -6.2268e-06, -6.5289e-06,
        -7.5699e-06, -2.2850e-05, -3.1972e-05, -5.5070e-05, -4.6450e-05,
        -5.5858e-07,  4.3518e-05, -9.7262e-06, -6.1934e-05, -4.9105e-05,
        -5.3394e-05, -7.6724e-07, -6.4806e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-4.2460e-05, -1.0919e-01, -9.9795e-06, -2.4344e-06,  1.0284e-05,
        -5.9953e-05,  4.9796e-06,  9.1444e-06, -2.1248e-05, -2.7478e-05,
        -2.3761e-05,  1.0532e-05, -1.9964e-05,  5.9514e-06,  1.5473e-05,
        -6.7006e-05, -3.6868e-06, -9.7795e-06, -1.7205e-06,  3.6834e-05,
        -8.0749e-06,  1.7211e-05, -1.0004e-05, -6.0791e-06,  2.7148e-05,
         7.6946e-06, -8.2161e-06, -9.0830e-06,  4.2095e-05, -2.9819e-05,
        -6.3773e-06,  2.8434e-08,  3.1909e-05, -3.1171e-05, -2.7482e-05,
        -3.4649e-05,  2.2796e-05, -1.5118e-05, -1.0521e-05,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-7.5414e-05,  4.6703e-02, -7.2979e-05,  1.6378e-05,  2.7068e-05,
         6.1251e-05, -3.4088e-06,  2.4327e-05,  1.3577e-05, -2.1351e-05,
        -4.8391e-06, -2.0529e-07,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>)]
Iter #250: [tensor([ 1.2787e-04,  1.8812e-01,  8.6682e-05, -2.9935e-05,  1.5634e-05,
         6.1457e-06, -3.4436e-05, -9.8866e-05, -4.6483e-05,  7.4955e-07,
        -6.5995e-05,  2.2885e-05, -1.1107e-05,  2.0877e-06, -7.4543e-05,
        -7.1333e-05, -9.0540e-05,  6.2807e-05, -3.2943e-05, -1.8463e-05,
        -1.3998e-05, -4.4890e-05, -2.4129e-05,  1.0564e-04,  2.3989e-05,
        -9.6600e-05,  6.0033e-05, -1.5110e-05, -3.9244e-05,  1.0196e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.7555e-05,  1.3586e-01,  1.1266e-05, -1.7354e-05,  2.8681e-05,
        -1.4773e-05, -4.3275e-05,  2.4219e-05,  4.3273e-05, -2.7175e-05,
         2.2025e-06, -6.3455e-06, -1.8191e-05, -4.4781e-05,  1.1675e-05,
        -2.1568e-05, -3.3441e-05,  1.6082e-05,  5.3092e-05, -4.0833e-05,
         3.2185e-07,  7.7082e-06, -2.3848e-05, -3.4232e-05, -1.3748e-06,
        -1.3449e-05,  1.1439e-05,  5.4233e-08,  1.4945e-05, -1.9920e-05,
        -3.7602e-05, -4.2989e-05,  4.3572e-05, -4.3798e-05, -3.8571e-05,
        -2.6052e-05, -4.2959e-07, -1.4737e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9152e-04, -1.0764e-01, -3.8709e-05,  1.2298e-05,  1.9189e-05,
        -4.3338e-05, -3.7239e-05,  1.0929e-05,  2.2822e-05, -4.9336e-05,
        -1.4089e-05, -2.2245e-05, -2.6214e-05, -4.0332e-05, -3.2169e-05,
        -2.5064e-05,  1.7468e-05, -2.7197e-05, -2.8608e-05, -1.1415e-05,
        -1.8567e-05, -3.4877e-05, -4.4764e-06, -9.1756e-06,  2.3797e-05,
        -1.8252e-05,  9.5621e-06, -2.9648e-05, -5.6130e-05, -3.7154e-05,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 6.6579e-06, -1.5852e-01,  9.1075e-05,  3.3517e-05,  2.0592e-04,
        -2.0036e-05, -1.0292e-05,  1.2595e-04,  6.0702e-05,  3.4558e-06,
        -6.3754e-05, -2.9617e-05, -4.8501e-05,  3.3585e-05, -8.2649e-05,
        -3.7422e-05,  2.8696e-05, -3.9566e-07,  6.2956e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 3.1485e-05, -1.4985e-01,  7.6207e-05,  1.1591e-04,  2.6990e-05,
         5.9826e-05, -9.6363e-06,  1.2403e-07, -9.6960e-05, -6.7137e-06,
         1.1180e-04,  4.0265e-05,  8.1649e-05,  5.3975e-05, -7.0934e-05,
         8.4669e-07,  8.2420e-05,  2.1042e-05,  9.0313e-06,  3.7273e-05,
         2.5663e-05,  1.8907e-05,  4.2615e-05,  7.8019e-05,  5.3989e-05,
        -2.8935e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 9.9923e-05, -1.0917e-01, -4.6529e-05,  4.5474e-05,  1.2566e-04,
         1.1397e-05, -1.3434e-05,  2.2069e-05, -8.7485e-06, -3.9844e-07,
         6.8866e-05,  2.4794e-05,  9.8913e-06,  8.1547e-06,  2.4054e-05,
         2.4927e-05, -6.9311e-05, -4.6708e-05,  2.3502e-05,  3.1704e-05,
         1.0957e-04, -1.8300e-05, -9.0202e-06,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.3248e-04, -2.0202e-01, -2.7984e-05, -8.1648e-05,  1.3525e-04,
         2.4453e-04, -1.0614e-04, -1.7349e-04, -1.2857e-04, -2.6819e-05,
        -8.2050e-06,  5.0410e-05,  5.8458e-05, -3.1683e-04, -1.7447e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.7365e-04, -1.3690e-01, -5.3171e-05,  8.2233e-06, -1.3378e-05,
        -8.7608e-05,  1.5716e-05,  1.1814e-04, -2.0873e-05, -2.5120e-07,
         2.8461e-05,  1.0546e-04,  5.3549e-06,  2.9642e-05, -6.5719e-05,
        -4.3181e-05,  4.3253e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 1.6521e-04,  1.9375e-01, -1.6997e-05,  3.6195e-05, -7.5057e-05,
         2.4974e-05,  4.6059e-05, -2.7261e-05, -1.5152e-05,  1.2764e-04,
         1.5963e-05,  6.2101e-05,  4.1722e-06,  2.7202e-05,  5.2491e-06,
        -1.1900e-04,  5.9282e-05,  3.8800e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([ 8.5072e-06,  2.1021e-01, -5.6375e-05, -9.7482e-05,  6.1936e-05,
         1.1073e-04,  1.0525e-05, -2.1047e-05,  4.1258e-06,  6.3550e-05,
        -4.1363e-07, -5.3487e-05, -4.6070e-05, -8.2941e-05, -2.2167e-05,
        -5.1521e-05,  2.4302e-05, -3.6840e-05, -3.7263e-05,  2.4384e-05,
         4.7084e-05,  4.7150e-05,  5.3340e-06,  3.4384e-05,  2.9140e-05,
         4.4950e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-3.2096e-05,  9.2756e-02,  5.6371e-05, -3.8999e-05,  1.8451e-05,
        -6.0501e-06, -5.9873e-05,  1.6245e-06, -1.9944e-05, -3.5598e-05,
        -2.1959e-05, -2.0479e-06, -1.9628e-05, -1.4661e-05,  4.3977e-06,
        -3.7154e-05, -2.1632e-05,  2.5354e-05, -2.9204e-06,  4.3028e-05,
        -1.8065e-06,  1.5407e-05, -5.0632e-06, -7.3836e-05, -5.0208e-06,
         2.6578e-05,  1.8653e-05, -7.6938e-05,  3.6814e-05,  1.1298e-07,
        -3.5571e-05, -5.1506e-05,  2.0004e-05, -9.8052e-06,  1.4877e-05,
         1.8519e-05,  1.9869e-05, -8.0579e-06, -3.0978e-06,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<SumBackward1>), tensor([-1.9441e-05, -1.4487e-01, -2.8201e-05, -1.0730e-05,  5.8536e-06,
         1.3345e-05,  1.3228e-05,  4.7711e-05, -1.3702e-05,  5.3533e-07,
         1.3752e-05, -2.0632e-05, -5.3103e-05,  1.5644e-05,  2.2277e-05,
        -7.7433e-06, -7.1171e-05,  2.7539e-05,  2.1798e-05, -4.5725e-06,
         3.7711e-05,  2.6109e-05,  2.3827e-05,  7.3591e-06, -1.8874e-06,
         3.8770e-05,  3.2900e-05, -1.2190e-06,  1.5787e-05,  1.0310e-05,
         1.0687e-05,  8.6716e-06,  3.8372e-07,  1.3583e-06, -1.1497e-05,
        -2.2443e-05,  4.4882e-06, -1.6867e-06,  6.3893e-06,  3.5867e-05,
        -2.9283e-05,  1.0766e-05,  3.5776e-05,  2.8044e-05, -5.8610e-06,
        -7.9663e-06], device='cuda:0', grad_fn=<SumBackward1>)]
