Iter #50: [tensor([-0.3194,  0.0331,  0.0459,  0.0421,  0.0298,  0.0104,  0.0144, -0.0156,
         0.0137, -0.0194,  0.0346, -0.0099,  0.0160, -0.0064,  0.0470,  0.0628,
         0.0339, -0.0101,  0.0265,  0.0529,  0.0637,  0.0121,  0.0305,  0.0374,
        -0.0119,  0.0006,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.4453, -0.1310, -0.0285, -0.2026, -0.0097, -0.1502,  0.0220, -0.0107,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.2100,  0.0618, -0.0219,  0.1588,  0.1095, -0.0029,  0.0199, -0.0221,
         0.0531,  0.2063, -0.0194, -0.0156,  0.0239, -0.0220, -0.0170,  0.0042,
        -0.0211, -0.0105,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-0.4038,  0.1600,  0.0741,  0.3335,  0.0281, -0.0006,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.3752,  0.2343, -0.0982,  0.0327,  0.0637,  0.1495, -0.0120, -0.0344,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-0.0029,  0.2747, -0.0506,  0.0458,  0.0972,  0.1600, -0.0637, -0.0383,
        -0.0220,  0.1420, -0.0302, -0.0191,  0.0068,  0.0281, -0.0127,  0.0058,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.0870, -0.0307, -0.0200, -0.0758, -0.1277, -0.0185, -0.0493, -0.0122,
        -0.0081, -0.0763, -0.1406, -0.0163, -0.0031, -0.0263, -0.0701, -0.1327,
        -0.0249, -0.0056, -0.0618, -0.0039, -0.0042, -0.0050,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.4050,  0.0543, -0.0019,  0.0663,  0.0235, -0.0035,  0.0291, -0.0036,
        -0.0099,  0.0078,  0.0158, -0.0099, -0.0015, -0.0114,  0.0248,  0.0033,
         0.0579,  0.0222,  0.0194,  0.0226,  0.0355,  0.0233,  0.0193,  0.0212,
         0.0137,  0.0271,  0.0413,  0.0150,  0.0057, -0.0042], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.1158,  0.3508,  0.0059,  0.0585,  0.0862,  0.0381,  0.1737,  0.1085,
         0.0097, -0.0104,  0.0183, -0.0240,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.1427,  0.0864, -0.0031,  0.0835,  0.1662, -0.0008, -0.0011,  0.0452,
         0.0499,  0.0091,  0.0055,  0.0593,  0.0563,  0.0197, -0.0122,  0.0605,
         0.0452,  0.0585,  0.0298,  0.0357,  0.0243, -0.0047,  0.0005,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.1481, -0.0581, -0.0060, -0.0626, -0.0026, -0.0119, -0.0329, -0.0476,
         0.0154, -0.0574, -0.0488, -0.0011, -0.0044, -0.0381,  0.0031, -0.0400,
        -0.0045, -0.0596, -0.0094, -0.0611, -0.0759, -0.0177, -0.0735, -0.0515,
        -0.0359, -0.0276, -0.0018,  0.0034,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 9.2585e-02,  4.9499e-02,  2.9807e-02,  3.4182e-02,  3.1186e-02,
         5.8298e-03,  5.1167e-04,  1.6725e-01, -7.8561e-05,  9.8571e-02,
         7.2397e-02,  5.8458e-02,  5.0156e-02,  6.6712e-02,  3.5061e-02,
         6.1734e-02,  1.1863e-01,  8.6714e-03, -4.2808e-03, -1.4393e-02,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>)]
Iter #100: [tensor([ 0.0027, -0.0399, -0.0290, -0.1900, -0.0227, -0.0022,  0.0137, -0.0401,
        -0.0954, -0.0093, -0.0239, -0.0154,  0.0205, -0.0092, -0.0905, -0.0478,
         0.0196, -0.0008, -0.0369, -0.1002, -0.0110, -0.0693,  0.0040,  0.0110,
        -0.0152, -0.0477,  0.0202, -0.0040, -0.0077,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-0.0581,  0.0787,  0.2518,  0.3156,  0.0524, -0.0388, -0.0186, -0.0009,
         0.0505, -0.0052,  0.0221,  0.0196,  0.0297,  0.0288,  0.0228, -0.0064,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 2.8085e-02, -3.0936e-02, -1.0126e-02, -3.3834e-03, -1.4413e-02,
        -3.2393e-02,  1.0339e-02, -9.4182e-02, -3.3161e-02, -6.0568e-02,
        -1.2683e-03, -9.8931e-02, -1.2831e-02, -2.8286e-01, -4.6426e-02,
         1.7494e-02, -3.6813e-03, -1.4624e-02, -5.8437e-02, -5.5272e-02,
         4.8369e-03, -2.7410e-02, -5.5761e-02,  1.9616e-03, -1.7183e-04,
        -4.4986e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([-0.0647, -0.2661, -0.0941, -0.0248, -0.2239, -0.0342, -0.2535, -0.0182,
         0.0201, -0.0004,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.0537,  0.0036,  0.1415,  0.0149,  0.0179,  0.0042, -0.0175,  0.0509,
         0.0012,  0.0196,  0.0138,  0.0024,  0.0118,  0.0435,  0.0449,  0.0056,
         0.0361,  0.0287,  0.0062,  0.0184,  0.0582,  0.0296,  0.0200, -0.0069,
         0.0250,  0.2780, -0.0148,  0.0288,  0.0012, -0.0012,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-0.2111, -0.0203, -0.0549, -0.0142, -0.0650, -0.0237, -0.1436, -0.0198,
        -0.1135,  0.0062, -0.0260,  0.0216, -0.0605, -0.0029, -0.0171, -0.1220,
        -0.0248, -0.0357, -0.0042, -0.0026,  0.0087, -0.0016,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-0.0314,  0.2780,  0.0012,  0.0329,  0.1625,  0.0469, -0.0120,  0.0231,
        -0.0008,  0.0024,  0.0807,  0.1728,  0.0127,  0.0101,  0.1027, -0.0080,
         0.0112,  0.0015,  0.0091,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.1373,  0.0910,  0.0153,  0.0693,  0.0031,  0.0140,  0.0131, -0.0044,
         0.0505, -0.0016,  0.0619,  0.0183,  0.0253,  0.0068,  0.0684, -0.0098,
         0.0130,  0.0120,  0.0027,  0.0023,  0.0014, -0.0041,  0.0152,  0.0330,
         0.0211,  0.0045,  0.0015,  0.0035,  0.0143, -0.0020,  0.0741,  0.0155,
         0.0017,  0.0269,  0.1393, -0.0036, -0.0109, -0.0006,  0.0055, -0.0008,
         0.0002,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.7131e-01,  2.6482e-02,  8.7943e-04,  6.5856e-02, -3.4540e-03,
        -3.4918e-04, -2.9861e-03,  1.2451e-02,  6.6369e-02, -5.0561e-03,
         1.6559e-02,  3.1492e-02,  5.7474e-03,  1.8613e-02,  2.1636e-02,
        -2.2022e-03,  1.2754e-02,  1.2408e-03,  3.9534e-02,  5.0572e-03,
         4.0171e-02, -1.0943e-03,  1.1016e-02, -1.7204e-03,  3.6446e-05,
         1.3694e-02,  8.2354e-03,  9.3718e-03,  2.8129e-03,  3.1743e-02,
        -4.7506e-03, -2.6335e-03,  1.7136e-02,  8.6980e-02, -5.4295e-03,
         1.4526e-02,  5.8686e-03, -7.2124e-03,  3.7394e-02, -4.9249e-03,
         1.8151e-02,  5.9096e-04, -1.2201e-03,  1.3735e-02,  5.4442e-02,
         4.7414e-03,  1.6511e-02,  5.0620e-02, -6.4139e-03,  9.0085e-03,
        -2.6459e-03, -5.1391e-03], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 1.5509e-01,  3.9268e-02,  1.4426e-01,  2.0261e-02,  5.4234e-03,
         2.7334e-02, -1.1139e-02,  1.9472e-02, -2.3919e-03,  9.1726e-04,
        -1.9585e-03,  5.0788e-02,  1.3037e-01,  8.3712e-03,  1.9827e-02,
         1.1781e-01,  2.1847e-02,  2.5805e-02,  2.2575e-02,  5.1542e-03,
        -1.2711e-05,  8.6329e-02,  2.6331e-02,  1.2077e-03,  1.1825e-02,
         2.7005e-02,  1.0094e-02, -2.5075e-03,  4.6207e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([-0.1135, -0.0646, -0.0081, -0.0881, -0.2104,  0.0171, -0.0266, -0.0142,
        -0.0132, -0.0096, -0.0452, -0.1468,  0.0175, -0.0165, -0.0254, -0.0063,
         0.0029, -0.0174, -0.1051,  0.0034, -0.0439, -0.0037, -0.0004,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-0.0942, -0.0160, -0.1766, -0.0151, -0.0896, -0.0361, -0.0006, -0.0169,
        -0.0165,  0.0087, -0.0330, -0.1179, -0.0249, -0.1861, -0.0069, -0.0053,
        -0.0891,  0.0088,  0.0121,  0.0063, -0.0208, -0.0013, -0.0144,  0.0021,
        -0.0007,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>)]
Iter #150: [tensor([ 0.0146, -0.0116,  0.0029, -0.2801, -0.0080, -0.1297, -0.0259,  0.0027,
         0.0008, -0.0269, -0.0018, -0.0746, -0.0031, -0.0514, -0.0051, -0.1417,
        -0.0109, -0.0932, -0.0090,  0.0011,  0.0072,  0.0023, -0.0844,  0.0074,
        -0.0019,  0.0017,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<DivBackward0>), tensor([-3.7779e-02, -1.1247e-01,  3.3583e-04,  4.7815e-03, -8.2699e-02,
        -1.8513e-02, -3.3260e-02, -5.5699e-04, -1.8441e-02, -7.7938e-03,
        -1.2433e-01, -6.4113e-03, -7.9720e-03, -2.9600e-03, -1.7434e-02,
        -1.9034e-01, -3.9414e-03,  1.7655e-04, -1.0843e-03, -7.2557e-02,
        -1.3430e-01, -1.5737e-02, -4.0368e-03, -6.3310e-02, -1.0962e-02,
        -1.9991e-02,  3.1599e-03, -2.6780e-03, -1.9823e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-3.9251e-02, -9.5132e-03, -2.0851e-02, -1.4407e-03, -3.7960e-03,
         1.6476e-06,  1.8038e-03, -3.9489e-02, -2.4247e-02,  3.4907e-04,
         3.4047e-03, -2.5731e-03,  4.5348e-03,  1.9983e-03, -3.7275e-02,
        -2.7583e-01,  7.2791e-04, -5.7128e-04, -2.6936e-02, -6.0034e-03,
        -3.0631e-03,  4.4137e-03,  1.5668e-03, -1.8163e-03, -9.8751e-02,
        -1.3463e-04, -7.1659e-03,  4.3481e-03,  3.4865e-04, -5.4229e-02,
        -7.2165e-03,  7.2735e-03,  2.6475e-03, -1.2921e-03, -8.0072e-03,
         1.9063e-03,  4.4490e-03, -2.0841e-03, -3.3078e-03, -2.4938e-01,
         7.5378e-04,  6.9376e-03, -3.3747e-03, -1.1876e-02, -5.4953e-03,
        -2.2677e-03, -2.1160e-03,  9.7407e-04,  2.2064e-03], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.0162,  0.7125, -0.0027,  0.0137,  0.1157,  0.1210,  0.0102,  0.0079,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<DivBackward0>), tensor([-3.4459e-02, -4.1855e-03, -9.5505e-03, -4.4944e-02, -7.1946e-03,
        -7.7010e-02,  6.4933e-03,  4.9894e-03, -2.3937e-03,  1.3318e-03,
         2.8169e-03, -1.8948e-01,  4.4925e-03, -8.0023e-02, -9.2630e-02,
        -1.8896e-04, -2.3406e-02,  8.9145e-03, -7.9904e-03, -1.2420e-01,
        -2.6113e-03, -5.6476e-03, -4.1792e-03, -4.0090e-03, -4.2809e-02,
        -1.3801e-01, -1.0205e-02, -3.3058e-04, -5.2231e-02, -4.4733e-03,
         5.7149e-03,  6.6075e-04, -2.7313e-04,  2.1503e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 6.4461e-03,  3.5087e-02, -3.8614e-03, -1.0108e-02,  3.5056e-02,
         2.0790e-01,  3.1031e-03,  5.5683e-03,  1.4895e-02, -4.5901e-03,
        -2.1861e-03,  3.0864e-01,  1.6716e-02,  9.3090e-03, -1.2290e-03,
        -8.5470e-03,  6.2441e-03,  4.4792e-03,  8.7374e-03,  5.0265e-03,
         8.4834e-03,  1.8338e-03, -6.7643e-03,  1.5798e-02,  1.9303e-03,
         2.7031e-02, -2.4082e-03,  1.5774e-03,  2.0144e-01, -6.8016e-03,
         2.4244e-02,  3.0224e-03, -7.6848e-04,  1.6284e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.0567,  0.0393,  0.0015,  0.4038,  0.0129, -0.0099,  0.0093,  0.0096,
        -0.0022,  0.0028,  0.0112, -0.0041, -0.0061, -0.0098, -0.0022,  0.1777,
         0.0389,  0.0040,  0.0044,  0.1024,  0.0370,  0.0038,  0.0052,  0.0229,
         0.0177, -0.0016,  0.0032,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 0.0420,  0.2999,  0.0033,  0.0133,  0.0049, -0.0036,  0.0033, -0.0024,
         0.0667, -0.0020, -0.0022,  0.0295,  0.0493,  0.4625, -0.0058, -0.0053,
        -0.0018, -0.0023,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 5.6754e-02,  2.1251e-02,  1.9821e-02,  2.3485e-02,  6.3535e-02,
         1.5124e-01, -4.5101e-03, -1.9971e-04,  9.1963e-02,  4.5318e-05,
         4.3244e-01, -2.7027e-03,  1.0151e-01,  1.5939e-02,  2.2980e-04,
         5.9942e-03,  3.9469e-03,  4.4272e-03,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-4.6330e-03,  8.8706e-03,  9.9720e-03,  4.2757e-02,  1.3763e-02,
         4.7889e-03,  1.7646e-03,  2.2358e-02,  5.8055e-03,  2.0810e-02,
         3.0108e-02,  3.9657e-03,  3.0358e-02, -2.4828e-03,  6.9813e-04,
        -4.2544e-03,  2.1706e-04,  2.0754e-01,  5.6821e-03, -4.5826e-03,
         6.5445e-03,  1.3424e-02, -3.0475e-03,  1.1336e-02,  1.2179e-03,
         5.0966e-01, -1.1839e-02,  1.4940e-03,  9.8744e-03, -4.1650e-03,
        -1.9845e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-4.7811e-02,  1.9999e-02,  1.4057e-02,  6.8151e-03, -3.1809e-03,
        -7.6193e-03,  2.3852e-01,  7.8130e-03,  1.4780e-01, -5.2454e-03,
         2.5986e-02, -4.0307e-03,  1.9810e-02, -3.0080e-03, -2.6806e-03,
        -1.0647e-03,  1.9355e-02,  4.8131e-03, -3.2206e-03, -7.1281e-03,
        -1.2274e-02, -3.7773e-03, -3.6571e-03, -2.6822e-05,  1.3521e-02,
         1.7988e-02,  9.7001e-03,  2.4667e-01,  1.3391e-02,  1.8843e-02,
         3.5357e-03,  4.4371e-03, -1.1120e-02,  5.7562e-03, -2.8968e-04,
         1.3209e-02,  2.7384e-02, -1.4722e-03,  2.9852e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-1.6113e-02, -1.5575e-02, -1.9859e-02, -9.1346e-03, -7.7184e-03,
         5.9359e-03, -4.8586e-02, -3.0862e-02, -6.4770e-03, -5.1972e-02,
        -1.2956e-01, -1.5324e-02, -2.4700e-05,  7.8848e-04, -7.1146e-02,
        -2.3143e-03, -7.2744e-03, -6.1314e-03,  2.0901e-03, -2.1170e-01,
         8.7584e-03, -4.6772e-03,  6.5475e-04, -1.6119e-03,  3.6810e-03,
        -3.8215e-02, -1.1245e-01, -3.5064e-03,  1.5169e-03,  3.4922e-03,
        -1.4677e-01, -9.3501e-03,  2.6841e-03,  2.5779e-03,  1.4653e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>)]
Iter #200: [tensor([-5.4445e-03, -1.7998e-03, -1.7058e-03, -3.4131e-03, -3.9622e-02,
        -7.6016e-01, -5.2502e-05,  9.2848e-04, -2.3527e-04,  3.4476e-04,
        -4.5196e-03, -3.6773e-04,  2.5740e-03, -3.7156e-04,  1.1529e-03,
        -1.7030e-01, -1.6860e-04,  2.3539e-03,  2.8673e-03,  1.1432e-03,
        -4.7681e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.3866e-03, -1.0914e-03, -2.9092e-02, -7.1462e-03, -1.2028e-03,
        -6.9465e-01, -5.0254e-04,  1.8766e-05, -1.5032e-03, -4.5310e-04,
         1.3410e-04, -4.5280e-05,  2.9250e-04,  2.3375e-03, -2.5679e-01,
        -8.4511e-04, -4.0206e-04, -1.3104e-03, -7.9234e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-4.9410e-03,  1.0979e-03,  2.4652e-01, -5.3167e-04,  2.6569e-03,
         1.0709e-03,  2.4743e-03, -4.7241e-04, -2.3235e-04, -2.1004e-03,
        -1.7714e-03, -1.0409e-03,  3.2436e-03, -1.8403e-03, -2.4306e-03,
         2.0008e-03,  4.8467e-04,  1.3021e-02, -5.3502e-04, -2.3755e-03,
         8.3529e-03,  5.1526e-02,  3.1544e-04,  1.6055e-01,  4.0218e-01,
        -1.8216e-03,  4.6924e-02, -4.8783e-04,  2.1373e-02, -1.2300e-03,
         1.3975e-02, -2.3583e-04, -1.8440e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 0.0892,  0.0031,  0.2271,  0.0053,  0.0570,  0.0016,  0.5835,  0.0127,
        -0.0109,  0.0023, -0.0074,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-1.9558e-03, -4.3469e-03, -7.4490e-04, -2.9494e-03,  7.8889e-04,
         3.1227e-04,  5.2310e-05, -3.8049e-01,  1.3045e-04, -1.4102e-02,
         8.5397e-04,  3.3978e-04,  4.7044e-04, -1.0754e-02, -1.4507e-03,
        -9.3902e-04, -2.9475e-04, -2.4383e-01, -1.3502e-01,  1.0923e-04,
         1.9015e-04,  1.3195e-03, -6.1509e-03,  6.6242e-04, -1.4431e-03,
        -1.2439e-03,  6.9228e-04, -7.8022e-02, -1.0888e-01,  4.6323e-05,
         7.0347e-04,  7.1894e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.0107e-04,  1.1039e-03, -3.9912e-02, -2.2520e-03,  1.2400e-03,
        -6.5961e-04, -6.5303e-03, -2.6186e-04, -5.7954e-04,  3.5893e-04,
         1.1437e-03, -3.3190e-04, -3.4204e-01,  1.1349e-03,  9.9074e-04,
        -4.4829e-01,  3.2018e-05,  1.2646e-03,  1.8586e-04, -9.6815e-02,
         9.5759e-05, -2.7552e-02, -1.8614e-03, -4.4144e-04,  1.5612e-03,
        -4.0070e-03,  9.2320e-04,  1.3192e-03, -1.0851e-03, -4.4546e-03,
         1.9603e-03, -7.6428e-03,  1.4908e-03, -3.6955e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-7.4879e-03,  2.0588e-03,  2.7272e-03,  3.9863e-03,  1.5733e-03,
        -2.4951e-04, -5.9268e-01, -3.6139e-04, -1.4212e-02, -3.0897e-03,
         8.2014e-04, -2.7365e-01,  5.4638e-04,  1.4947e-03, -8.4445e-02,
        -6.3180e-03, -1.0738e-04,  1.3710e-03,  1.5796e-03, -2.0883e-04,
         8.7700e-04, -1.5603e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.2908e-02, -1.3478e-03, -3.6625e-04, -1.4753e-04, -4.2173e-04,
        -2.4708e-02,  2.3599e-03, -1.7588e-03, -2.7644e-04, -1.1846e-03,
        -3.7749e-01, -1.9730e-03, -9.4824e-04, -5.7055e-01, -2.5974e-03,
         4.6282e-04,  9.2963e-06,  4.9342e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 2.3870e-02,  2.1047e-02, -3.0747e-03, -1.7795e-04,  8.4270e-01,
        -3.5465e-03,  1.6011e-02,  8.1633e-02, -5.4696e-03,  2.4703e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 4.8680e-03,  3.4165e-02,  1.2521e-02,  2.6498e-03,  7.7768e-02,
        -1.6828e-03, -2.1772e-03,  6.8291e-03, -6.8279e-04,  1.1645e-04,
         4.5813e-01,  9.0159e-03,  1.0613e-01,  5.7708e-03, -3.3836e-03,
         3.6387e-03,  1.5334e-03, -7.0446e-04,  2.0475e-03,  1.9028e-01,
        -3.5367e-04,  4.5877e-04,  4.6312e-02, -9.6548e-04,  7.5202e-04,
         6.1381e-03,  1.2353e-02,  9.8577e-04,  3.9745e-03,  1.1270e-04,
        -6.7573e-04, -1.7563e-03, -1.8842e-04, -8.7508e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.1777e-02,  4.9121e-04, -3.7265e-03, -2.4724e-01,  3.3246e-04,
         9.5180e-04, -1.3502e-03, -1.9519e-01, -2.1677e-02,  8.1026e-04,
         4.6553e-04,  1.1739e-04, -2.4703e-03, -6.6305e-04,  1.2350e-03,
         5.3394e-04, -4.5548e-04, -2.5675e-01, -1.3575e-03,  4.4329e-04,
        -1.1558e-02,  8.7515e-04, -7.2042e-03, -2.2866e-01, -1.1777e-03,
         2.3043e-03, -3.5056e-05, -1.4696e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-7.3961e-03,  5.9782e-04,  9.0213e-04, -1.3802e-04, -3.3856e-03,
        -1.5099e-01,  2.1025e-04, -8.5123e-03,  1.4720e-03, -1.3366e-03,
        -7.2875e-04,  3.3413e-04,  1.0452e-03, -3.1398e-04,  6.6891e-04,
        -2.0828e-03, -2.2362e-01, -9.3406e-04, -1.8404e-03, -1.9900e-03,
        -5.9592e-02,  2.8683e-04,  6.9344e-04,  3.8649e-04, -1.9833e-04,
        -7.1659e-02,  9.9065e-04, -9.4824e-02,  4.4689e-04, -1.1244e-03,
        -2.4611e-01,  9.7968e-05,  1.2605e-03, -7.6949e-03, -1.0453e-01,
         7.8730e-04, -6.0934e-05, -7.5181e-04], device='cuda:0',
       grad_fn=<DivBackward0>)]
Iter #250: [tensor([ 4.4140e-03,  1.0670e-02,  2.6201e-05, -3.0376e-04,  1.5426e-04,
         3.2383e-04,  1.8995e-03, -7.4474e-05,  6.8452e-04,  6.5694e-03,
         2.6277e-01,  1.7600e-03,  5.5141e-05,  5.7787e-01,  2.7582e-03,
         2.3683e-04,  9.6497e-02,  1.3876e-03,  1.3418e-03, -4.5262e-04,
         1.6829e-03,  2.5699e-02,  1.8153e-03, -1.8753e-05,  2.3783e-04,
        -2.8986e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 3.2090e-03,  1.2419e-03,  1.1048e-04, -9.3070e-01,  5.2262e-04,
        -4.7299e-04, -8.1104e-05, -2.3797e-03,  1.1456e-03, -4.9877e-02,
        -7.5217e-04, -9.4674e-04, -1.2079e-03, -1.0439e-04,  3.5180e-04,
         5.7405e-04,  4.5927e-04,  1.0080e-03, -3.7226e-04, -2.2632e-03,
         8.5151e-04, -7.8165e-04, -5.8696e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.3094e-02,  1.1145e-03, -2.7633e-04, -3.5878e-05,  1.6032e-03,
         1.0616e-03,  4.8134e-04, -8.2997e-04, -1.0023e-03,  6.8025e-04,
         3.8094e-02,  8.7649e-01,  7.4011e-05, -1.6368e-03, -7.5162e-04,
         1.7683e-04,  8.3430e-04,  7.5763e-04, -4.3215e-04,  4.5288e-04,
         5.3751e-02, -1.8996e-03,  1.1134e-03,  8.4622e-04, -1.6346e-03,
        -4.5999e-04, -4.1729e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-1.0553e-02, -1.4458e-03,  6.1549e-04, -1.3581e-04, -1.9497e-04,
         7.7034e-04, -4.2867e-01,  1.1426e-03,  2.7775e-06,  2.8621e-04,
        -8.6632e-04,  1.0101e-03,  2.7904e-04, -6.0554e-04, -1.9328e-03,
        -5.4656e-01,  8.1711e-04, -2.0449e-04,  2.3659e-03, -7.0491e-04,
         4.6915e-04, -2.9031e-04, -7.6658e-05,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-4.1351e-03, -2.2567e-01,  5.4653e-04, -1.7730e-02,  6.0563e-04,
         5.6391e-04, -2.0702e-01,  9.7416e-04,  3.0413e-04, -8.2421e-04,
        -9.8609e-02,  1.3286e-03,  5.0368e-04, -1.6080e-04,  8.4719e-04,
         9.9967e-06, -4.3166e-01,  1.4581e-03,  1.6106e-04, -9.9754e-05,
        -4.0994e-03, -7.7619e-04, -1.6939e-03,  3.4735e-05,  1.7583e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 4.7691e-04,  2.2917e-03, -4.3504e-03, -1.1762e-03, -2.8807e-03,
        -3.5815e-01,  1.1203e-03, -4.7782e-02, -5.7759e-01, -1.2439e-03,
        -1.3504e-03, -1.5853e-03,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-2.0770e-03,  6.0269e-05,  3.8585e-05,  5.2729e-04, -3.1829e-04,
         3.6409e-03,  1.7381e-03, -8.3871e-04, -4.4017e-04, -7.2194e-04,
         4.9083e-04,  1.3372e-03,  5.5648e-04, -1.5283e-03, -5.1771e-04,
        -1.3609e-04,  5.6763e-01, -5.5832e-04, -1.0623e-03,  2.3483e-04,
         3.9967e-02,  2.3928e-04,  2.5971e-04,  6.7977e-04, -1.0802e-03,
        -7.8620e-04,  3.6507e-01, -5.9617e-04,  2.8141e-04,  7.7381e-05,
         3.3627e-04, -9.1126e-04, -1.7748e-03, -2.8146e-04,  2.0950e-03,
         4.9728e-04, -2.1647e-04, -1.8858e-04,  2.0754e-04], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 5.1672e-03, -1.7151e-03, -1.1762e-03, -7.3470e-01, -8.8675e-04,
        -1.4647e-03, -8.0316e-04,  4.9129e-04,  2.0692e-04, -5.1300e-04,
        -7.2479e-04,  4.2349e-04, -1.5886e-01, -1.5124e-03, -3.6817e-04,
         6.8306e-04,  6.0705e-04,  2.6090e-04, -1.1564e-03,  7.2642e-04,
        -5.1358e-04, -7.8502e-02, -1.3720e-03, -8.8987e-04, -1.3050e-03,
        -9.2039e-04,  7.3015e-04,  8.3439e-04,  8.9568e-04, -3.8543e-04,
        -4.4317e-04, -3.3107e-04,  4.2955e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 6.8774e-03, -3.5850e-04,  4.9067e-04, -8.1729e-03, -5.7695e-04,
         2.0150e-04, -2.6460e-02, -7.3364e-01, -1.7031e-03,  7.3929e-04,
        -1.2678e-03, -5.3996e-03, -5.2272e-03,  3.7617e-04, -1.7714e-01,
         7.0616e-04, -1.4842e-03, -1.9803e-03, -1.9681e-03, -6.2190e-04,
        -8.1336e-04,  9.2352e-04, -5.7928e-04,  1.6733e-04, -2.4341e-04,
        -1.9615e-02, -2.5080e-04,  6.0805e-04,  5.0616e-04, -6.8765e-04,
        -2.1970e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 2.4491e-03, -4.6555e-01,  5.4655e-04, -6.6083e-03,  1.7838e-04,
         9.1487e-04, -2.6360e-03,  9.6157e-04, -3.9946e-02, -2.5631e-04,
        -4.3571e-01, -8.0292e-04, -2.9163e-02, -1.0764e-02,  1.0827e-03,
        -1.3999e-03, -7.1939e-04,  1.3957e-04,  1.6671e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-8.1771e-03,  4.7004e-04, -1.3699e-04, -5.1606e-04, -4.4126e-04,
         4.4150e-05,  5.8555e-06, -1.0667e-03,  1.1365e-03, -5.1551e-04,
        -6.7573e-05, -9.9636e-04,  2.6589e-04,  9.7253e-01, -2.1812e-06,
         3.8150e-04,  2.3771e-03,  3.4721e-04, -1.0067e-04, -4.9047e-04,
        -1.1282e-04,  5.1234e-04, -3.4519e-04, -3.7054e-04,  1.6976e-03,
         1.1496e-03,  4.1802e-05, -5.2832e-04,  8.4224e-05,  1.0523e-03,
         1.0086e-04,  2.0342e-03, -2.0436e-04,  7.2720e-04,  2.1949e-04,
        -3.9004e-04,  3.3041e-05, -1.5603e-04, -1.6952e-04], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 7.0833e-03,  9.1887e-01,  7.5935e-04, -1.0590e-03, -5.1569e-04,
         4.0237e-03,  1.2401e-03,  8.8425e-04,  5.2865e-02,  1.8845e-03,
         1.0755e-03,  6.1603e-04, -3.1424e-03, -9.1910e-04, -2.2288e-04,
        -1.9245e-04,  6.1838e-04,  2.0426e-03,  1.2304e-03,  7.5640e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>)]
Iter #300: [tensor([ 1.2637e-02,  1.2321e-03,  2.2672e-02,  1.4262e-02, -7.5861e-03,
         9.3684e-01,  2.6558e-03, -8.8173e-04, -1.0619e-03,  1.7373e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 9.6301e-03,  5.2028e-04, -1.2698e-01,  1.0129e-03, -3.9403e-01,
         2.7350e-04,  2.2669e-04, -5.5717e-04, -4.2718e-01,  3.0053e-04,
         5.9649e-04,  7.1728e-04, -2.0681e-03, -3.3426e-02, -4.4197e-04,
        -4.1306e-04,  9.3443e-04, -6.8845e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 1.7709e-03, -1.7634e-04, -3.0727e-04, -1.9230e-04,  4.2663e-02,
         2.3258e-01,  1.0525e-03,  5.3827e-04,  5.7518e-04, -6.4114e-05,
         4.9712e-04,  9.2742e-04,  1.1370e-02,  7.1812e-04, -5.2203e-04,
        -7.6107e-06, -3.5375e-04,  3.1269e-02,  4.8419e-01,  1.8568e-03,
         1.8667e-01, -7.7766e-04,  1.5905e-04, -1.0000e-04,  3.0165e-04,
         1.0744e-04,  2.5789e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 2.6632e-03,  2.5262e-03,  6.6243e-04, -3.1235e-03, -9.1700e-01,
         1.4801e-03, -6.6043e-02,  2.2301e-03,  2.5510e-04,  1.2610e-03,
         8.3269e-04,  5.0742e-04,  4.5192e-04,  4.3807e-04, -5.2644e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-2.1757e-04, -5.8824e-04,  2.9689e-04,  1.3850e-04, -1.1765e-04,
        -6.3666e-04, -3.5899e-04,  6.9430e-04,  2.2833e-04,  1.4838e-03,
         2.5319e-04,  3.1836e-03, -3.2809e-04,  6.7867e-04,  5.7500e-04,
        -3.2074e-04, -1.1373e-04,  2.4709e-04,  4.0425e-01, -4.7896e-04,
         3.2854e-05,  3.6742e-04,  7.7352e-05, -4.6911e-04,  5.8174e-01,
        -2.0300e-04,  5.2433e-04,  2.2399e-04, -1.6340e-04, -1.1553e-04,
        -2.2679e-04,  6.7170e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-1.7993e-03, -2.1038e-04,  3.0543e-04, -3.6591e-01,  3.3716e-04,
        -2.2280e-04, -5.2123e-04, -1.1356e-03, -8.5960e-04,  1.0477e-03,
         3.9047e-04, -7.8628e-03,  4.0326e-04, -3.0975e-05, -2.8763e-04,
         2.4009e-04,  5.1699e-05, -2.6765e-04,  1.0284e-04, -1.2607e-01,
         2.9774e-04, -1.5127e-03, -2.6659e-03, -1.7693e-01,  4.3361e-04,
         5.6421e-05, -2.6865e-01, -7.1386e-05, -1.3954e-03, -6.5988e-05,
         5.6074e-04,  3.3838e-04, -9.0413e-03, -2.7088e-02,  1.0481e-03,
         4.4281e-04,  9.8723e-04, -1.1342e-04, -2.5177e-04], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-4.4499e-03, -1.0363e-03,  2.2574e-04, -5.4152e-04, -1.4096e-04,
        -2.2130e-04, -1.0746e-03,  4.7285e-01,  1.1878e-04,  6.3699e-04,
        -3.0099e-04,  1.7388e-02, -5.6126e-04,  3.4523e-04,  4.9473e-04,
         1.8073e-04, -7.6700e-04,  1.6699e-04,  1.9486e-01, -2.2869e-04,
         2.9026e-01, -3.8219e-04, -6.2455e-04, -4.9559e-04, -2.0587e-04,
         1.0056e-05,  1.0235e-03, -6.7019e-04, -2.3775e-06,  8.9709e-03,
         6.8660e-04,  1.8828e-06,  3.1941e-05, -4.3852e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-5.0080e-03,  3.8857e-02,  2.7408e-03, -5.8557e-04,  7.4626e-01,
         3.5104e-02,  1.1111e-01,  5.0989e-02, -1.5908e-03, -7.7590e-03,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 3.6969e-03, -7.1901e-04, -1.8812e-04, -7.2949e-05, -2.9834e-04,
         1.1023e-03, -6.4665e-06, -2.8878e-01,  4.3465e-05, -3.1897e-04,
        -8.5902e-04, -2.9509e-04,  1.6616e-04,  1.2124e-03, -2.3477e-04,
        -8.6585e-05, -4.3263e-01,  5.2843e-05,  1.6277e-04, -2.3528e-03,
         3.1816e-04, -5.7530e-04, -2.4810e-01, -1.2003e-04, -8.5927e-05,
         2.3981e-05,  4.5010e-04, -1.4732e-02, -1.6942e-04, -8.9578e-04,
        -4.9266e-04, -7.6234e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 4.5083e-03,  1.8623e-06,  1.3619e-04, -5.9979e-04, -2.5650e-04,
        -1.0600e-02,  4.9852e-04,  1.1394e-04,  7.3734e-04, -9.2026e-04,
         1.2260e-03,  4.4773e-04, -3.5824e-02, -7.6520e-05,  3.3044e-06,
        -5.7877e-05, -9.7533e-02, -3.6973e-01,  1.8450e-04,  6.7698e-04,
         5.0981e-04, -1.2347e-03, -3.6156e-01,  9.3639e-04,  6.1732e-04,
         4.2730e-04, -1.0960e-01, -1.9761e-04,  3.2389e-04,  2.6439e-04,
         9.4318e-05,  1.0671e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([-9.2847e-04,  7.7270e-01,  1.1219e-03,  1.3149e-03,  2.4638e-04,
        -1.9573e-03,  3.8214e-04, -2.0942e-05, -2.0220e-04,  2.3486e-03,
         2.7594e-03, -6.6015e-04, -2.0225e-03,  1.0090e-03, -5.0467e-04,
        -6.6832e-04,  1.9756e-04,  5.8357e-04,  2.0802e-01,  2.7321e-04,
         5.3255e-04,  4.4199e-06, -1.0225e-03,  5.2194e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 5.3612e-03, -3.2541e-03, -1.6766e-04, -1.3849e-04,  1.1531e-03,
        -1.8186e-03, -1.2115e-04, -1.2737e-03,  5.0693e-04, -1.3771e-03,
         1.1409e-03, -1.7055e-03, -4.2254e-04,  1.1958e-03, -4.6097e-05,
        -9.7810e-01,  1.0041e-03, -7.1192e-04,  1.4034e-04, -3.4365e-04,
        -1.6912e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00], device='cuda:0',
       grad_fn=<DivBackward0>)]
Iter #350: [tensor([ 1.8634e-03,  3.8826e-05, -1.8684e-04,  1.4401e-04, -8.5649e-05,
         6.0756e-04, -5.2408e-04, -3.2863e-04,  1.7979e-04,  4.5853e-07,
         3.0577e-04,  3.8822e-04, -3.6312e-04, -1.0245e-03, -2.5379e-04,
        -6.9723e-04, -1.8717e-04, -1.8779e-05, -1.5210e-05,  2.8262e-04,
        -1.0181e-04, -2.6128e-04, -3.9649e-04,  1.4677e-04, -2.6012e-01,
         7.4576e-04,  1.3105e-04, -3.4693e-04, -4.9037e-04, -1.9508e-04,
        -7.0112e-01, -4.9218e-04, -2.6496e-04, -1.1991e-04,  2.7964e-04,
         3.8905e-04, -2.6706e-02,  5.9065e-05,  1.9413e-05, -1.2241e-04],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([ 3.7103e-03,  3.2899e-04, -3.4112e-04,  1.0282e-03, -3.5629e-04,
         6.4760e-01,  4.4937e-04, -2.4272e-04,  3.8024e-04, -7.3398e-05,
         2.0831e-04, -2.1016e-05,  9.5029e-04,  8.7872e-02,  5.5972e-04,
        -9.3312e-05, -1.5211e-05,  2.4291e-03,  5.4419e-05, -2.3935e-05,
         1.0633e-03,  2.4330e-01,  8.5640e-04,  7.3731e-04, -3.0608e-04,
         4.5561e-03,  1.6790e-03,  3.5994e-04,  1.0298e-05,  6.7748e-05,
         3.2681e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([ 3.6103e-03, -2.2957e-01,  6.3684e-05, -1.8929e-01,  3.3911e-04,
        -3.1270e-05,  2.3405e-04, -3.5751e-04,  8.7629e-05,  1.2728e-04,
        -3.5201e-04,  2.1528e-04, -3.4582e-01,  1.2169e-04, -1.5902e-04,
        -7.3500e-04,  5.2055e-05,  3.9557e-04, -1.1578e-03, -2.4514e-04,
        -1.0567e-03, -1.1592e-03, -5.5375e-05, -1.0482e-03, -4.9363e-04,
        -2.4413e-04, -4.2425e-03,  8.1108e-05, -2.1807e-01, -6.9840e-05,
         1.6086e-04, -2.2730e-04,  1.3104e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([ 6.0886e-03, -4.3717e-03,  3.7501e-04, -1.4948e-03,  4.4024e-04,
        -6.4383e-04, -3.9422e-04, -1.7720e-03, -1.0357e-02,  4.1129e-04,
        -5.7244e-02, -4.4753e-01,  3.4505e-04, -4.6737e-01,  6.3429e-04,
        -3.5604e-04, -1.7430e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([-2.3621e-05, -4.8489e-01, -2.2490e-04, -2.2653e-02, -6.4010e-04,
        -1.9183e-04, -3.4882e-04,  7.9134e-04, -3.5316e-03,  1.3679e-04,
        -6.0499e-02, -1.0139e-04,  7.4935e-04, -5.3072e-04, -4.6543e-05,
        -1.3042e-04, -2.1985e-04,  1.5706e-04, -1.4684e-01,  2.7239e-04,
        -1.1761e-04, -1.7740e-03, -8.1062e-05,  8.5212e-06, -2.4362e-04,
        -9.1767e-02, -1.0370e-03,  1.5336e-04, -1.8140e-01, -3.9751e-04,
        -2.9651e-05, -1.9100e-05,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([-8.5416e-03,  9.7940e-03,  6.2875e-01,  4.1743e-02,  1.2344e-03,
         1.8796e-03, -7.6692e-04,  3.8517e-03, -3.3882e-03, -6.1485e-04,
        -4.3994e-04,  1.9152e-02,  4.8025e-02, -4.3173e-03,  2.0421e-01,
        -9.0813e-04,  1.6833e-02, -1.5747e-03, -3.9819e-03,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([-3.7002e-03, -2.8145e-01,  4.5550e-04, -5.8876e-05, -1.3575e-04,
         7.4395e-05,  6.1949e-04,  3.5928e-04, -2.7015e-04,  1.7816e-04,
        -4.2630e-04,  7.8392e-05, -4.6713e-01,  1.3389e-04,  4.8477e-04,
         6.6953e-04,  1.7397e-05, -2.9111e-04, -1.1120e-01,  7.7776e-04,
        -3.5256e-03,  2.3147e-04,  9.7631e-05,  5.1280e-05,  7.7369e-04,
        -2.8198e-04, -1.2620e-01,  1.5379e-04, -1.6923e-05,  1.7034e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([ 4.1940e-03,  2.3075e-03,  8.9585e-04, -5.5492e-04, -1.1783e-03,
        -4.2915e-04,  9.2534e-04, -4.6211e-04,  3.6668e-04,  9.0495e-04,
         5.6154e-04, -9.3320e-04,  8.4131e-01, -2.4378e-03,  5.6739e-04,
        -3.8952e-04, -9.4585e-05,  1.1164e-02,  1.2852e-01,  8.4825e-04,
         2.9187e-04,  6.6663e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([-5.4707e-03, -3.3019e-01,  1.5932e-04,  2.2658e-04,  1.6966e-05,
        -4.5282e-06,  3.9764e-05, -1.2443e-02,  5.9996e-04,  1.3463e-04,
        -3.2637e-04, -2.7581e-01, -2.1641e-05,  1.8256e-04, -2.4294e-04,
        -5.9668e-04, -1.6332e-03, -3.5608e-04, -3.0882e-04, -1.4948e-04,
         6.1361e-05, -2.1267e-04, -3.3589e-04, -1.3917e-03, -1.8342e-01,
         5.5760e-04, -5.2346e-04, -2.8429e-04,  3.5006e-04,  4.3558e-04,
        -1.2063e-01, -1.8815e-04,  3.8974e-05, -6.3096e-05, -6.1345e-02,
        -3.7752e-05,  9.0506e-05, -2.3602e-04, -8.8258e-04,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([-2.7193e-03, -6.9118e-04,  1.1351e-04,  2.6550e-03, -9.1534e-05,
         9.8283e-01,  6.5467e-04,  1.0840e-03, -8.9409e-04, -2.4996e-04,
        -1.1689e-04,  2.4148e-03, -6.6982e-04,  2.9523e-03,  6.5269e-04,
         4.9035e-04, -5.4716e-04, -1.7291e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([-5.7029e-03, -1.0225e-03,  4.3646e-04,  3.1606e-02, -8.4151e-04,
         7.0806e-04, -7.9795e-05, -3.1449e-04,  2.2163e-04,  3.1240e-04,
        -1.7573e-04, -7.7730e-05, -1.0666e-03,  8.9331e-05,  1.2227e-01,
         7.9014e-04, -5.5214e-04, -1.7286e-05,  1.8562e-03,  2.2897e-03,
        -2.1836e-04, -3.4779e-05,  6.8017e-01,  4.1184e-04,  1.4533e-01,
         5.7063e-04,  4.2147e-04,  4.8479e-04,  1.2208e-04,  1.1301e-03,
        -4.4164e-04, -8.9189e-05,  1.3632e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>), tensor([ 2.3180e-04,  6.5014e-04, -3.9073e-03, -5.0646e-04,  9.9389e-04,
         2.7011e-04,  2.3984e-04,  7.5935e-05, -1.1651e-04, -8.8634e-02,
        -3.4454e-04, -4.1637e-04,  3.2695e-04,  2.1344e-04, -1.4236e-01,
        -5.8333e-01,  1.0474e-04,  4.1352e-05,  5.4715e-05,  3.1405e-04,
         2.1703e-04,  5.3709e-05, -6.6684e-04, -6.5492e-05,  9.1920e-06,
         4.8119e-04,  2.7238e-04,  2.7275e-04, -2.4715e-03, -1.7001e-01,
        -5.5263e-04, -3.7814e-04, -1.5133e-05, -2.7327e-04, -3.0857e-04,
         3.4067e-04, -1.5978e-04, -3.1727e-04,  0.0000e+00,  0.0000e+00],
       device='cuda:0', grad_fn=<DivBackward0>)]
Iter #400: [tensor([ 5.9460e-03,  5.6252e-01, -3.0330e-04,  6.9945e-02,  1.3250e-03,
         3.6338e-03,  1.7586e-04,  6.2169e-04,  1.0440e-05,  3.0638e-03,
         3.6038e-03,  4.6429e-04, -1.0637e-03,  4.3439e-04,  9.8835e-05,
        -6.3772e-04,  8.1640e-04,  2.0115e-04,  3.6119e-04,  1.5325e-03,
         4.8861e-04,  2.0926e-04, -1.1537e-04,  5.0835e-05,  3.4155e-01,
         3.5145e-04, -1.0371e-04, -3.7133e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 1.9241e-03,  2.5953e-06, -5.1680e-01, -1.0708e-04,  3.3122e-04,
        -1.0684e-01, -3.0777e-04, -4.1715e-02,  5.6387e-04, -6.4582e-05,
        -8.7046e-04, -6.0495e-03,  4.7217e-04,  2.5918e-04,  3.2882e-04,
        -3.8404e-04, -4.2772e-04, -1.7960e-01,  1.1833e-04, -1.3363e-03,
        -6.4010e-04,  1.1532e-04,  1.3558e-04, -1.4185e-02,  1.8246e-04,
        -1.2929e-03, -1.8182e-04, -2.9475e-02,  1.0517e-04,  5.5132e-05,
        -9.2678e-02, -3.6086e-04,  3.1567e-05, -5.6671e-04,  6.2872e-05,
         5.9501e-04, -4.0960e-04,  1.1060e-04, -3.0681e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([-0.0413,  0.0781,  0.1470,  0.1604, -0.0176,  0.1687, -0.0470,  0.2739,
         0.0234, -0.0179, -0.0248,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000], device='cuda:0',
       grad_fn=<DivBackward0>), tensor([ 2.5384e-03, -1.1194e-03, -8.0177e-01,  2.2597e-04, -7.6418e-02,
        -7.0558e-02, -3.5302e-04,  2.7159e-03,  3.1431e-05,  5.0407e-04,
        -4.1156e-02,  3.8671e-04, -1.0149e-03, -8.7172e-04,  3.3050e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 8.6937e-04,  4.0960e-04, -4.0628e-01, -5.0802e-04,  3.7370e-04,
        -1.8236e-04, -1.0638e-04, -4.7127e-05,  1.2412e-04, -8.9956e-04,
        -1.1901e-02, -1.4986e-03,  3.3288e-04, -1.6643e-04, -3.1550e-04,
         7.4838e-07,  2.2100e-05, -5.5870e-02, -2.6385e-01, -7.0507e-04,
         4.9890e-05, -3.2943e-05, -1.7540e-02, -6.3980e-04, -2.4698e-04,
         1.3385e-04, -8.3757e-03, -4.5905e-02, -2.7074e-04, -2.3254e-02,
        -5.0571e-04,  3.6171e-04, -1.5745e-01,  3.9110e-04, -2.2756e-04,
         1.5417e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 6.5420e-03,  9.8773e-01,  1.0398e-03, -2.9788e-04,  1.2315e-03,
         1.8630e-03, -3.4561e-04,  1.4614e-04,  4.4928e-04,  3.5857e-04,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([-1.1997e-02,  5.1184e-04, -5.0908e-01, -3.7157e-04, -1.2889e-03,
        -8.0189e-05,  1.0001e-03,  2.5819e-04, -1.3488e-03, -3.8654e-04,
         5.8125e-04, -1.6941e-01, -1.7060e-05, -3.0742e-03,  4.6374e-04,
        -8.7053e-04,  5.1698e-04,  3.6161e-05, -4.0923e-04,  6.2328e-04,
        -1.3810e-03, -2.9404e-01,  5.8598e-04, -3.1587e-05, -1.3119e-04,
         9.7397e-04,  3.0877e-04, -1.7812e-04, -3.9301e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 2.9239e-05, -5.3767e-02, -4.0294e-01,  2.1516e-04, -3.9631e-04,
         3.2620e-04,  7.5577e-05, -5.5833e-05,  2.8730e-04,  2.2853e-04,
         6.5634e-05, -3.2860e-04,  7.8760e-05, -2.2458e-04,  8.8835e-05,
        -6.5615e-05, -2.4839e-03, -6.3853e-05,  3.4115e-04, -1.1945e-01,
         2.8481e-04,  4.6724e-05, -4.0208e-04, -1.2711e-04, -2.5071e-04,
         1.0538e-04,  1.2025e-04, -1.6594e-01, -2.5036e-01,  1.9728e-04,
        -8.2256e-06, -4.9722e-04,  8.0052e-05,  7.1064e-05,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 1.3044e-03,  3.8758e-04, -9.2799e-02,  2.7279e-05, -1.2185e-04,
        -2.8939e-04, -9.2225e-05, -2.0724e-04, -1.1621e-03, -7.4789e-05,
        -4.0255e-04, -7.7194e-05, -7.4190e-02,  5.2607e-04, -3.8190e-05,
        -1.7893e-04, -1.1829e-01,  4.3005e-04, -7.7646e-05,  1.5070e-04,
        -6.8808e-02, -1.5878e-04, -3.6053e-05, -1.3544e-02, -2.1364e-04,
         1.3951e-04, -2.2099e-05, -2.0280e-04,  7.1017e-07, -3.0164e-02,
        -2.9852e-04, -1.0109e-01,  3.2481e-04, -1.1908e-01, -1.9365e-04,
         3.8864e-04, -2.9992e-04, -3.2559e-01, -1.8856e-05, -4.7320e-02,
        -1.3315e-04,  2.7292e-04,  3.2337e-04,  2.5279e-04, -1.4980e-05,
         2.8224e-04], device='cuda:0', grad_fn=<DivBackward0>), tensor([ 8.6817e-04,  5.8476e-04, -3.2270e-02, -1.6497e-01, -5.5961e-05,
         5.0502e-04, -2.8855e-04, -2.7598e-05, -1.4568e-02,  5.2378e-04,
        -1.4831e-01, -2.8966e-05,  1.0746e-04, -1.8523e-01,  4.2789e-05,
        -4.4845e-01,  2.6491e-04, -1.3432e-04, -2.2429e-04, -3.4774e-04,
        -6.0016e-04, -8.0767e-06, -2.7585e-04,  1.6258e-05, -4.2683e-04,
         7.3325e-04, -1.3102e-04,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([-4.4266e-03, -4.8018e-04,  4.6290e-04,  1.1664e-04,  2.6103e-03,
        -1.6327e-04,  1.7862e-03,  2.8901e-03,  5.1990e-04,  2.4138e-02,
         1.2837e-03,  6.2239e-04, -3.7857e-04,  1.3289e-03,  3.1909e-04,
         1.5363e-04,  2.7227e-03, -3.7238e-04,  3.3929e-04,  7.1891e-04,
         1.3621e-04,  6.9216e-04,  3.6931e-03,  3.6960e-04,  9.4655e-01,
        -2.7040e-04,  3.2555e-04, -7.6999e-04,  5.0954e-05,  5.5224e-04,
         5.0070e-04, -3.3529e-05,  2.2086e-04,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>), tensor([-1.3647e-03,  4.5143e-04, -5.4459e-04, -1.4490e-04, -1.2675e-04,
        -7.8594e-04, -1.7166e-04, -4.6787e-03,  1.3029e-04,  4.1100e-04,
        -2.1872e-04,  1.0866e-04, -3.1226e-01,  2.0947e-04, -2.5098e-05,
         2.3140e-05, -1.3811e-04, -3.0448e-04, -5.2540e-04,  1.9878e-05,
        -2.7772e-04,  1.9317e-05, -1.7835e-01, -6.0055e-04, -1.8424e-04,
        -1.7378e-04, -4.7452e-01,  1.3855e-04, -5.1939e-04, -9.6863e-05,
        -2.1254e-02, -8.4614e-05,  6.9832e-05,  2.2031e-05,  4.7783e-04,
         7.0143e-05, -1.3162e-04,  1.3300e-04, -2.3954e-04,  0.0000e+00,
         0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00,
         0.0000e+00], device='cuda:0', grad_fn=<DivBackward0>)]
