Iter #50: tensor([[ 0.9501, -0.9088, -0.6548],
        [ 0.4274, -0.6124, -0.3517],
        [ 0.6331, -0.2570, -0.3707],
        [ 0.6458, -0.0437, -0.0334],
        [ 0.2847, -0.2669, -0.2021],
        [ 0.6826, -0.4705, -0.1821],
        [ 0.6528, -0.2993, -0.4806],
        [ 0.2753, -0.0209,  0.1612],
        [ 0.1471, -0.1476, -0.0085],
        [-0.0194, -0.0191,  0.3112],
        [-0.7750,  0.6987,  0.5683],
        [-0.5201,  0.1081,  0.1718]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #100: tensor([[-1.6500,  1.8232,  0.5496],
        [-2.5110,  1.2819,  1.4817],
        [-2.6349,  1.3426,  1.3855],
        [-2.5882,  0.8455,  2.0157],
        [-2.6677,  0.8666,  2.0387],
        [-2.6846,  0.7310,  2.0866],
        [-2.7111,  1.0158,  1.8165],
        [-2.7633,  1.0342,  1.7942],
        [-2.5826,  1.0511,  1.8718],
        [-2.6509,  0.9679,  1.8706],
        [-2.3466,  1.5273,  1.5254],
        [-2.4379,  1.2498,  1.3191]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #150: tensor([[-2.8152,  0.1121,  3.0899],
        [-2.8299, -0.1290,  2.9540],
        [-2.6562,  0.0033,  3.0366],
        [-2.6810, -0.2189,  3.1305],
        [-2.6543,  0.0652,  2.9220],
        [-2.7091,  0.0926,  2.8619],
        [-2.8067, -0.0426,  2.9952],
        [-2.5171,  0.0212,  3.2210],
        [-2.5402,  0.0236,  2.8764],
        [-2.9403, -0.1952,  2.9447],
        [-2.7435,  0.0056,  2.9171],
        [-2.5253, -0.0908,  2.8481]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #200: tensor([[-2.6423e+00,  2.1366e-02,  3.0029e+00],
        [-2.6385e+00, -1.9409e-01,  3.1487e+00],
        [-2.7087e+00, -3.3730e-02,  3.1387e+00],
        [-2.9026e+00, -1.7617e-01,  3.1699e+00],
        [-2.6752e+00, -5.6230e-02,  3.1758e+00],
        [-2.7028e+00, -2.0401e-03,  3.1053e+00],
        [-2.8392e+00, -8.2225e-02,  2.9834e+00],
        [-2.7667e+00, -7.9601e-02,  3.1667e+00],
        [-2.7604e+00,  3.2758e-02,  3.0686e+00],
        [-2.7672e+00, -1.0287e-01,  3.1035e+00],
        [-2.7904e+00, -9.2947e-02,  3.1014e+00],
        [-2.7003e+00, -2.7799e-01,  3.0393e+00]], device='cuda:0',
       grad_fn=<AddmmBackward>)
Iter #250: tensor([[-2.7867,  0.0800,  2.9714],
        [-2.7540,  0.0469,  2.9600],
        [-2.7812,  0.0617,  3.1282],
        [-2.7868, -0.1300,  3.1063],
        [-2.7492,  0.1416,  2.9878],
        [-2.7531, -0.0626,  3.1263],
        [-2.8501, -0.1255,  2.8389],
        [-2.8675, -0.0592,  3.0436],
        [-2.8124, -0.0735,  3.0501],
        [-2.9211, -0.0726,  3.0672],
        [-2.9519, -0.0604,  2.8983],
        [-2.8574, -0.1338,  3.0559]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #300: tensor([[-2.8117e+00,  1.4361e-01,  2.9874e+00],
        [-2.7530e+00,  1.0134e-01,  2.8311e+00],
        [-2.6772e+00,  2.2773e-01,  2.9870e+00],
        [-2.9588e+00,  2.9828e-03,  2.9416e+00],
        [-2.8396e+00,  1.2772e-02,  3.0263e+00],
        [-2.7467e+00,  2.4451e-01,  3.0394e+00],
        [-2.8815e+00,  6.6948e-03,  2.9569e+00],
        [-2.6570e+00, -5.2686e-02,  3.0303e+00],
        [-2.8225e+00,  9.4350e-02,  2.9083e+00],
        [-2.9180e+00,  4.0473e-02,  2.8341e+00],
        [-2.9328e+00,  1.1400e-01,  3.0768e+00],
        [-2.9317e+00,  6.4215e-02,  2.8890e+00]], device='cuda:0',
       grad_fn=<AddmmBackward>)
Iter #350: tensor([[-2.8821,  0.1574,  2.9150],
        [-2.8164,  0.0994,  2.9550],
        [-2.7760,  0.2416,  2.9465],
        [-2.8919,  0.0146,  2.8396],
        [-2.7850,  0.2098,  2.6431],
        [-2.7196,  0.1293,  2.8630],
        [-2.8419,  0.2581,  2.9637],
        [-2.9571,  0.0614,  2.8488],
        [-2.7807,  0.2372,  2.8107],
        [-2.9332,  0.1725,  2.9163],
        [-2.9228,  0.1500,  2.7638],
        [-2.8222,  0.2899,  2.9865]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #400: tensor([[-2.8496,  0.4009,  2.6701],
        [-2.8545,  0.3281,  2.6618],
        [-2.9529,  0.3934,  2.6881],
        [-2.9021,  0.4025,  2.7560],
        [-2.8855,  0.1627,  2.6306],
        [-2.9641,  0.4052,  2.8784],
        [-2.9258,  0.3449,  2.6743],
        [-3.1840,  0.2696,  2.5768],
        [-2.9683,  0.3759,  2.6178],
        [-2.7601,  0.3799,  2.5512],
        [-3.0406,  0.3954,  2.5952],
        [-3.0667,  0.3812,  2.5923]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #450: tensor([[-3.1617,  0.5780,  2.3285],
        [-2.7801,  0.4663,  2.4817],
        [-2.8417,  0.7283,  2.2995],
        [-3.0156,  0.5758,  2.5120],
        [-3.1426,  0.6659,  2.4540],
        [-2.9815,  0.7151,  2.4345],
        [-2.8225,  0.6531,  2.5144],
        [-3.0043,  0.4905,  2.4695],
        [-3.0607,  0.4678,  2.4505],
        [-2.9648,  0.6545,  2.6020],
        [-2.8736,  0.6298,  2.5837],
        [-2.9386,  0.6365,  2.5572]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #500: tensor([[-3.0862,  1.2159,  1.6384],
        [-3.0892,  1.2385,  1.8709],
        [-3.2680,  1.1803,  1.7079],
        [-3.0300,  1.1919,  1.7328],
        [-3.1211,  1.1129,  1.8100],
        [-3.1505,  1.2923,  1.8233],
        [-3.1601,  1.2240,  1.7876],
        [-3.1486,  1.0459,  1.6707],
        [-3.1490,  1.2111,  1.6520],
        [-3.0729,  1.4396,  1.6413],
        [-3.2030,  1.2275,  1.7456],
        [-2.8845,  1.2078,  1.8007]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #550: tensor([[ 2.2254,  0.4694, -3.1226],
        [ 2.3700,  0.4786, -2.9103],
        [ 2.5941,  0.3000, -2.8673],
        [ 2.3950,  0.2608, -3.1885],
        [ 1.5045,  1.0735, -2.9793],
        [ 1.6049,  0.8950, -3.2536],
        [ 1.9450,  0.7060, -3.2187],
        [ 1.2283,  1.5729, -2.6831],
        [-2.6143,  2.0270,  0.4535],
        [ 2.6076,  0.3160, -2.9536],
        [ 2.2164,  0.5487, -3.0449],
        [ 2.5720,  0.1614, -3.2397]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #600: tensor([[ 3.3162, -0.5064, -2.9715],
        [ 3.2054, -0.3630, -2.9761],
        [ 3.1652, -0.6136, -3.0283],
        [ 3.1787, -0.4156, -2.9563],
        [ 3.1641, -0.5452, -2.9211],
        [ 3.1250, -0.5199, -2.8931],
        [ 3.2400, -0.4247, -3.0719],
        [ 3.2127, -0.5304, -2.9357],
        [ 3.2180, -0.6696, -2.7342],
        [ 3.1768, -0.5806, -2.8058],
        [ 3.2943, -0.5706, -2.9204],
        [ 3.2312, -0.5425, -2.8589]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #650: tensor([[ 3.2047, -0.3645, -3.1262],
        [ 3.1217, -0.4280, -2.8879],
        [ 3.1208, -0.4043, -2.8501],
        [ 3.2604, -0.6191, -2.7825],
        [ 2.9988, -0.4598, -2.9359],
        [ 3.2858, -0.4724, -2.9368],
        [ 3.2016, -0.3121, -2.8614],
        [ 3.2266, -0.3751, -2.8029],
        [ 3.2748, -0.4220, -3.0022],
        [ 3.1053, -0.4833, -2.8590],
        [ 3.3065, -0.4318, -2.9770],
        [ 3.1832, -0.4305, -2.9148]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #700: tensor([[ 3.0741, -0.5816, -2.8754],
        [ 3.3811, -0.4705, -2.9219],
        [ 3.0170, -0.4000, -2.8022],
        [ 3.3352, -0.2426, -3.0957],
        [ 3.1742, -0.4143, -3.1887],
        [ 3.1043, -0.3781, -2.9853],
        [ 3.2503, -0.4816, -2.9236],
        [ 3.3441, -0.4542, -2.9415],
        [ 3.2119, -0.3583, -3.0936],
        [ 3.1471, -0.4638, -3.1535],
        [ 3.2135, -0.3789, -2.8656],
        [ 3.0507, -0.3518, -3.1095]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #750: tensor([[ 3.1621, -0.3002, -3.1041],
        [ 3.1932, -0.3055, -2.9199],
        [ 3.1742, -0.3610, -2.8550],
        [ 3.1706, -0.5321, -2.8153],
        [ 3.2521, -0.4074, -2.9262],
        [ 3.1867, -0.3152, -3.0826],
        [ 2.9609, -0.2987, -2.9341],
        [ 3.0910, -0.5536, -3.1161],
        [ 3.0026, -0.5293, -2.8683],
        [ 3.2684, -0.3268, -2.9431],
        [ 2.8836, -0.2424, -2.9683],
        [ 3.0359, -0.1863, -3.1261]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #800: tensor([[ 3.2662, -0.2860, -3.0290],
        [ 2.9307, -0.0242, -3.0944],
        [ 3.1983, -0.2991, -3.0548],
        [ 3.1359, -0.3085, -3.1109],
        [ 3.1772, -0.2899, -3.2271],
        [ 2.9557, -0.2482, -2.9956],
        [ 3.1994, -0.3700, -2.9035],
        [ 2.6759, -0.1442, -3.0436],
        [ 2.9359, -0.1782, -3.1542],
        [ 2.7251,  0.0429, -2.8589],
        [ 3.1318, -0.2910, -3.0910],
        [ 2.9566, -0.2733, -3.1430]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #850: tensor([[ 2.0042,  0.7631, -3.1663],
        [ 2.4631,  0.4043, -3.1706],
        [ 2.9022, -0.0394, -2.9491],
        [ 2.8039, -0.2127, -2.9887],
        [ 2.7619, -0.0995, -3.1240],
        [ 2.8715, -0.1255, -3.1280],
        [ 2.8359, -0.1563, -3.0641],
        [ 3.0111, -0.2343, -2.9288],
        [ 3.0125, -0.2138, -3.1143],
        [ 2.8706, -0.1078, -3.2671],
        [ 2.8724, -0.0354, -3.1952],
        [ 2.9732, -0.1747, -3.1705]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #900: tensor([[ 2.7755, -0.0344, -3.1621],
        [ 2.5704,  0.1082, -3.3128],
        [ 2.8157,  0.1355, -3.1932],
        [ 2.9123,  0.1802, -3.2113],
        [ 2.8455,  0.0925, -3.1947],
        [ 2.9559,  0.0927, -3.0213],
        [ 2.8695, -0.0403, -3.2392],
        [ 2.7338, -0.0908, -3.1376],
        [ 2.8948,  0.1668, -3.2080],
        [ 2.9002,  0.1584, -3.2060],
        [ 2.8720,  0.2130, -3.0570],
        [ 2.8941,  0.2391, -3.3016]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #950: tensor([[ 2.0213,  0.9051, -3.3942],
        [ 1.5161,  1.4730, -3.3030],
        [ 0.6605,  1.7058, -2.8641],
        [ 1.3377,  1.4963, -3.3344],
        [ 1.7912,  1.3593, -3.5650],
        [-2.8654, -0.0453,  3.3696],
        [-2.9046, -0.0781,  3.1186],
        [-2.9069, -0.2016,  3.2090],
        [ 1.4499,  1.2834, -3.2895],
        [-0.9936,  2.8804, -1.8790],
        [ 1.1772,  1.6907, -3.2922],
        [-3.2266,  1.8914,  1.0923]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1000: tensor([[-3.0446, -0.4198,  3.5867],
        [-2.8601, -0.3333,  3.7269],
        [-2.9687, -0.4652,  3.7875],
        [-3.1371, -0.3376,  3.6701],
        [-2.9293, -0.3734,  3.6557],
        [-3.1636, -0.7216,  3.8882],
        [-3.0835, -0.6478,  3.9166],
        [-3.0608, -0.4503,  3.8566],
        [-3.0822, -0.5339,  3.8790],
        [-3.2439, -0.4687,  3.7366],
        [-3.0349, -0.4742,  3.9719],
        [-3.0262, -0.5703,  4.0246]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1050: tensor([[-3.1311, -0.4650,  3.7825],
        [-3.1383, -0.5636,  3.8915],
        [-3.1183, -0.4229,  3.7536],
        [-3.0287, -0.4471,  3.7837],
        [-2.9092, -0.3779,  3.6777],
        [-2.9721, -0.5111,  3.9351],
        [-3.0654, -0.4849,  3.6686],
        [-2.9954, -0.6687,  3.8267],
        [-3.0288, -0.5981,  3.9421],
        [-2.9919, -0.5933,  3.9318],
        [-3.0614, -0.4083,  3.8848],
        [-3.0747, -0.4539,  4.1297]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1100: tensor([[-3.0667, -0.4478,  3.9735],
        [-2.8761, -0.4470,  3.7947],
        [-3.0222, -0.5925,  3.9400],
        [-2.9930, -0.4930,  3.9219],
        [-3.0083, -0.5062,  3.8372],
        [-3.1501, -0.4813,  3.9423],
        [-3.1085, -0.4878,  3.8209],
        [-3.0075, -0.5316,  3.9214],
        [-2.9377, -0.5119,  3.9644],
        [-3.1663, -0.5248,  3.8456],
        [-3.0033, -0.4843,  3.8602],
        [-3.0307, -0.5038,  4.0448]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1150: tensor([[-3.0594, -0.5291,  3.9269],
        [-2.9929, -0.5018,  3.8427],
        [-3.2425, -0.4419,  3.9265],
        [-3.2465, -0.5198,  3.8209],
        [-3.0867, -0.4766,  3.7698],
        [-3.0589, -0.5441,  3.9310],
        [-3.2509, -0.5009,  3.8580],
        [-3.1245, -0.4419,  3.8007],
        [-3.0098, -0.4215,  3.7594],
        [-2.9508, -0.5814,  3.6208],
        [-3.0681, -0.5389,  3.9856],
        [-2.9614, -0.5996,  3.9025]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1200: tensor([[-3.0262, -0.4956,  3.4455],
        [-3.0446, -0.4332,  3.7443],
        [-3.1552, -0.3080,  3.8880],
        [-3.0632, -0.5458,  3.8372],
        [-3.0217, -0.3169,  3.7293],
        [-2.9964, -0.5600,  3.8183],
        [-3.1536, -0.4796,  3.8904],
        [-3.1149, -0.5151,  3.8903],
        [-2.9476, -0.6491,  3.8822],
        [-3.0085, -0.5262,  3.7849],
        [-3.0128, -0.4285,  3.9619],
        [-3.0258, -0.4909,  3.9227]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1250: tensor([[-3.0771, -0.5953,  3.7853],
        [-3.1714, -0.4404,  3.8827],
        [-3.1329, -0.5338,  3.8447],
        [-2.9650, -0.4162,  3.8158],
        [-3.0964, -0.4539,  3.9795],
        [-3.1573, -0.5433,  3.8039],
        [-2.9603, -0.5417,  3.8023],
        [-3.1988, -0.3544,  3.7996],
        [-2.9937, -0.5957,  3.9213],
        [-3.1562, -0.4675,  3.9429],
        [-3.1705, -0.4146,  3.7282],
        [-3.0793, -0.4520,  3.5211]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1300: tensor([[-3.1283, -0.5482,  3.8904],
        [-2.9407, -0.4111,  3.8227],
        [-3.1594, -0.4799,  3.9113],
        [-3.0393, -0.5328,  3.8011],
        [-3.1115, -0.4992,  3.6780],
        [-3.0594, -0.4015,  3.9187],
        [-3.0530, -0.4629,  3.8698],
        [-2.9939, -0.3757,  3.5177],
        [-3.2367, -0.4269,  3.7588],
        [-3.1010, -0.3318,  3.7505],
        [-2.9039, -0.4065,  3.7133],
        [-3.1034, -0.4446,  3.6991]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1350: tensor([[-3.1085, -0.4155,  3.9071],
        [-3.1391, -0.5845,  3.7237],
        [-3.1059, -0.4525,  3.8887],
        [-3.1382, -0.4298,  3.8128],
        [-3.1846, -0.3345,  3.9467],
        [-3.0050, -0.4340,  3.5938],
        [-3.0982, -0.4033,  3.7548],
        [-3.0501, -0.4853,  3.8077],
        [-3.0875, -0.4549,  3.8282],
        [-3.1078, -0.4496,  3.8774],
        [-3.0870, -0.4358,  3.7431],
        [-3.0878, -0.3883,  3.8870]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1400: tensor([[-3.0698, -0.4850,  3.8178],
        [-3.0389, -0.3691,  3.6313],
        [-3.0922, -0.5292,  3.9271],
        [-3.0240, -0.2494,  3.5784],
        [-3.1007, -0.4020,  3.9068],
        [-3.0225, -0.4465,  3.8265],
        [-3.0448, -0.3694,  3.7271],
        [-3.1177, -0.4505,  3.7278],
        [-3.1212, -0.4464,  3.6993],
        [-3.2425, -0.4677,  3.8714],
        [-3.0961, -0.3023,  3.7189],
        [-3.0661, -0.2049,  3.7142]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1450: tensor([[-3.2122, -0.2713,  3.9069],
        [-3.1855, -0.3941,  3.8248],
        [-2.9600, -0.2859,  3.7847],
        [-3.0183, -0.2689,  3.6177],
        [-3.1102, -0.4502,  3.6690],
        [-2.9427, -0.3986,  3.7377],
        [-3.0082, -0.3596,  3.6209],
        [-3.0032, -0.3167,  3.8091],
        [-3.1414, -0.4215,  3.6830],
        [-3.0945, -0.5338,  3.7336],
        [-3.1162, -0.4111,  3.8402],
        [-2.8484, -0.4751,  3.7004]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1500: tensor([[-3.2098, -0.3380,  3.7467],
        [-2.9051, -0.3305,  3.6456],
        [-3.1250, -0.4619,  3.7001],
        [-3.0243, -0.3434,  3.7459],
        [-3.0880, -0.4719,  3.8189],
        [-3.2119, -0.4021,  3.6243],
        [-2.9636, -0.4468,  3.7777],
        [-3.0741, -0.3036,  3.6540],
        [-3.1666, -0.2059,  3.8162],
        [-3.2473, -0.3201,  3.7391],
        [-3.1454, -0.5051,  3.8708],
        [-3.1245, -0.3121,  3.7651]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1550: tensor([[-3.0689, -0.4264,  3.7451],
        [-3.0062, -0.4666,  3.7367],
        [-3.0066, -0.3469,  3.7368],
        [-3.2105, -0.4313,  3.7297],
        [-3.1395, -0.4143,  3.7250],
        [-3.1630, -0.2931,  3.7906],
        [-3.1185, -0.4065,  3.8355],
        [-3.3145, -0.4238,  3.8945],
        [-3.1045, -0.2258,  3.7569],
        [-3.1761, -0.3810,  3.8393],
        [-3.0699, -0.4645,  3.7959],
        [-3.0610, -0.4681,  3.8544]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1600: tensor([[-3.1771, -0.2990,  3.5696],
        [-2.9196, -0.4169,  3.7300],
        [-3.1614, -0.4395,  3.4882],
        [-3.3039, -0.4170,  3.5894],
        [-3.2801, -0.4132,  3.8218],
        [-3.1061, -0.4287,  3.5718],
        [-3.2325, -0.4262,  3.8175],
        [-2.9390, -0.3719,  3.7729],
        [-2.9322, -0.4067,  3.6884],
        [-3.1901, -0.2847,  3.4771],
        [-3.0908, -0.4364,  3.7505],
        [-3.2035, -0.3599,  3.7334]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1650: tensor([[-3.1981, -0.3015,  3.6337],
        [-3.1437, -0.2524,  3.7356],
        [-3.1490, -0.3868,  3.7922],
        [-3.0628, -0.3017,  3.7165],
        [-3.0798, -0.2940,  3.6626],
        [-3.1894, -0.3046,  3.4671],
        [-3.1082, -0.2070,  3.5249],
        [-3.2455, -0.3499,  3.6022],
        [-3.2247, -0.5101,  3.6425],
        [-3.1475, -0.3813,  3.7350],
        [-3.1073, -0.2830,  3.5617],
        [-3.2186, -0.3890,  3.6775]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1700: tensor([[-3.1688, -0.2009,  3.6961],
        [-2.9989, -0.3959,  3.4750],
        [-3.0031, -0.2571,  3.7876],
        [-3.0201, -0.3203,  3.7067],
        [-3.3610, -0.2632,  3.5674],
        [-2.9673, -0.3075,  3.7330],
        [-3.0137, -0.3095,  3.8004],
        [-3.2893, -0.3629,  3.5379],
        [-3.3300, -0.3851,  3.7808],
        [-3.1883, -0.2335,  3.4734],
        [-3.0047, -0.2006,  3.6383],
        [-2.9847, -0.1988,  3.6254]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1750: tensor([[-3.1916, -0.2154,  3.5820],
        [-3.2309, -0.3525,  3.6556],
        [-3.1935, -0.2629,  3.6399],
        [-2.9529, -0.2280,  3.4884],
        [-3.0901, -0.1690,  3.7762],
        [-3.1707, -0.2004,  3.7258],
        [-3.2210, -0.3555,  3.8038],
        [-3.1928, -0.2801,  3.7840],
        [-3.1941, -0.2392,  3.7581],
        [-2.9745, -0.4205,  3.6177],
        [-3.1917, -0.2123,  3.7247],
        [-3.1516, -0.2807,  3.5374]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1800: tensor([[-3.0262, -0.2745,  3.7007],
        [-3.0878, -0.2396,  3.5809],
        [-3.4108, -0.1970,  3.8467],
        [-3.0743, -0.3131,  3.7301],
        [-3.3202, -0.1961,  3.7215],
        [-3.2019, -0.2784,  3.5493],
        [-3.1178, -0.3299,  3.6848],
        [-3.0877, -0.2016,  3.6776],
        [-3.1901, -0.2432,  3.5681],
        [-3.2100, -0.2134,  3.7898],
        [-3.2488, -0.1021,  3.6339],
        [-3.2731, -0.2902,  3.6805]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1850: tensor([[-3.2452, -0.3309,  3.6496],
        [-3.1282, -0.2402,  3.5381],
        [-3.0524, -0.1153,  3.6571],
        [-3.1410, -0.2576,  3.7442],
        [-3.3010, -0.2893,  3.6293],
        [-3.2111, -0.1156,  3.3501],
        [-3.3749, -0.2563,  3.6197],
        [-3.0856, -0.3531,  3.6253],
        [-2.9510, -0.2430,  3.6317],
        [-3.2393, -0.2425,  3.7799],
        [-3.1356, -0.1964,  3.6343],
        [-3.0878, -0.0844,  3.4367]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1900: tensor([[-3.2063, -0.0608,  3.6372],
        [-3.1018, -0.1884,  3.5669],
        [-3.1936, -0.2186,  3.6818],
        [-3.1677, -0.1349,  3.5750],
        [-3.1225, -0.1031,  3.6307],
        [-3.2168, -0.1521,  3.5754],
        [-3.2600, -0.2237,  3.6301],
        [-3.0868, -0.2388,  3.4356],
        [-3.0634, -0.3373,  3.5514],
        [-3.0907, -0.0719,  3.5112],
        [-3.0431, -0.1511,  3.5075],
        [-3.1334, -0.3260,  3.5452]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #1950: tensor([[-3.3272, -0.0644,  3.5220],
        [-3.2404, -0.1679,  3.3754],
        [-3.0170, -0.2216,  3.5811],
        [-3.1441, -0.1092,  3.5179],
        [-3.3163, -0.2366,  3.5085],
        [-3.1857, -0.1230,  3.4285],
        [-3.3658, -0.2187,  3.4283],
        [-3.0995, -0.1163,  3.6479],
        [-3.0837, -0.1673,  3.5903],
        [-3.1975, -0.2090,  3.6931],
        [-3.3470, -0.3277,  3.5516],
        [-3.2888, -0.2972,  3.5746]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2000: tensor([[-3.1228, -0.3235,  3.7929],
        [-3.3343, -0.0931,  3.6655],
        [-3.2172, -0.1544,  3.4235],
        [-3.1239, -0.1767,  3.5567],
        [-3.3735, -0.1699,  3.5688],
        [-3.2236, -0.1859,  3.5834],
        [-3.2993, -0.0871,  3.6357],
        [-3.2897, -0.1020,  3.5363],
        [-3.1871, -0.0583,  3.5301],
        [-3.1262, -0.2605,  3.4932],
        [-3.1531, -0.2033,  3.5128],
        [-3.3549, -0.2106,  3.5710]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2050: tensor([[-3.3520, -0.1608,  3.4605],
        [-3.2987, -0.0480,  3.6013],
        [-2.7468, -0.0379,  3.4371],
        [-3.3265, -0.0841,  3.4424],
        [-3.2307, -0.1271,  3.4705],
        [-3.2739, -0.0622,  3.3026],
        [-3.3639, -0.0426,  3.3883],
        [-3.2460, -0.0969,  3.7031],
        [-3.1715, -0.0333,  3.2251],
        [-3.3741, -0.1157,  3.5063],
        [-3.2733, -0.1142,  3.5938],
        [-3.3322, -0.0133,  3.4396]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2100: tensor([[-3.2244e+00,  1.5926e-01,  3.3678e+00],
        [-3.2929e+00, -3.5282e-03,  3.3375e+00],
        [-3.4244e+00, -1.9569e-01,  3.4068e+00],
        [-3.1358e+00, -6.6343e-02,  3.5266e+00],
        [-3.4309e+00, -1.6224e-01,  3.4554e+00],
        [-3.1503e+00, -7.9820e-02,  3.7599e+00],
        [-3.1702e+00, -9.4342e-02,  3.4554e+00],
        [-3.3749e+00, -4.4748e-02,  3.4800e+00],
        [-3.1483e+00, -3.9708e-02,  3.2988e+00],
        [-3.2277e+00,  4.8932e-02,  3.5068e+00],
        [-3.1949e+00, -4.2023e-02,  3.3557e+00],
        [-3.2171e+00, -2.7006e-01,  3.6178e+00]], device='cuda:0',
       grad_fn=<AddmmBackward>)
Iter #2150: tensor([[-3.2929e+00, -7.9635e-04,  3.3403e+00],
        [-3.2727e+00, -3.3186e-02,  3.3954e+00],
        [-3.2497e+00,  2.0231e-02,  3.3502e+00],
        [-3.2040e+00, -7.7798e-03,  3.4459e+00],
        [-3.2038e+00, -5.3163e-02,  3.3054e+00],
        [-3.2459e+00, -7.7399e-02,  3.3386e+00],
        [-3.3051e+00, -7.1923e-02,  3.4631e+00],
        [-3.3201e+00,  1.9346e-01,  3.2825e+00],
        [-3.3675e+00,  4.7287e-02,  3.5807e+00],
        [-3.2536e+00, -4.6279e-02,  3.5586e+00],
        [-3.2471e+00,  1.2796e-01,  3.2217e+00],
        [-3.4030e+00, -7.2434e-02,  3.4988e+00]], device='cuda:0',
       grad_fn=<AddmmBackward>)
Iter #2200: tensor([[-3.3539e+00,  2.0786e-02,  3.4147e+00],
        [-3.3808e+00, -2.6675e-02,  3.3678e+00],
        [-3.3484e+00, -4.8571e-02,  3.5337e+00],
        [-3.3254e+00,  4.5879e-02,  3.4683e+00],
        [-3.4098e+00,  1.2353e-01,  3.5235e+00],
        [-3.1978e+00, -5.8905e-02,  3.3566e+00],
        [-3.2616e+00,  1.0522e-01,  3.4722e+00],
        [-3.2625e+00,  1.3888e-02,  3.4778e+00],
        [-3.2583e+00,  6.8534e-02,  3.3558e+00],
        [-3.1852e+00,  1.7624e-01,  3.5195e+00],
        [-3.3004e+00, -7.2705e-03,  3.3812e+00],
        [-3.4260e+00,  4.7900e-04,  3.3459e+00]], device='cuda:0',
       grad_fn=<AddmmBackward>)
Iter #2250: tensor([[-3.4263,  0.0347,  3.4236],
        [-3.2896,  0.1613,  3.4152],
        [-3.3347,  0.2636,  3.3206],
        [-3.1917,  0.0565,  3.2867],
        [-3.3410,  0.2408,  3.2530],
        [-3.3817, -0.0140,  3.3207],
        [-3.2926,  0.1971,  3.3193],
        [-3.2606,  0.0929,  3.2540],
        [-3.3032, -0.0301,  3.4104],
        [-3.3348,  0.1147,  3.4178],
        [-3.4321,  0.1147,  3.4516],
        [-3.2591,  0.0913,  3.2064]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2300: tensor([[-3.3220,  0.0727,  3.3281],
        [-3.2257,  0.0579,  2.9307],
        [-3.4215,  0.2277,  3.2118],
        [-3.3520,  0.1446,  3.3559],
        [-3.4371,  0.2117,  3.3691],
        [-3.4771,  0.1656,  3.3623],
        [-3.4861,  0.2642,  3.3176],
        [-3.4593,  0.2114,  3.2982],
        [-3.4805,  0.1919,  3.2410],
        [-3.3624,  0.0245,  3.1664],
        [-3.3133,  0.3150,  3.0660],
        [-3.2324,  0.2190,  3.3003]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2350: tensor([[-3.3883,  0.1752,  3.0356],
        [-3.5664,  0.1768,  3.0606],
        [-3.4421,  0.2238,  3.2828],
        [-3.3145,  0.3483,  2.8113],
        [-3.4468,  0.3294,  3.2752],
        [-3.5375,  0.3210,  3.1026],
        [-3.4741,  0.2986,  3.1394],
        [-3.1270,  0.1361,  3.2896],
        [-3.3314,  0.1911,  3.1230],
        [-3.4023,  0.2818,  3.2583],
        [-3.4105,  0.2110,  3.2133],
        [-3.3819,  0.1459,  3.0976]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2400: tensor([[-3.5012,  0.3796,  2.9826],
        [-3.4822,  0.4218,  3.0755],
        [-3.3561,  0.4211,  3.1472],
        [-3.3573,  0.3538,  3.1220],
        [-3.4297,  0.3098,  3.0192],
        [-3.5315,  0.4600,  3.0754],
        [-3.4442,  0.3520,  3.1446],
        [-3.5104,  0.4369,  3.0584],
        [-3.4630,  0.4354,  3.0697],
        [-3.3463,  0.4924,  2.9252],
        [-3.5643,  0.3578,  3.1467],
        [-3.4505,  0.4075,  3.0345]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2450: tensor([[-3.4546,  0.4964,  2.9619],
        [-3.4303,  0.5483,  3.2402],
        [-3.5334,  0.5940,  2.9527],
        [-3.6075,  0.5992,  3.0241],
        [-3.4820,  0.5381,  3.0400],
        [-3.4780,  0.5309,  3.0083],
        [-3.2582,  0.6023,  2.9031],
        [-3.4011,  0.3889,  2.8641],
        [-3.4873,  0.3800,  2.9997],
        [-3.6054,  0.4689,  2.9313],
        [-3.4015,  0.5229,  3.0870],
        [-3.5712,  0.5287,  2.9271]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2500: tensor([[-3.4478,  0.6708,  2.8062],
        [-3.3643,  0.6649,  2.7453],
        [-3.3756,  0.4653,  2.9144],
        [-3.4490,  0.6081,  2.7767],
        [-3.4328,  0.7173,  2.9537],
        [-3.5194,  0.6112,  2.7404],
        [-3.4109,  0.5933,  2.7912],
        [-3.4444,  0.6712,  2.8598],
        [-3.5688,  0.6664,  2.8100],
        [-3.6072,  0.6292,  2.9404],
        [-3.6815,  0.4446,  2.7951],
        [-3.4938,  0.7401,  2.8414]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2550: tensor([[-3.5809,  0.9285,  2.5221],
        [-3.6332,  0.8143,  2.7631],
        [-3.4758,  0.8081,  2.5323],
        [-3.4680,  0.8765,  2.7054],
        [-3.6522,  0.8781,  2.6696],
        [-3.6734,  0.8763,  2.6142],
        [-3.5781,  0.9939,  2.6574],
        [-3.3753,  0.8469,  2.6505],
        [-3.6180,  0.8284,  2.5665],
        [-3.6072,  0.9325,  2.7998],
        [-3.6159,  0.8245,  2.4820],
        [-3.5843,  0.8574,  2.7003]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2600: tensor([[-3.6537,  1.1006,  2.3608],
        [-3.8081,  1.2237,  2.2103],
        [-3.7060,  1.2588,  2.2850],
        [-3.8374,  1.1994,  2.3605],
        [-3.7582,  1.1089,  2.1526],
        [-3.6566,  1.0506,  2.2577],
        [-3.6463,  1.2070,  2.4029],
        [-3.4634,  1.2719,  2.2173],
        [-3.6114,  1.0480,  2.4226],
        [-3.7869,  0.9917,  2.3522],
        [-3.6311,  0.9746,  2.3758],
        [-3.7270,  1.1716,  2.2483]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2650: tensor([[-3.5898,  1.4945,  2.1214],
        [-3.7917,  1.4519,  2.0981],
        [-3.6711,  1.4242,  2.0779],
        [-3.7293,  1.3388,  2.1220],
        [-3.8043,  1.3307,  2.0588],
        [-3.7454,  1.4080,  1.9492],
        [-3.7738,  1.4242,  2.0324],
        [-3.6493,  1.5251,  2.0905],
        [-3.3919,  1.3212,  2.1075],
        [-3.8640,  1.3380,  2.0500],
        [-3.6504,  1.3623,  2.1092],
        [-3.5607,  1.3546,  1.9269]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2700: tensor([[-3.9196,  1.6883,  1.7505],
        [-3.6175,  1.7169,  1.7180],
        [-3.7059,  1.6572,  2.0489],
        [-3.8050,  1.6657,  1.5895],
        [-3.7627,  1.6033,  1.8367],
        [-3.6973,  1.6893,  1.8015],
        [-3.7105,  1.6620,  1.6427],
        [-3.6430,  1.7620,  1.7640],
        [-3.6810,  1.5824,  1.7646],
        [-3.6241,  1.6131,  1.5290],
        [-3.6644,  1.6749,  1.6950],
        [-3.7009,  1.7474,  1.8300]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2750: tensor([[-3.7767,  1.5979,  1.8599],
        [-3.8020,  1.8061,  1.7718],
        [-3.7619,  1.8315,  1.7676],
        [-3.8576,  1.7413,  1.7390],
        [-3.7926,  1.6610,  1.8077],
        [-3.6163,  1.7149,  1.6064],
        [-3.8396,  1.6415,  1.8717],
        [-3.8581,  1.7829,  1.8765],
        [-3.7447,  1.7655,  1.5964],
        [-3.7162,  1.6508,  1.7446],
        [-3.8940,  1.6249,  1.9308],
        [-3.6826,  1.7715,  1.8357]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2800: tensor([[-3.8160,  1.6880,  1.7371],
        [-3.5937,  1.7096,  1.7994],
        [-3.8129,  1.7126,  1.7625],
        [-3.8396,  1.6776,  1.4630],
        [-3.8188,  1.6117,  1.8946],
        [-3.6906,  1.8072,  1.6329],
        [-3.9274,  1.6512,  1.7881],
        [-3.7215,  1.7261,  1.8487],
        [-3.6484,  1.9370,  1.5134],
        [-3.8082,  1.7235,  1.8277],
        [-3.7110,  1.7786,  1.7203],
        [-3.7532,  1.8003,  1.8021]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2850: tensor([[-3.8079,  1.8991,  1.6262],
        [-3.8581,  1.8219,  1.6924],
        [-3.7920,  1.7379,  1.8506],
        [-3.7565,  1.7672,  1.5335],
        [-3.7613,  1.5482,  1.4705],
        [-3.8326,  1.8732,  1.8067],
        [-3.7117,  1.7831,  1.7287],
        [-3.7737,  1.7290,  1.6568],
        [-3.7712,  1.7618,  1.5792],
        [-3.7303,  1.6986,  1.7096],
        [-3.7223,  1.6242,  1.7524],
        [-3.7627,  1.7043,  1.6738]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2900: tensor([[-3.7817,  1.6378,  1.8847],
        [-3.7474,  1.6680,  1.7004],
        [-3.7512,  1.7345,  1.6737],
        [-3.7352,  1.7443,  1.8816],
        [-3.5222,  1.6993,  1.6592],
        [-3.7881,  1.6667,  1.7333],
        [-3.8303,  1.5225,  1.6142],
        [-3.6470,  1.8183,  1.6269],
        [-3.6217,  1.9062,  1.5336],
        [-3.6929,  1.6833,  1.7468],
        [-3.7955,  1.6552,  1.6914],
        [-3.7669,  1.7812,  1.7220]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #2950: tensor([[-3.7612,  1.8056,  1.6550],
        [-3.6388,  1.7852,  1.7537],
        [-3.6689,  1.8183,  1.6762],
        [-3.7066,  1.9124,  1.8194],
        [-3.5943,  1.6557,  1.5273],
        [-3.7114,  1.6720,  1.5406],
        [-3.8578,  1.7468,  1.7938],
        [-3.8036,  1.7259,  1.7468],
        [-3.7127,  1.7926,  1.8703],
        [-3.6642,  1.7990,  1.5968],
        [-3.8267,  1.7858,  1.6960],
        [-3.7485,  1.8186,  1.6861]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3000: tensor([[-3.4510,  1.7358,  1.6379],
        [-3.6401,  1.8977,  1.7174],
        [-3.7831,  1.6681,  1.9229],
        [-3.9093,  1.8746,  1.6102],
        [-3.7019,  1.7933,  1.6549],
        [-3.8300,  1.9384,  1.6975],
        [-3.8372,  1.9532,  1.7103],
        [-3.6025,  1.6886,  1.8574],
        [-3.8006,  1.7063,  1.8155],
        [-3.6935,  1.8935,  1.6209],
        [-3.6194,  1.5569,  1.8657],
        [-3.8767,  1.7536,  1.7092]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3050: tensor([[-3.6431,  1.0091,  2.5179],
        [-3.5788,  2.6887,  0.7195],
        [-3.5877,  1.0864,  2.6420],
        [-3.5248,  1.2435,  2.4372],
        [-3.5998,  2.1171,  1.1922],
        [-3.7172,  2.1634,  1.3426],
        [-3.5917,  0.9439,  2.6897],
        [-3.5327,  1.1663,  2.5641],
        [-3.4440,  2.6743,  0.8329],
        [-3.5009,  0.8887,  2.6515],
        [-3.4182,  1.3223,  2.3093],
        [-3.5065,  0.9275,  2.3416]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3100: tensor([[ 3.0566,  0.3719, -3.8728],
        [ 3.0969,  0.3334, -3.7565],
        [ 2.9931,  0.2022, -3.5971],
        [ 3.1276,  0.2461, -3.7738],
        [ 3.0165,  0.2565, -3.9208],
        [ 3.1228,  0.2826, -3.9128],
        [ 3.0769,  0.3601, -3.7607],
        [ 2.9128,  0.2915, -3.7517],
        [ 2.7941,  0.4497, -3.7286],
        [ 2.8052,  0.6316, -3.9421],
        [ 2.4738,  0.9151, -3.9363],
        [ 2.9770,  0.5079, -3.8274]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3150: tensor([[ 3.3708e+00,  9.4916e-02, -3.9126e+00],
        [ 3.5031e+00,  4.4835e-02, -3.7338e+00],
        [ 3.5285e+00,  1.4404e-02, -3.8216e+00],
        [ 3.3826e+00, -2.7715e-01, -3.5884e+00],
        [ 2.6908e+00,  6.7040e-01, -3.9088e+00],
        [-3.4063e+00, -1.5926e-01,  3.7486e+00],
        [ 3.3624e+00, -2.8222e-02, -3.8209e+00],
        [ 3.5473e+00,  4.0856e-02, -3.7637e+00],
        [ 3.4876e+00, -2.1779e-01, -3.6532e+00],
        [ 3.5519e+00, -3.0568e-03, -3.8774e+00],
        [ 3.2918e+00,  9.5832e-02, -3.7712e+00],
        [ 3.5749e+00,  2.0412e-02, -3.8066e+00]], device='cuda:0',
       grad_fn=<AddmmBackward>)
Iter #3200: tensor([[ 3.4732,  0.0404, -3.7701],
        [ 3.4858, -0.0250, -3.9960],
        [ 3.4338, -0.0826, -3.9704],
        [ 3.4024, -0.0710, -3.7907],
        [ 3.3917,  0.1520, -3.7259],
        [ 3.2048,  0.0691, -3.9347],
        [ 3.3951,  0.2926, -3.9782],
        [ 3.4075,  0.1653, -3.8444],
        [ 3.2247,  0.1652, -4.0786],
        [ 3.3234,  0.2700, -4.1086],
        [ 3.2679,  0.3046, -3.9231],
        [ 3.2968,  0.2713, -4.1097]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3250: tensor([[ 3.3677,  0.1221, -4.0324],
        [ 3.2431,  0.2000, -4.0624],
        [ 3.3768,  0.1644, -3.9496],
        [ 3.3688,  0.1581, -3.9643],
        [ 3.4350,  0.1910, -4.1298],
        [ 3.3701,  0.1426, -3.8525],
        [ 3.3455,  0.2260, -3.9311],
        [ 3.2914,  0.0192, -3.8667],
        [ 3.2667,  0.3971, -3.9601],
        [ 2.4964,  1.1195, -4.3101],
        [-3.4712,  0.2330,  3.3627],
        [ 3.2420,  0.3244, -4.0794]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3300: tensor([[ 3.0688,  0.6043, -4.2840],
        [ 3.1037,  0.5530, -4.2613],
        [ 2.9541,  0.5121, -4.2013],
        [ 2.9145,  0.7218, -4.2737],
        [ 2.9816,  0.7108, -4.2065],
        [ 2.9762,  0.5777, -4.2004],
        [ 3.0806,  0.6182, -4.0497],
        [ 3.0255,  0.6369, -4.1298],
        [ 3.0714,  0.6443, -4.3456],
        [ 3.0460,  0.6134, -4.1174],
        [ 2.3540,  0.9468, -4.2089],
        [ 2.7540,  0.8788, -4.2657]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3350: tensor([[-2.5349, -1.3929,  4.2075],
        [-2.4135, -1.4425,  4.5120],
        [-1.9885,  3.5644, -1.7202],
        [-3.0533, -0.3938,  4.0141],
        [-1.9423, -2.0783,  4.4850],
        [-1.8104, -1.9937,  4.2365],
        [-1.7844, -1.8471,  4.3011],
        [-1.7959, -2.0323,  4.3347],
        [-2.0612, -1.7831,  4.4348],
        [-1.6915, -1.9872,  4.4158],
        [-2.3047, -1.4832,  3.8691],
        [-2.2422, -1.7563,  4.4334]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3400: tensor([[-1.7433, -2.2276,  4.3808],
        [-1.7718, -1.9539,  4.3862],
        [-1.7888, -2.2393,  4.5077],
        [-1.5432, -2.2256,  4.3312],
        [-1.5528, -2.1594,  4.4806],
        [-1.7761, -2.1489,  4.3588],
        [-1.6227, -2.2472,  4.4753],
        [-1.6102, -2.0607,  4.3955],
        [-1.6577, -2.0162,  4.3880],
        [-1.6199, -2.0201,  4.3675],
        [-1.7004, -2.0926,  4.3892],
        [-1.6289, -2.1121,  4.3542]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3450: tensor([[-1.7300, -1.9530,  4.3263],
        [-1.6864, -2.0571,  4.2606],
        [-1.6733, -2.3053,  4.4971],
        [-1.5947, -1.9464,  4.3140],
        [-1.7308, -2.2132,  4.4974],
        [-1.7938, -2.1826,  4.6529],
        [-1.6308, -2.2509,  4.3909],
        [-1.5370, -2.1344,  4.5487],
        [-1.5785, -2.1884,  4.4269],
        [-1.6260, -2.0985,  4.2984],
        [-1.7653, -1.8870,  4.4147],
        [-1.8245, -2.1906,  4.4632]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3500: tensor([[-1.7060, -2.2885,  4.5278],
        [-1.5749, -2.2796,  4.4452],
        [-1.6626, -2.1208,  4.2887],
        [-1.6339, -2.1027,  4.3843],
        [-1.9004, -2.1573,  4.5922],
        [-1.8279, -2.1801,  4.4048],
        [-1.7065, -2.3162,  4.4508],
        [-1.6041, -2.2752,  4.4712],
        [-1.4893, -2.2766,  4.4115],
        [-1.7519, -2.2828,  4.3259],
        [-1.7122, -2.3348,  4.2678],
        [-1.8400, -2.0472,  4.5088]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3550: tensor([[-1.7012, -2.0732,  4.3689],
        [-1.5181, -2.1099,  4.3629],
        [-1.9044, -2.2415,  4.4728],
        [-1.6207, -2.3046,  4.2015],
        [-1.7430, -2.1742,  4.2790],
        [-1.7152, -2.0634,  4.5219],
        [-1.6805, -2.2565,  4.5967],
        [-1.7570, -2.1615,  4.4123],
        [-1.6028, -2.2627,  4.4109],
        [-1.8031, -2.0407,  4.4026],
        [-1.6720, -2.3238,  4.5449],
        [-1.5675, -2.0653,  4.4249]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3600: tensor([[-1.5767, -2.3313,  4.2749],
        [-1.6042, -2.0977,  4.5180],
        [-1.6265, -2.0034,  4.3172],
        [-1.6285, -2.3096,  4.4712],
        [-1.8018, -2.2489,  4.5183],
        [-1.5730, -2.1983,  4.3614],
        [-1.8751, -2.0755,  4.3698],
        [-1.7511, -2.1843,  4.4855],
        [-1.7131, -2.1564,  4.4136],
        [-1.6843, -2.2480,  4.5746],
        [-1.7078, -2.2314,  4.4924],
        [-1.7310, -2.1903,  4.3273]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3650: tensor([[-1.7123, -2.0878,  4.3019],
        [-1.5372, -2.2011,  4.3672],
        [-1.8066, -2.2692,  4.5577],
        [-1.8653, -2.0452,  4.3844],
        [-1.7234, -2.1589,  4.5539],
        [-1.8467, -2.0551,  4.3666],
        [-1.5518, -2.3525,  4.3239],
        [-1.8494, -2.0432,  4.3220],
        [-1.6247, -2.0894,  4.4826],
        [-1.7704, -1.9715,  4.4874],
        [-1.6283, -2.2168,  4.2984],
        [-1.5071, -2.2152,  4.4351]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3700: tensor([[-1.6906, -2.1862,  4.3614],
        [-1.7578, -2.0929,  4.4950],
        [-1.7534, -2.0463,  4.3845],
        [-1.8415, -2.1124,  4.5795],
        [-1.6107, -2.2553,  4.3913],
        [-1.7293, -2.0875,  4.2297],
        [-1.6860, -2.1673,  4.4510],
        [-1.7145, -2.1696,  4.2245],
        [-1.7179, -2.2159,  4.1926],
        [-1.7465, -2.0884,  4.4481],
        [-1.6858, -2.2639,  4.3905],
        [-1.5472, -2.0958,  4.4415]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3750: tensor([[-1.8268, -2.2122,  4.5187],
        [-1.7039, -2.2508,  4.4859],
        [-1.5495, -2.0602,  4.3720],
        [-1.7887, -1.9987,  4.4577],
        [-1.9306, -2.0713,  4.4081],
        [-1.6490, -2.1978,  4.5713],
        [-1.6687, -2.3292,  4.4123],
        [-1.8740, -2.1635,  4.5706],
        [-1.6783, -2.2591,  4.4244],
        [-1.8264, -2.2599,  4.5248],
        [-1.8829, -2.0123,  4.5017],
        [-1.7189, -2.1876,  4.3541]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3800: tensor([[-1.7273, -2.1892,  4.5994],
        [-1.5487, -2.2115,  4.4390],
        [-1.4708, -2.2351,  4.5047],
        [-1.4800, -2.3010,  4.3254],
        [-1.7195, -2.1589,  4.5073],
        [-1.8572, -2.1020,  4.3845],
        [-1.5205, -2.2776,  4.3578],
        [-1.8516, -2.0941,  4.4165],
        [-1.5960, -2.1980,  4.5262],
        [-1.5615, -2.1624,  4.2241],
        [-1.6598, -2.1120,  4.3304],
        [-1.9315, -2.0234,  4.4244]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3850: tensor([[-1.8440, -2.0598,  4.2058],
        [-1.5210, -2.1510,  4.3088],
        [-1.7148, -2.2408,  4.2269],
        [-1.7794, -2.1584,  4.2861],
        [-1.6333, -2.1910,  4.3066],
        [-1.6623, -2.2488,  4.4502],
        [-1.6067, -2.2167,  4.4745],
        [-1.4538, -2.1224,  4.5261],
        [-1.7092, -2.0598,  4.4312],
        [-1.4437, -2.0050,  4.2856],
        [-1.8032, -2.1306,  4.3389],
        [-1.5397, -2.2453,  4.4896]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3900: tensor([[-1.6531, -2.1886,  4.5647],
        [-1.6378, -2.2142,  4.4403],
        [-1.7224, -2.0775,  4.2882],
        [-1.4602, -2.0918,  4.3388],
        [-1.7102, -2.2050,  4.3325],
        [-1.6350, -2.2827,  4.5090],
        [-1.6848, -2.2550,  4.3051],
        [-1.5794, -2.1616,  4.2541],
        [-1.6773, -2.1350,  4.4299],
        [-1.7586, -2.2568,  4.4317],
        [-1.5005, -2.1446,  4.3224],
        [-1.6517, -2.2034,  4.4236]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #3950: tensor([[-1.6419, -2.2141,  4.3521],
        [-1.6878, -2.2029,  4.4073],
        [-1.3834, -2.1854,  4.2922],
        [-1.7602, -2.1390,  4.2521],
        [-1.5133, -2.2683,  4.3622],
        [-1.5526, -2.2383,  4.1474],
        [-1.6208, -2.1802,  4.2714],
        [-1.5832, -2.2331,  4.4831],
        [-1.8077, -2.0417,  4.4181],
        [-1.7104, -2.1050,  4.3719],
        [-1.7219, -2.2247,  4.4229],
        [-1.5021, -2.2123,  4.2376]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4000: tensor([[-1.5991, -2.2668,  4.3458],
        [-1.6366, -2.2748,  4.4838],
        [-1.8844, -2.1613,  4.5120],
        [-1.7552, -2.1622,  4.2130],
        [-1.6165, -2.0056,  4.4253],
        [-1.7498, -2.1610,  4.3694],
        [-1.7510, -2.2043,  4.4346],
        [-1.6816, -1.9990,  4.3138],
        [-1.6143, -2.1755,  4.4568],
        [-1.8760, -2.1259,  4.4489],
        [-1.5827, -2.1116,  4.3648],
        [-1.7083, -2.3418,  4.4083]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4050: tensor([[-1.7682, -2.2264,  4.4576],
        [-1.8027, -2.1484,  4.3423],
        [-1.6938, -2.2333,  4.2383],
        [-1.5013, -2.1370,  4.4137],
        [-1.7287, -2.1090,  4.5207],
        [-1.7151, -2.1934,  4.3973],
        [-1.5289, -2.2471,  4.2119],
        [-1.7799, -2.0335,  4.4113],
        [-1.7404, -2.1875,  4.2280],
        [-1.4528, -2.2306,  4.4162],
        [-1.6187, -2.1323,  4.4946],
        [-1.7560, -2.1188,  4.4652]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4100: tensor([[-1.5981, -2.1782,  4.3800],
        [-1.7309, -2.1946,  4.2601],
        [-1.6332, -2.1688,  4.4797],
        [-1.2273, -2.2197,  4.1611],
        [-1.6506, -2.2557,  4.5419],
        [-1.6352, -2.2874,  4.2748],
        [-1.5984, -2.1744,  4.4478],
        [-1.8860, -1.8711,  4.3251],
        [-1.7494, -2.1710,  4.3436],
        [-1.6796, -2.2537,  4.4014],
        [-1.6209, -2.1841,  4.4882],
        [-1.6484, -2.2322,  4.4676]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4150: tensor([[-1.7228, -1.9022,  4.2911],
        [-1.7650, -2.1809,  4.5167],
        [-1.7573, -2.1440,  4.4193],
        [-1.4744, -2.2983,  4.2540],
        [-1.6352, -2.2997,  4.5865],
        [-1.5964, -2.1295,  4.3338],
        [-1.5324, -2.0947,  4.2536],
        [-1.6013, -2.1251,  4.4588],
        [-1.5338, -2.2900,  4.4361],
        [-1.5835, -2.2235,  4.3754],
        [-1.4887, -2.0568,  4.2441],
        [-1.5024, -2.0877,  4.4789]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4200: tensor([[-1.7712, -2.0042,  4.4397],
        [-1.6977, -2.1705,  4.3967],
        [-1.5638, -2.1877,  4.1847],
        [-1.5341, -2.1789,  4.5094],
        [-1.5576, -2.1984,  4.5094],
        [-1.4851, -1.9899,  4.2157],
        [-1.5488, -2.3001,  4.3960],
        [-1.6398, -2.1462,  4.5370],
        [-1.6613, -2.1406,  4.2315],
        [-1.7493, -2.1231,  4.5543],
        [-1.6622, -2.0955,  4.2808],
        [-1.8349, -2.2369,  4.4589]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4250: tensor([[-1.6441, -2.1522,  4.4423],
        [-1.7391, -2.2610,  4.4054],
        [-1.6916, -2.0865,  4.4046],
        [-1.6874, -2.1440,  4.2770],
        [-1.7379, -2.1566,  4.2913],
        [-1.5028, -2.3160,  4.4021],
        [-1.6272, -2.2278,  4.5404],
        [-1.5544, -2.3268,  4.3528],
        [-1.4784, -2.2836,  4.2011],
        [-1.3851, -2.1320,  4.2096],
        [-1.4236, -2.0751,  4.1623],
        [-1.6449, -2.0223,  4.4087]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4300: tensor([[-1.5715, -2.0608,  4.2599],
        [-1.5940, -2.2829,  4.1204],
        [-1.7252, -2.1674,  4.1223],
        [-1.7617, -2.2412,  4.2814],
        [-1.4771, -2.2636,  4.3849],
        [-1.4410, -2.2473,  4.3495],
        [-1.7982, -2.2316,  4.3102],
        [-1.4454, -2.2063,  4.2387],
        [-1.5267, -2.4145,  4.2874],
        [-1.3270, -2.3327,  4.2818],
        [-1.6472, -2.3286,  4.2608],
        [-1.5071, -2.4476,  4.2192]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4350: tensor([[-1.5976, -2.1636,  4.0881],
        [-1.8119, -1.9658,  4.4453],
        [-1.8239, -2.0961,  4.4370],
        [-1.8447, -2.1402,  4.3864],
        [-1.7540, -1.9718,  4.3060],
        [-1.4058, -2.1815,  4.2234],
        [-1.6178, -2.1319,  4.3605],
        [-1.5270, -2.3719,  4.4683],
        [-1.7653, -1.9649,  4.3030],
        [-1.5300, -2.1236,  4.3781],
        [-1.5204, -2.3080,  4.3870],
        [-1.5237, -2.1127,  4.3635]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4400: tensor([[-1.6996, -2.1644,  4.3681],
        [-1.7106, -2.1552,  4.4239],
        [-1.4296, -2.3243,  4.4050],
        [-1.6505, -2.0688,  4.3018],
        [-1.7631, -2.1667,  4.4644],
        [-1.6784, -2.0923,  4.4589],
        [-1.5458, -2.1588,  4.1034],
        [-1.7962, -2.0523,  4.3894],
        [-1.7787, -2.1512,  4.3071],
        [-1.4754, -2.3053,  4.3723],
        [-1.4699, -2.1525,  4.2252],
        [-1.4367, -2.2400,  4.4307]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4450: tensor([[-1.6696, -2.2096,  4.3803],
        [-1.6125, -2.1736,  4.3752],
        [-1.6229, -1.9995,  4.1837],
        [-1.6226, -2.3142,  4.3494],
        [-1.3093, -2.3814,  4.0322],
        [-1.4566, -2.1318,  4.2756],
        [-1.5161, -2.1240,  4.4062],
        [-1.4902, -2.1570,  4.1392],
        [-1.4102, -2.2247,  4.5153],
        [-1.6283, -2.1855,  4.3334],
        [-1.6705, -2.1404,  4.5613],
        [-1.6278, -2.0952,  3.9383]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4500: tensor([[-1.4834, -2.0558,  4.3604],
        [-1.4450, -2.0811,  4.3119],
        [-1.4395, -2.4098,  4.4139],
        [-1.5775, -2.2506,  4.2828],
        [-1.5489, -2.3537,  4.2300],
        [-1.5094, -2.0165,  4.1522],
        [-1.5534, -2.3041,  4.4753],
        [-1.6391, -2.2481,  4.4704],
        [-1.5555, -2.1675,  4.3189],
        [-1.5801, -2.3078,  4.3245],
        [-1.5359, -2.3605,  4.3083],
        [-1.6134, -2.1488,  4.5434]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4550: tensor([[-1.5545, -2.2435,  4.3351],
        [-1.5972, -2.2576,  4.4965],
        [-1.6570, -2.1378,  4.4080],
        [-1.2774, -2.2570,  4.3700],
        [-1.4830, -2.1346,  4.2744],
        [-1.5813, -2.0902,  4.3161],
        [-1.6608, -2.2029,  4.4148],
        [-1.8162, -1.9237,  4.3047],
        [-1.4399, -2.2305,  4.3130],
        [-1.4632, -2.1195,  4.1526],
        [-1.3550, -2.2501,  4.3776],
        [-1.6976, -2.1061,  4.2186]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4600: tensor([[-1.4764, -2.2570,  4.2911],
        [-1.5661, -2.1957,  4.2987],
        [-1.4618, -2.1358,  4.2971],
        [-1.6076, -2.3554,  4.0658],
        [-1.5727, -2.2486,  4.3526],
        [-1.5094, -2.1443,  4.4758],
        [-1.4873, -2.3175,  4.2655],
        [-1.5164, -2.2848,  4.4974],
        [-1.4915, -2.1876,  4.2763],
        [-1.5418, -2.2229,  4.0510],
        [-1.6630, -2.1916,  4.3957],
        [-1.4366, -2.3019,  4.2011]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4650: tensor([[-1.5751, -2.1319,  4.1745],
        [-1.2835, -2.2071,  3.7320],
        [-1.5920, -2.2207,  4.3131],
        [-1.5014, -2.3457,  4.2420],
        [-1.5678, -2.1479,  4.4700],
        [-1.6407, -2.2035,  4.3340],
        [-1.3713, -2.2971,  4.1965],
        [-1.7172, -2.2668,  4.3231],
        [-1.6329, -2.1860,  4.1888],
        [-1.4105, -2.1806,  4.1028],
        [-1.5962, -2.1993,  4.3086],
        [-1.4713, -2.2211,  4.0886]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4700: tensor([[-1.5581, -2.2261,  3.9346],
        [-1.8037, -2.2755,  4.2359],
        [-1.7122, -1.9892,  4.1079],
        [-1.4774, -2.0694,  4.3412],
        [-1.4747, -2.3198,  4.2996],
        [-1.6653, -2.0534,  4.3509],
        [-1.3824, -2.1975,  4.1914],
        [-1.5451, -2.0479,  4.2673],
        [-1.3102, -2.1804,  4.2348],
        [-1.6504, -2.0599,  4.4493],
        [-1.6235, -2.2521,  4.4685],
        [-1.4282, -2.0816,  4.4137]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4750: tensor([[-1.1521, -2.1199,  4.2762],
        [-1.3659, -2.2914,  4.3338],
        [-1.3654, -2.4052,  4.3405],
        [-1.5575, -2.3073,  4.1940],
        [-1.4157, -2.3278,  4.1741],
        [-1.5602, -2.0654,  4.1725],
        [-1.7240, -2.0568,  4.2481],
        [-1.3234, -2.3199,  4.1834],
        [-1.6597, -2.1375,  4.3686],
        [-1.3739, -2.3034,  4.2948],
        [-1.6145, -2.3250,  4.4069],
        [-1.5477, -2.0924,  4.3404]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4800: tensor([[-1.3454, -2.4040,  4.2912],
        [-1.3396, -2.3765,  4.0907],
        [-1.7009, -2.0067,  4.4548],
        [-1.3402, -2.2697,  4.2751],
        [-1.5258, -2.3163,  4.3104],
        [-1.7368, -2.0985,  4.3930],
        [-1.6250, -2.2088,  4.3383],
        [-1.6747, -2.2738,  4.2899],
        [-1.5060, -2.2259,  4.0829],
        [-1.5991, -2.2537,  4.1855],
        [-1.3498, -2.0932,  4.2121],
        [-1.5239, -2.2304,  4.2421]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4850: tensor([[-1.3384, -2.3192,  4.1125],
        [-1.5746, -2.3158,  4.2853],
        [-1.4935, -2.2452,  4.2791],
        [-1.6782, -2.0646,  4.5189],
        [-1.6168, -1.9577,  4.1756],
        [-1.6358, -2.1685,  4.4349],
        [-1.5864, -2.1806,  4.5041],
        [-1.5265, -2.2170,  4.1236],
        [-1.6301, -2.2176,  4.2383],
        [-1.4653, -2.3858,  4.2835],
        [-1.3521, -2.2721,  4.4186],
        [-1.5579, -2.2014,  4.2458]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4900: tensor([[-1.4975, -2.3325,  4.4233],
        [-1.5657, -2.3365,  4.1828],
        [-1.5872, -2.0098,  4.2725],
        [-1.6642, -2.2614,  4.2441],
        [-1.5209, -2.3453,  4.2274],
        [-1.4711, -2.2042,  4.3361],
        [-1.3436, -2.3739,  4.2459],
        [-1.4773, -2.2146,  4.2996],
        [-1.4089, -2.1908,  4.3191],
        [-1.4113, -2.3503,  4.4346],
        [-1.5406, -2.2755,  4.1241],
        [-1.5234, -2.3193,  4.1125]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #4950: tensor([[-1.3823, -2.2393,  4.0869],
        [-1.4231, -2.3776,  4.2582],
        [-1.2734, -2.3244,  4.1782],
        [-1.5428, -2.3193,  4.3268],
        [-1.5047, -2.2050,  4.2808],
        [-1.4933, -2.2569,  4.2094],
        [-1.5968, -2.0798,  4.2775],
        [-1.5499, -2.2056,  4.3321],
        [-1.6095, -2.2965,  4.1284],
        [-1.4491, -2.3499,  4.0862],
        [-1.6368, -2.1221,  4.2566],
        [-1.6326, -2.2705,  4.1691]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5000: tensor([[-1.4799, -2.1112,  4.2150],
        [-1.5414, -2.1640,  4.4437],
        [-1.3231, -2.2896,  4.2636],
        [-1.3407, -2.2272,  4.1906],
        [-1.4507, -2.2804,  4.2873],
        [-1.5439, -2.1057,  4.1272],
        [-1.2972, -2.2429,  4.3229],
        [-1.5760, -2.1764,  4.1854],
        [-1.4596, -2.2921,  4.1189],
        [-1.3763, -2.3587,  4.1504],
        [-1.6469, -2.1865,  4.2817],
        [-1.3238, -2.2633,  4.2042]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5050: tensor([[-1.5180, -2.1759,  4.3336],
        [-1.3320, -2.0946,  4.2636],
        [-1.6425, -2.3328,  4.3273],
        [-1.5594, -2.2967,  4.1755],
        [-1.5608, -2.3121,  4.1833],
        [-1.4256, -2.4347,  4.1427],
        [-1.2047, -2.2071,  4.2827],
        [-1.4108, -2.2207,  3.8736],
        [-1.5468, -2.1945,  4.3355],
        [-1.3077, -2.2744,  4.1872],
        [-1.3160, -2.2167,  4.2561],
        [-1.4671, -2.2406,  4.1372]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5100: tensor([[-1.5021, -2.1476,  4.1886],
        [-1.4968, -2.3618,  4.2407],
        [-1.3484, -2.3426,  4.2753],
        [-1.4765, -2.2078,  4.2034],
        [-1.1918, -2.2730,  3.9134],
        [-1.3640, -2.3477,  4.0673],
        [-1.4864, -2.0563,  4.1824],
        [-1.3584, -2.3154,  4.3527],
        [-1.3783, -2.1212,  4.1420],
        [-1.3176, -2.2875,  4.0333],
        [-1.5241, -2.3031,  4.1890],
        [-1.4512, -2.1816,  4.3202]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5150: tensor([[-1.4929, -2.2552,  4.2401],
        [-1.3125, -2.3861,  4.1776],
        [-1.4008, -2.3405,  4.1729],
        [-1.3264, -2.2148,  3.9964],
        [-1.4510, -2.2144,  4.2507],
        [-1.3589, -2.3842,  4.2232],
        [-1.3159, -2.1304,  3.9788],
        [-1.1630, -2.3127,  4.0301],
        [-1.3485, -2.3371,  4.0259],
        [-1.3936, -2.1719,  4.1028],
        [-1.4833, -2.3269,  4.2252],
        [-1.4755, -2.1526,  4.1255]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5200: tensor([[-1.3689, -2.2916,  4.1528],
        [-1.5138, -2.3061,  4.1280],
        [-1.3986, -2.3572,  4.2321],
        [-1.1284, -2.3026,  3.8316],
        [-1.2760, -2.4228,  4.1794],
        [-1.5167, -2.2419,  4.1243],
        [-1.3727, -2.2387,  4.1725],
        [-1.4404, -2.3246,  4.2271],
        [-1.4675, -2.2922,  4.3708],
        [-1.5224, -2.2713,  4.0486],
        [-1.2092, -2.3296,  4.2943],
        [-1.4196, -2.3149,  4.1955]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5250: tensor([[-1.2835, -2.2768,  4.3193],
        [-1.4037, -2.0363,  4.3247],
        [-1.3603, -2.3222,  4.1492],
        [-1.4461, -2.2537,  4.2997],
        [-1.2863, -2.3510,  4.2815],
        [-1.5209, -2.0508,  4.2014],
        [-1.4705, -2.3046,  4.2935],
        [-1.4027, -2.2193,  4.0952],
        [-1.4340, -2.3121,  4.1875],
        [-1.4325, -2.0797,  4.1136],
        [-1.2100, -2.3471,  4.1847],
        [-1.4095, -2.3602,  4.1366]], device='cuda:0', grad_fn=<AddmmBackward>)
Iter #5300: tensor([[-1.3096, -2.3944,  4.1374],
        [-1.3550, -2.3021,  4.2173],
        [-1.1461, -2.3307,  4.1097],
        [-1.5822, -2.1053,  4.1815],
        [-1.2899, -2.3189,  4.2493],
        [-1.2948, -2.3822,  4.1843],
        [-1.3866, -2.1401,  4.1190],
        [-1.3352, -2.3691,  4.0681],
        [-1.4445, -2.0735,  4.3051],
        [-1.4691, -2.3688,  4.1317],
        [-1.1853, -2.3160,  4.1135],
        [-1.5634, -2.2074,  4.2145]], device='cuda:0', grad_fn=<AddmmBackward>)
