Iter #50: tensor([[0.7371, 0.1149, 0.1481],
        [0.5518, 0.1951, 0.2532],
        [0.5627, 0.2311, 0.2062],
        [0.4978, 0.2498, 0.2524],
        [0.4565, 0.2629, 0.2806],
        [0.5757, 0.1817, 0.2425],
        [0.5855, 0.2260, 0.1885],
        [0.3794, 0.2821, 0.3385],
        [0.3845, 0.2864, 0.3291],
        [0.2948, 0.2949, 0.4103],
        [0.1087, 0.4746, 0.4166],
        [0.2053, 0.3847, 0.4100]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0237, 0.7629, 0.2135],
        [0.0100, 0.4457, 0.5443],
        [0.0091, 0.4848, 0.5061],
        [0.0076, 0.2350, 0.7574],
        [0.0069, 0.2348, 0.7583],
        [0.0067, 0.2036, 0.7897],
        [0.0074, 0.3076, 0.6850],
        [0.0071, 0.3164, 0.6765],
        [0.0080, 0.3032, 0.6888],
        [0.0077, 0.2863, 0.7060],
        [0.0103, 0.4953, 0.4944],
        [0.0119, 0.4769, 0.5111]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0026, 0.0483, 0.9491],
        [0.0029, 0.0437, 0.9534],
        [0.0032, 0.0458, 0.9510],
        [0.0029, 0.0338, 0.9633],
        [0.0036, 0.0541, 0.9423],
        [0.0036, 0.0588, 0.9376],
        [0.0029, 0.0456, 0.9515],
        [0.0031, 0.0391, 0.9579],
        [0.0042, 0.0543, 0.9415],
        [0.0027, 0.0414, 0.9560],
        [0.0033, 0.0514, 0.9453],
        [0.0044, 0.0500, 0.9456]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.0034, 0.0481, 0.9485],
        [0.0030, 0.0340, 0.9630],
        [0.0028, 0.0401, 0.9571],
        [0.0022, 0.0339, 0.9638],
        [0.0028, 0.0379, 0.9594],
        [0.0029, 0.0427, 0.9545],
        [0.0028, 0.0444, 0.9528],
        [0.0025, 0.0374, 0.9601],
        [0.0028, 0.0457, 0.9515],
        [0.0027, 0.0388, 0.9585],
        [0.0026, 0.0393, 0.9581],
        [0.0031, 0.0349, 0.9620]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0030, 0.0524, 0.9446],
        [0.0031, 0.0513, 0.9455],
        [0.0026, 0.0444, 0.9530],
        [0.0026, 0.0377, 0.9596],
        [0.0030, 0.0547, 0.9422],
        [0.0027, 0.0395, 0.9578],
        [0.0032, 0.0489, 0.9479],
        [0.0026, 0.0429, 0.9545],
        [0.0027, 0.0420, 0.9553],
        [0.0024, 0.0414, 0.9562],
        [0.0027, 0.0492, 0.9481],
        [0.0026, 0.0395, 0.9580]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.0029, 0.0548, 0.9423],
        [0.0035, 0.0610, 0.9355],
        [0.0033, 0.0594, 0.9374],
        [0.0026, 0.0501, 0.9473],
        [0.0027, 0.0467, 0.9506],
        [0.0029, 0.0574, 0.9397],
        [0.0028, 0.0496, 0.9476],
        [0.0032, 0.0437, 0.9531],
        [0.0031, 0.0564, 0.9405],
        [0.0030, 0.0575, 0.9395],
        [0.0023, 0.0490, 0.9487],
        [0.0028, 0.0558, 0.9414]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.0028, 0.0595, 0.9377],
        [0.0029, 0.0542, 0.9428],
        [0.0031, 0.0625, 0.9344],
        [0.0031, 0.0558, 0.9411],
        [0.0040, 0.0803, 0.9156],
        [0.0035, 0.0608, 0.9357],
        [0.0028, 0.0625, 0.9347],
        [0.0028, 0.0578, 0.9393],
        [0.0035, 0.0706, 0.9259],
        [0.0027, 0.0603, 0.9370],
        [0.0031, 0.0680, 0.9288],
        [0.0028, 0.0630, 0.9342]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0036, 0.0934, 0.9030],
        [0.0037, 0.0880, 0.9083],
        [0.0032, 0.0913, 0.9055],
        [0.0032, 0.0865, 0.9103],
        [0.0037, 0.0779, 0.9185],
        [0.0027, 0.0776, 0.9198],
        [0.0034, 0.0884, 0.9082],
        [0.0029, 0.0903, 0.9069],
        [0.0034, 0.0957, 0.9009],
        [0.0044, 0.1019, 0.8937],
        [0.0032, 0.0995, 0.8973],
        [0.0031, 0.0985, 0.8984]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0035, 0.1475, 0.8490],
        [0.0046, 0.1171, 0.8784],
        [0.0048, 0.1712, 0.8240],
        [0.0035, 0.1256, 0.8709],
        [0.0032, 0.1429, 0.8540],
        [0.0038, 0.1514, 0.8449],
        [0.0041, 0.1340, 0.8619],
        [0.0037, 0.1210, 0.8753],
        [0.0035, 0.1206, 0.8759],
        [0.0033, 0.1244, 0.8723],
        [0.0037, 0.1237, 0.8726],
        [0.0036, 0.1273, 0.8691]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0053, 0.3938, 0.6009],
        [0.0046, 0.3454, 0.6500],
        [0.0043, 0.3695, 0.6262],
        [0.0054, 0.3660, 0.6286],
        [0.0048, 0.3309, 0.6643],
        [0.0043, 0.3687, 0.6270],
        [0.0045, 0.3611, 0.6344],
        [0.0052, 0.3469, 0.6479],
        [0.0050, 0.3896, 0.6054],
        [0.0049, 0.4475, 0.5476],
        [0.0044, 0.3717, 0.6239],
        [0.0059, 0.3539, 0.6402]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.8493, 0.1467, 0.0040],
        [0.8651, 0.1305, 0.0044],
        [0.9049, 0.0913, 0.0038],
        [0.8912, 0.1055, 0.0034],
        [0.6020, 0.3912, 0.0068],
        [0.6669, 0.3279, 0.0052],
        [0.7720, 0.2236, 0.0044],
        [0.4113, 0.5805, 0.0082],
        [0.0079, 0.8217, 0.1704],
        [0.9050, 0.0915, 0.0035],
        [0.8376, 0.1580, 0.0043],
        [0.9151, 0.0821, 0.0027]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #600: tensor([[0.9768, 0.0214, 0.0018],
        [0.9706, 0.0274, 0.0020],
        [0.9757, 0.0223, 0.0020],
        [0.9712, 0.0267, 0.0021],
        [0.9739, 0.0239, 0.0022],
        [0.9722, 0.0254, 0.0024],
        [0.9733, 0.0249, 0.0018],
        [0.9748, 0.0231, 0.0021],
        [0.9774, 0.0200, 0.0025],
        [0.9748, 0.0228, 0.0025],
        [0.9776, 0.0205, 0.0020],
        [0.9754, 0.0224, 0.0022]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #650: tensor([[0.9709, 0.0274, 0.0017],
        [0.9698, 0.0279, 0.0024],
        [0.9690, 0.0285, 0.0025],
        [0.9775, 0.0202, 0.0023],
        [0.9670, 0.0304, 0.0026],
        [0.9753, 0.0227, 0.0019],
        [0.9689, 0.0289, 0.0023],
        [0.9712, 0.0265, 0.0023],
        [0.9740, 0.0242, 0.0018],
        [0.9707, 0.0268, 0.0025],
        [0.9750, 0.0232, 0.0018],
        [0.9716, 0.0262, 0.0022]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #700: tensor([[0.9723, 0.0251, 0.0025],
        [0.9774, 0.0208, 0.0018],
        [0.9655, 0.0317, 0.0029],
        [0.9713, 0.0271, 0.0016],
        [0.9715, 0.0269, 0.0017],
        [0.9681, 0.0298, 0.0022],
        [0.9746, 0.0233, 0.0020],
        [0.9763, 0.0219, 0.0018],
        [0.9709, 0.0273, 0.0018],
        [0.9719, 0.0263, 0.0018],
        [0.9710, 0.0267, 0.0022],
        [0.9658, 0.0322, 0.0020]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #750: tensor([[0.9678, 0.0303, 0.0018],
        [0.9686, 0.0293, 0.0021],
        [0.9694, 0.0283, 0.0023],
        [0.9735, 0.0240, 0.0024],
        [0.9729, 0.0251, 0.0020],
        [0.9690, 0.0292, 0.0018],
        [0.9605, 0.0369, 0.0026],
        [0.9726, 0.0254, 0.0020],
        [0.9689, 0.0283, 0.0027],
        [0.9714, 0.0267, 0.0019],
        [0.9553, 0.0419, 0.0027],
        [0.9597, 0.0383, 0.0020]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #800: tensor([[0.9704, 0.0278, 0.0018],
        [0.9483, 0.0494, 0.0023],
        [0.9688, 0.0293, 0.0019],
        [0.9672, 0.0309, 0.0019],
        [0.9682, 0.0302, 0.0016],
        [0.9586, 0.0389, 0.0025],
        [0.9705, 0.0273, 0.0022],
        [0.9408, 0.0561, 0.0031],
        [0.9554, 0.0424, 0.0022],
        [0.9327, 0.0638, 0.0035],
        [0.9666, 0.0315, 0.0019],
        [0.9599, 0.0380, 0.0022]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #850: tensor([[0.7724, 0.2233, 0.0044],
        [0.8840, 0.1128, 0.0032],
        [0.9473, 0.0500, 0.0027],
        [0.9506, 0.0465, 0.0029],
        [0.9434, 0.0540, 0.0026],
        [0.9502, 0.0475, 0.0024],
        [0.9497, 0.0477, 0.0026],
        [0.9601, 0.0374, 0.0025],
        [0.9598, 0.0381, 0.0021],
        [0.9496, 0.0483, 0.0021],
        [0.9462, 0.0517, 0.0022],
        [0.9569, 0.0411, 0.0021]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #900: tensor([[0.9409, 0.0566, 0.0025],
        [0.9191, 0.0783, 0.0026],
        [0.9337, 0.0640, 0.0023],
        [0.9370, 0.0610, 0.0021],
        [0.9380, 0.0598, 0.0022],
        [0.9437, 0.0539, 0.0024],
        [0.9463, 0.0516, 0.0021],
        [0.9415, 0.0559, 0.0027],
        [0.9367, 0.0612, 0.0021],
        [0.9375, 0.0604, 0.0021],
        [0.9322, 0.0653, 0.0025],
        [0.9325, 0.0656, 0.0019]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #950: tensor([[0.7508, 0.2459, 0.0033],
        [0.5087, 0.4872, 0.0041],
        [0.2582, 0.7342, 0.0076],
        [0.4585, 0.5372, 0.0043],
        [0.6046, 0.3925, 0.0029],
        [0.0019, 0.0318, 0.9663],
        [0.0023, 0.0392, 0.9585],
        [0.0021, 0.0319, 0.9660],
        [0.5390, 0.4563, 0.0047],
        [0.0202, 0.9715, 0.0083],
        [0.3728, 0.6229, 0.0043],
        [0.0041, 0.6869, 0.3089]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1000: tensor([[1.2933e-03, 1.7849e-02, 9.8086e-01],
        [1.3530e-03, 1.6931e-02, 9.8172e-01],
        [1.1461e-03, 1.4011e-02, 9.8484e-01],
        [1.0848e-03, 1.7831e-02, 9.8108e-01],
        [1.3550e-03, 1.7455e-02, 9.8119e-01],
        [8.5658e-04, 9.8467e-03, 9.8930e-01],
        [9.0166e-04, 1.0300e-02, 9.8880e-01],
        [9.7628e-04, 1.3283e-02, 9.8574e-01],
        [9.3579e-04, 1.1964e-02, 9.8710e-01],
        [9.1533e-04, 1.4683e-02, 9.8440e-01],
        [8.9441e-04, 1.1577e-02, 9.8753e-01],
        [8.5726e-04, 9.9935e-03, 9.8915e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1050: tensor([[9.7923e-04, 1.4085e-02, 9.8494e-01],
        [8.7420e-04, 1.1475e-02, 9.8765e-01],
        [1.0198e-03, 1.5105e-02, 9.8388e-01],
        [1.0831e-03, 1.4316e-02, 9.8460e-01],
        [1.3530e-03, 1.7006e-02, 9.8164e-01],
        [9.8800e-04, 1.1576e-02, 9.8744e-01],
        [1.1701e-03, 1.5450e-02, 9.8338e-01],
        [1.0763e-03, 1.1025e-02, 9.8790e-01],
        [9.2803e-04, 1.0549e-02, 9.8852e-01],
        [9.7274e-04, 1.0707e-02, 9.8832e-01],
        [9.4837e-04, 1.3466e-02, 9.8559e-01],
        [7.3531e-04, 1.0108e-02, 9.8916e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1100: tensor([[8.6482e-04, 1.1866e-02, 9.8727e-01],
        [1.2478e-03, 1.4160e-02, 9.8459e-01],
        [9.3600e-04, 1.0630e-02, 9.8843e-01],
        [9.8011e-04, 1.1940e-02, 9.8708e-01],
        [1.0495e-03, 1.2812e-02, 9.8614e-01],
        [8.2080e-04, 1.1839e-02, 9.8734e-01],
        [9.6466e-04, 1.3260e-02, 9.8578e-01],
        [9.6688e-04, 1.1499e-02, 9.8753e-01],
        [9.9338e-04, 1.1236e-02, 9.8777e-01],
        [8.8908e-04, 1.2478e-02, 9.8663e-01],
        [1.0308e-03, 1.2799e-02, 9.8617e-01],
        [8.3604e-04, 1.0463e-02, 9.8870e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1150: tensor([[9.1298e-04, 1.1464e-02, 9.8762e-01],
        [1.0599e-03, 1.2799e-02, 9.8614e-01],
        [7.5988e-04, 1.2503e-02, 9.8674e-01],
        [8.4077e-04, 1.2850e-02, 9.8631e-01],
        [1.0366e-03, 1.4099e-02, 9.8486e-01],
        [9.0992e-04, 1.1251e-02, 9.8784e-01],
        [8.0681e-04, 1.2621e-02, 9.8657e-01],
        [9.6782e-04, 1.4153e-02, 9.8488e-01],
        [1.1300e-03, 1.5037e-02, 9.8383e-01],
        [1.3769e-03, 1.4722e-02, 9.8390e-01],
        [8.5417e-04, 1.0715e-02, 9.8843e-01],
        [1.0323e-03, 1.0953e-02, 9.8801e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1200: tensor([[1.5147e-03, 1.9028e-02, 9.7946e-01],
        [1.1080e-03, 1.5089e-02, 9.8380e-01],
        [8.5966e-04, 1.4819e-02, 9.8432e-01],
        [9.9396e-04, 1.2322e-02, 9.8668e-01],
        [1.1482e-03, 1.7169e-02, 9.8168e-01],
        [1.0827e-03, 1.2378e-02, 9.8654e-01],
        [8.6097e-04, 1.2482e-02, 9.8666e-01],
        [8.9542e-04, 1.2053e-02, 9.8705e-01],
        [1.0684e-03, 1.0640e-02, 9.8829e-01],
        [1.1051e-03, 1.3226e-02, 9.8567e-01],
        [9.2298e-04, 1.2233e-02, 9.8684e-01],
        [9.4766e-04, 1.1954e-02, 9.8710e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1250: tensor([[1.0324e-03, 1.2350e-02, 9.8662e-01],
        [8.5180e-04, 1.3074e-02, 9.8607e-01],
        [9.2013e-04, 1.2377e-02, 9.8670e-01],
        [1.1179e-03, 1.4300e-02, 9.8458e-01],
        [8.3458e-04, 1.1725e-02, 9.8744e-01],
        [9.3503e-04, 1.2766e-02, 9.8630e-01],
        [1.1400e-03, 1.2803e-02, 9.8606e-01],
        [8.9843e-04, 1.5446e-02, 9.8366e-01],
        [9.8108e-04, 1.0793e-02, 9.8823e-01],
        [8.1524e-04, 1.1995e-02, 9.8719e-01],
        [9.9229e-04, 1.5614e-02, 9.8339e-01],
        [1.3329e-03, 1.8442e-02, 9.8023e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1300: tensor([[8.8375e-04, 1.1664e-02, 9.8745e-01],
        [1.1375e-03, 1.4274e-02, 9.8459e-01],
        [8.3848e-04, 1.2223e-02, 9.8694e-01],
        [1.0546e-03, 1.2932e-02, 9.8601e-01],
        [1.1073e-03, 1.5093e-02, 9.8380e-01],
        [9.1901e-04, 1.3111e-02, 9.8597e-01],
        [9.7139e-04, 1.2949e-02, 9.8608e-01],
        [1.4542e-03, 1.9940e-02, 9.7861e-01],
        [9.0148e-04, 1.4970e-02, 9.8413e-01],
        [1.0392e-03, 1.6572e-02, 9.8239e-01],
        [1.3140e-03, 1.5966e-02, 9.8272e-01],
        [1.0924e-03, 1.5600e-02, 9.8331e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1350: tensor([[8.8526e-04, 1.3080e-02, 9.8604e-01],
        [1.0310e-03, 1.3265e-02, 9.8570e-01],
        [9.0423e-04, 1.2842e-02, 9.8625e-01],
        [9.4329e-04, 1.4153e-02, 9.8490e-01],
        [7.8816e-04, 1.3626e-02, 9.8559e-01],
        [1.3365e-03, 1.7479e-02, 9.8118e-01],
        [1.0389e-03, 1.5380e-02, 9.8358e-01],
        [1.0360e-03, 1.3466e-02, 9.8550e-01],
        [9.7761e-04, 1.3599e-02, 9.8542e-01],
        [9.1257e-04, 1.3023e-02, 9.8606e-01],
        [1.0633e-03, 1.5068e-02, 9.8387e-01],
        [9.2148e-04, 1.3704e-02, 9.8537e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1400: tensor([[1.0057e-03, 1.3336e-02, 9.8566e-01],
        [1.2437e-03, 1.7956e-02, 9.8080e-01],
        [8.8340e-04, 1.1462e-02, 9.8765e-01],
        [1.3264e-03, 2.1265e-02, 9.7741e-01],
        [8.9228e-04, 1.3260e-02, 9.8585e-01],
        [1.0448e-03, 1.3733e-02, 9.8522e-01],
        [1.1255e-03, 1.6342e-02, 9.8253e-01],
        [1.0471e-03, 1.5077e-02, 9.8388e-01],
        [1.0730e-03, 1.5570e-02, 9.8336e-01],
        [8.0253e-04, 1.2870e-02, 9.8633e-01],
        [1.0768e-03, 1.7597e-02, 9.8133e-01],
        [1.1126e-03, 1.9450e-02, 9.7944e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1450: tensor([[7.9659e-04, 1.5082e-02, 9.8412e-01],
        [8.8861e-04, 1.4488e-02, 9.8462e-01],
        [1.1560e-03, 1.6761e-02, 9.8208e-01],
        [1.2842e-03, 2.0076e-02, 9.7864e-01],
        [1.1177e-03, 1.5979e-02, 9.8290e-01],
        [1.2341e-03, 1.5711e-02, 9.8305e-01],
        [1.2954e-03, 1.8310e-02, 9.8039e-01],
        [1.0815e-03, 1.5877e-02, 9.8304e-01],
        [1.0682e-03, 1.6213e-02, 9.8272e-01],
        [1.0669e-03, 1.3810e-02, 9.8512e-01],
        [9.3821e-04, 1.4033e-02, 9.8503e-01],
        [1.4082e-03, 1.5113e-02, 9.8348e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1500: tensor([[9.3585e-04, 1.6534e-02, 9.8253e-01],
        [1.4008e-03, 1.8387e-02, 9.8021e-01],
        [1.0684e-03, 1.5321e-02, 9.8361e-01],
        [1.1273e-03, 1.6458e-02, 9.8242e-01],
        [9.8637e-04, 1.3495e-02, 9.8552e-01],
        [1.0543e-03, 1.7507e-02, 9.8144e-01],
        [1.1627e-03, 1.4405e-02, 9.8443e-01],
        [1.1730e-03, 1.8729e-02, 9.8010e-01],
        [9.1056e-04, 1.7586e-02, 9.8150e-01],
        [9.0784e-04, 1.6954e-02, 9.8214e-01],
        [8.8527e-04, 1.2410e-02, 9.8670e-01],
        [1.0003e-03, 1.6656e-02, 9.8234e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1550: tensor([[1.0804e-03, 1.5179e-02, 9.8374e-01],
        [1.1604e-03, 1.4708e-02, 9.8413e-01],
        [1.1577e-03, 1.6546e-02, 9.8230e-01],
        [9.5231e-04, 1.5339e-02, 9.8371e-01],
        [1.0268e-03, 1.5668e-02, 9.8331e-01],
        [9.3847e-04, 1.6550e-02, 9.8251e-01],
        [9.4035e-04, 1.4162e-02, 9.8490e-01],
        [7.2968e-04, 1.3139e-02, 9.8613e-01],
        [1.0272e-03, 1.8275e-02, 9.8070e-01],
        [8.8416e-04, 1.4468e-02, 9.8465e-01],
        [1.0273e-03, 1.3905e-02, 9.8507e-01],
        [9.7837e-04, 1.3081e-02, 9.8594e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1600: tensor([[1.1493e-03, 2.0437e-02, 9.7841e-01],
        [1.2728e-03, 1.5548e-02, 9.8318e-01],
        [1.2679e-03, 1.9285e-02, 9.7945e-01],
        [9.9546e-04, 1.7855e-02, 9.8115e-01],
        [8.1115e-04, 1.4263e-02, 9.8493e-01],
        [1.2343e-03, 1.7956e-02, 9.8081e-01],
        [8.5437e-04, 1.4139e-02, 9.8501e-01],
        [1.1959e-03, 1.5580e-02, 9.8322e-01],
        [1.3091e-03, 1.6359e-02, 9.8233e-01],
        [1.2416e-03, 2.2687e-02, 9.7607e-01],
        [1.0516e-03, 1.4951e-02, 9.8400e-01],
        [9.5443e-04, 1.6394e-02, 9.8265e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1650: tensor([[1.0570e-03, 1.9147e-02, 9.7980e-01],
        [1.0091e-03, 1.8180e-02, 9.8081e-01],
        [9.5166e-04, 1.5069e-02, 9.8398e-01],
        [1.1157e-03, 1.7647e-02, 9.8124e-01],
        [1.1563e-03, 1.8746e-02, 9.8010e-01],
        [1.2551e-03, 2.2469e-02, 9.7628e-01],
        [1.2836e-03, 2.3358e-02, 9.7536e-01],
        [1.0408e-03, 1.8834e-02, 9.8013e-01],
        [1.0243e-03, 1.5465e-02, 9.8351e-01],
        [1.0081e-03, 1.6028e-02, 9.8296e-01],
        [1.2416e-03, 2.0919e-02, 9.7784e-01],
        [9.9371e-04, 1.6832e-02, 9.8217e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1700: tensor([[1.0220e-03, 1.9878e-02, 9.7910e-01],
        [1.5094e-03, 2.0384e-02, 9.7811e-01],
        [1.1036e-03, 1.7195e-02, 9.8170e-01],
        [1.1759e-03, 1.7495e-02, 9.8133e-01],
        [9.5787e-04, 2.1217e-02, 9.7783e-01],
        [1.2077e-03, 1.7264e-02, 9.8153e-01],
        [1.0792e-03, 1.6128e-02, 9.8279e-01],
        [1.0612e-03, 1.9804e-02, 9.7914e-01],
        [8.0311e-04, 1.5266e-02, 9.8393e-01],
        [1.2468e-03, 2.3936e-02, 9.7482e-01],
        [1.2740e-03, 2.1037e-02, 9.7769e-01],
        [1.3162e-03, 2.1340e-02, 9.7734e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1750: tensor([[1.1173e-03, 2.1913e-02, 9.7697e-01],
        [1.0023e-03, 1.7826e-02, 9.8117e-01],
        [1.0548e-03, 1.9764e-02, 9.7918e-01],
        [1.5541e-03, 2.3709e-02, 9.7474e-01],
        [1.0216e-03, 1.8961e-02, 9.8002e-01],
        [9.9082e-04, 1.9318e-02, 9.7969e-01],
        [8.7512e-04, 1.5365e-02, 9.8376e-01],
        [9.1676e-04, 1.6873e-02, 9.8221e-01],
        [9.3842e-04, 1.8017e-02, 9.8105e-01],
        [1.3456e-03, 1.7301e-02, 9.8135e-01],
        [9.7148e-04, 1.9114e-02, 9.7991e-01],
        [1.2163e-03, 2.1471e-02, 9.7731e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1800: tensor([[1.1748e-03, 1.8408e-02, 9.8042e-01],
        [1.2413e-03, 2.1421e-02, 9.7734e-01],
        [6.9221e-04, 1.7218e-02, 9.8209e-01],
        [1.0886e-03, 1.7221e-02, 9.8169e-01],
        [8.5681e-04, 1.9484e-02, 9.7966e-01],
        [1.1432e-03, 2.1271e-02, 9.7759e-01],
        [1.0900e-03, 1.7709e-02, 9.8120e-01],
        [1.1285e-03, 2.0225e-02, 9.7865e-01],
        [1.1349e-03, 2.1616e-02, 9.7725e-01],
        [8.9495e-04, 1.7914e-02, 9.8119e-01],
        [1.0005e-03, 2.3270e-02, 9.7573e-01],
        [9.3661e-04, 1.8494e-02, 9.8057e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1850: tensor([[9.9343e-04, 1.8315e-02, 9.8069e-01],
        [1.2432e-03, 2.2322e-02, 9.7643e-01],
        [1.1904e-03, 2.2453e-02, 9.7636e-01],
        [1.0035e-03, 1.7937e-02, 9.8106e-01],
        [9.5776e-04, 1.9463e-02, 9.7958e-01],
        [1.3695e-03, 3.0265e-02, 9.6837e-01],
        [8.9746e-04, 2.0295e-02, 9.7881e-01],
        [1.1937e-03, 1.8349e-02, 9.8046e-01],
        [1.3541e-03, 2.0312e-02, 9.7833e-01],
        [8.7802e-04, 1.7580e-02, 9.8154e-01],
        [1.1222e-03, 2.1209e-02, 9.7767e-01],
        [1.4229e-03, 2.8678e-02, 9.6990e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #1900: tensor([[0.0010, 0.0241, 0.9748],
        [0.0012, 0.0228, 0.9759],
        [0.0010, 0.0198, 0.9792],
        [0.0011, 0.0239, 0.9750],
        [0.0011, 0.0233, 0.9755],
        [0.0011, 0.0235, 0.9754],
        [0.0010, 0.0207, 0.9783],
        [0.0014, 0.0247, 0.9739],
        [0.0013, 0.0200, 0.9787],
        [0.0013, 0.0270, 0.9717],
        [0.0014, 0.0251, 0.9735],
        [0.0012, 0.0204, 0.9784]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1950: tensor([[0.0010, 0.0269, 0.9720],
        [0.0013, 0.0281, 0.9706],
        [0.0013, 0.0218, 0.9769],
        [0.0012, 0.0259, 0.9729],
        [0.0011, 0.0231, 0.9759],
        [0.0013, 0.0278, 0.9709],
        [0.0011, 0.0254, 0.9735],
        [0.0011, 0.0226, 0.9762],
        [0.0012, 0.0228, 0.9760],
        [0.0010, 0.0198, 0.9792],
        [0.0010, 0.0202, 0.9788],
        [0.0010, 0.0204, 0.9786]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2000: tensor([[9.7523e-04, 1.6025e-02, 9.8300e-01],
        [8.9052e-04, 2.2765e-02, 9.7634e-01],
        [1.2690e-03, 2.7140e-02, 9.7159e-01],
        [1.2243e-03, 2.3326e-02, 9.7545e-01],
        [9.4272e-04, 2.3209e-02, 9.7585e-01],
        [1.0800e-03, 2.2525e-02, 9.7639e-01],
        [9.4932e-04, 2.3574e-02, 9.7548e-01],
        [1.0563e-03, 2.5596e-02, 9.7335e-01],
        [1.1760e-03, 2.6869e-02, 9.7195e-01],
        [1.3019e-03, 2.2864e-02, 9.7583e-01],
        [1.2418e-03, 2.3721e-02, 9.7504e-01],
        [9.5917e-04, 2.2257e-02, 9.7678e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #2050: tensor([[1.0701e-03, 2.6023e-02, 9.7291e-01],
        [9.8131e-04, 2.5325e-02, 9.7369e-01],
        [1.9964e-03, 2.9970e-02, 9.6803e-01],
        [1.1149e-03, 2.8537e-02, 9.7035e-01],
        [1.1952e-03, 2.6626e-02, 9.7218e-01],
        [1.3445e-03, 3.3369e-02, 9.6529e-01],
        [1.1304e-03, 3.1307e-02, 9.6756e-01],
        [9.3768e-04, 2.1862e-02, 9.7720e-01],
        [1.6029e-03, 3.6968e-02, 9.6143e-01],
        [9.9996e-04, 2.6007e-02, 9.7299e-01],
        [1.0155e-03, 2.3914e-02, 9.7507e-01],
        [1.1093e-03, 3.0649e-02, 9.6824e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #2100: tensor([[1.3161e-03, 3.8796e-02, 9.5989e-01],
        [1.2729e-03, 3.4145e-02, 9.6458e-01],
        [1.0499e-03, 2.6505e-02, 9.7244e-01],
        [1.2423e-03, 2.6747e-02, 9.7201e-01],
        [9.9397e-04, 2.6117e-02, 9.7289e-01],
        [9.7563e-04, 2.1026e-02, 9.7800e-01],
        [1.2873e-03, 2.7893e-02, 9.7082e-01],
        [1.0231e-03, 2.8588e-02, 9.7039e-01],
        [1.5284e-03, 3.4222e-02, 9.6425e-01],
        [1.1515e-03, 3.0500e-02, 9.6835e-01],
        [1.3811e-03, 3.2323e-02, 9.6630e-01],
        [1.0529e-03, 2.0058e-02, 9.7889e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #2150: tensor([[1.2694e-03, 3.4146e-02, 9.6458e-01],
        [1.2294e-03, 3.1376e-02, 9.6739e-01],
        [1.3117e-03, 3.4511e-02, 9.6418e-01],
        [1.2528e-03, 3.0621e-02, 9.6813e-01],
        [1.4376e-03, 3.3569e-02, 9.6499e-01],
        [1.3359e-03, 3.1755e-02, 9.6691e-01],
        [1.1159e-03, 2.8300e-02, 9.7058e-01],
        [1.2961e-03, 4.3503e-02, 9.5520e-01],
        [9.3221e-04, 2.8349e-02, 9.7072e-01],
        [1.0700e-03, 2.6444e-02, 9.7249e-01],
        [1.4815e-03, 4.3300e-02, 9.5522e-01],
        [9.7755e-04, 2.7326e-02, 9.7170e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #2200: tensor([[1.1108e-03, 3.2451e-02, 9.6644e-01],
        [1.1331e-03, 3.2432e-02, 9.6643e-01],
        [9.9727e-04, 2.7034e-02, 9.7197e-01],
        [1.0842e-03, 3.1567e-02, 9.6735e-01],
        [9.4246e-04, 3.2267e-02, 9.6679e-01],
        [1.3766e-03, 3.1771e-02, 9.6685e-01],
        [1.1491e-03, 3.3306e-02, 9.6555e-01],
        [1.1451e-03, 3.0321e-02, 9.6853e-01],
        [1.2913e-03, 3.5963e-02, 9.6275e-01],
        [1.1819e-03, 3.4077e-02, 9.6474e-01],
        [1.2114e-03, 3.2619e-02, 9.6617e-01],
        [1.1052e-03, 3.4007e-02, 9.6489e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #2250: tensor([[0.0010, 0.0326, 0.9664],
        [0.0012, 0.0371, 0.9617],
        [0.0012, 0.0449, 0.9539],
        [0.0015, 0.0380, 0.9605],
        [0.0013, 0.0468, 0.9519],
        [0.0012, 0.0344, 0.9645],
        [0.0013, 0.0421, 0.9566],
        [0.0014, 0.0406, 0.9580],
        [0.0012, 0.0310, 0.9678],
        [0.0011, 0.0354, 0.9635],
        [0.0010, 0.0343, 0.9647],
        [0.0015, 0.0424, 0.9561]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2300: tensor([[0.0012, 0.0371, 0.9617],
        [0.0020, 0.0534, 0.9446],
        [0.0013, 0.0481, 0.9507],
        [0.0012, 0.0387, 0.9601],
        [0.0011, 0.0408, 0.9582],
        [0.0010, 0.0392, 0.9597],
        [0.0011, 0.0450, 0.9539],
        [0.0011, 0.0436, 0.9553],
        [0.0011, 0.0452, 0.9536],
        [0.0014, 0.0414, 0.9572],
        [0.0016, 0.0599, 0.9385],
        [0.0014, 0.0438, 0.9548]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2350: tensor([[0.0015, 0.0541, 0.9444],
        [0.0013, 0.0529, 0.9459],
        [0.0011, 0.0448, 0.9541],
        [0.0020, 0.0783, 0.9197],
        [0.0011, 0.0499, 0.9490],
        [0.0012, 0.0583, 0.9405],
        [0.0013, 0.0551, 0.9436],
        [0.0016, 0.0409, 0.9575],
        [0.0015, 0.0505, 0.9480],
        [0.0012, 0.0484, 0.9503],
        [0.0013, 0.0473, 0.9515],
        [0.0015, 0.0496, 0.9490]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2400: tensor([[0.0014, 0.0688, 0.9297],
        [0.0013, 0.0657, 0.9330],
        [0.0014, 0.0614, 0.9372],
        [0.0014, 0.0590, 0.9396],
        [0.0015, 0.0623, 0.9362],
        [0.0013, 0.0681, 0.9307],
        [0.0013, 0.0576, 0.9411],
        [0.0013, 0.0677, 0.9310],
        [0.0014, 0.0669, 0.9318],
        [0.0017, 0.0806, 0.9177],
        [0.0011, 0.0579, 0.9410],
        [0.0014, 0.0673, 0.9313]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2450: tensor([[0.0015, 0.0782, 0.9203],
        [0.0012, 0.0634, 0.9354],
        [0.0014, 0.0863, 0.9123],
        [0.0012, 0.0812, 0.9176],
        [0.0014, 0.0756, 0.9230],
        [0.0014, 0.0773, 0.9213],
        [0.0019, 0.0909, 0.9072],
        [0.0018, 0.0775, 0.9208],
        [0.0014, 0.0678, 0.9308],
        [0.0013, 0.0784, 0.9202],
        [0.0014, 0.0714, 0.9272],
        [0.0014, 0.0832, 0.9154]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2500: tensor([[0.0017, 0.1055, 0.8928],
        [0.0020, 0.1108, 0.8872],
        [0.0017, 0.0794, 0.9189],
        [0.0018, 0.1024, 0.8958],
        [0.0015, 0.0964, 0.9021],
        [0.0017, 0.1061, 0.8922],
        [0.0018, 0.0998, 0.8984],
        [0.0016, 0.1006, 0.8977],
        [0.0015, 0.1048, 0.8937],
        [0.0013, 0.0901, 0.9086],
        [0.0014, 0.0869, 0.9117],
        [0.0016, 0.1088, 0.8896]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2550: tensor([[0.0019, 0.1686, 0.8296],
        [0.0015, 0.1245, 0.8740],
        [0.0021, 0.1510, 0.8469],
        [0.0018, 0.1381, 0.8601],
        [0.0015, 0.1427, 0.8558],
        [0.0016, 0.1493, 0.8491],
        [0.0016, 0.1590, 0.8393],
        [0.0021, 0.1411, 0.8568],
        [0.0017, 0.1493, 0.8490],
        [0.0014, 0.1337, 0.8649],
        [0.0019, 0.1598, 0.8383],
        [0.0016, 0.1365, 0.8619]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2600: tensor([[0.0019, 0.2205, 0.7776],
        [0.0018, 0.2711, 0.7271],
        [0.0018, 0.2633, 0.7348],
        [0.0015, 0.2381, 0.7604],
        [0.0020, 0.2599, 0.7381],
        [0.0021, 0.2297, 0.7682],
        [0.0018, 0.2318, 0.7664],
        [0.0025, 0.2791, 0.7184],
        [0.0019, 0.2015, 0.7966],
        [0.0017, 0.2038, 0.7945],
        [0.0020, 0.1972, 0.8008],
        [0.0019, 0.2537, 0.7444]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2650: tensor([[0.0022, 0.3475, 0.6504],
        [0.0018, 0.3432, 0.6550],
        [0.0021, 0.3414, 0.6565],
        [0.0020, 0.3130, 0.6850],
        [0.0019, 0.3250, 0.6731],
        [0.0021, 0.3671, 0.6307],
        [0.0019, 0.3518, 0.6463],
        [0.0020, 0.3616, 0.6364],
        [0.0028, 0.3121, 0.6851],
        [0.0018, 0.3285, 0.6696],
        [0.0021, 0.3208, 0.6771],
        [0.0026, 0.3597, 0.6376]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2700: tensor([[0.0018, 0.4836, 0.5146],
        [0.0024, 0.4985, 0.4991],
        [0.0019, 0.4026, 0.5956],
        [0.0022, 0.5179, 0.4799],
        [0.0021, 0.4410, 0.5569],
        [0.0022, 0.4709, 0.5269],
        [0.0023, 0.5037, 0.4940],
        [0.0022, 0.4984, 0.4994],
        [0.0023, 0.4535, 0.5442],
        [0.0028, 0.5196, 0.4777],
        [0.0024, 0.4938, 0.5038],
        [0.0021, 0.4784, 0.5196]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2750: tensor([[0.0020, 0.4340, 0.5640],
        [0.0019, 0.5076, 0.4905],
        [0.0019, 0.5150, 0.4831],
        [0.0018, 0.4996, 0.4985],
        [0.0020, 0.4625, 0.5355],
        [0.0025, 0.5257, 0.4717],
        [0.0018, 0.4419, 0.5563],
        [0.0017, 0.4758, 0.5225],
        [0.0022, 0.5410, 0.4568],
        [0.0022, 0.4755, 0.5223],
        [0.0017, 0.4234, 0.5749],
        [0.0021, 0.4830, 0.5150]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2800: tensor([[0.0020, 0.4868, 0.5113],
        [0.0024, 0.4764, 0.5212],
        [0.0019, 0.4866, 0.5115],
        [0.0022, 0.5522, 0.4456],
        [0.0019, 0.4289, 0.5692],
        [0.0022, 0.5423, 0.4555],
        [0.0018, 0.4650, 0.5332],
        [0.0020, 0.4684, 0.5296],
        [0.0023, 0.6030, 0.3947],
        [0.0019, 0.4731, 0.5251],
        [0.0021, 0.5135, 0.4844],
        [0.0019, 0.4986, 0.4995]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2850: tensor([[0.0019, 0.5667, 0.4314],
        [0.0018, 0.5313, 0.4668],
        [0.0019, 0.4710, 0.5271],
        [0.0022, 0.5569, 0.4409],
        [0.0026, 0.5181, 0.4794],
        [0.0017, 0.5157, 0.4825],
        [0.0021, 0.5125, 0.4854],
        [0.0021, 0.5170, 0.4809],
        [0.0022, 0.5443, 0.4535],
        [0.0022, 0.4962, 0.5016],
        [0.0022, 0.4670, 0.5308],
        [0.0021, 0.5065, 0.4913]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2900: tensor([[0.0019, 0.4377, 0.5603],
        [0.0022, 0.4908, 0.5070],
        [0.0021, 0.5141, 0.4838],
        [0.0019, 0.4648, 0.5332],
        [0.0027, 0.5086, 0.4886],
        [0.0021, 0.4824, 0.5156],
        [0.0023, 0.4760, 0.5217],
        [0.0023, 0.5464, 0.4512],
        [0.0023, 0.5907, 0.4069],
        [0.0022, 0.4831, 0.5147],
        [0.0021, 0.4899, 0.5080],
        [0.0020, 0.5138, 0.4842]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2950: tensor([[0.0021, 0.5365, 0.4615],
        [0.0022, 0.5067, 0.4910],
        [0.0022, 0.5343, 0.4635],
        [0.0019, 0.5222, 0.4759],
        [0.0028, 0.5306, 0.4666],
        [0.0024, 0.5315, 0.4660],
        [0.0018, 0.4874, 0.5108],
        [0.0020, 0.4938, 0.5042],
        [0.0019, 0.4797, 0.5184],
        [0.0023, 0.5491, 0.4486],
        [0.0019, 0.5215, 0.4766],
        [0.0020, 0.5320, 0.4660]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3000: tensor([[0.0029, 0.5229, 0.4742],
        [0.0021, 0.5438, 0.4541],
        [0.0019, 0.4358, 0.5623],
        [0.0017, 0.5647, 0.4335],
        [0.0022, 0.5334, 0.4644],
        [0.0017, 0.5590, 0.4393],
        [0.0017, 0.5595, 0.4388],
        [0.0023, 0.4568, 0.5409],
        [0.0019, 0.4718, 0.5263],
        [0.0021, 0.5665, 0.4314],
        [0.0024, 0.4224, 0.5752],
        [0.0018, 0.5102, 0.4880]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3050: tensor([[0.0017, 0.1808, 0.8175],
        [0.0017, 0.8761, 0.1223],
        [0.0016, 0.1740, 0.8244],
        [0.0020, 0.2321, 0.7659],
        [0.0024, 0.7144, 0.2833],
        [0.0019, 0.6931, 0.3050],
        [0.0016, 0.1483, 0.8501],
        [0.0018, 0.1978, 0.8004],
        [0.0019, 0.8615, 0.1366],
        [0.0018, 0.1462, 0.8520],
        [0.0024, 0.2709, 0.7268],
        [0.0023, 0.1951, 0.8025]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3100: tensor([[9.3526e-01, 6.3823e-02, 9.1526e-04],
        [9.3974e-01, 5.9269e-02, 9.9220e-04],
        [9.4096e-01, 5.7742e-02, 1.2927e-03],
        [9.4602e-01, 5.3029e-02, 9.5208e-04],
        [9.3962e-01, 5.9468e-02, 9.1229e-04],
        [9.4402e-01, 5.5146e-02, 8.3071e-04],
        [9.3707e-01, 6.1928e-02, 1.0051e-03],
        [9.3111e-01, 6.7698e-02, 1.1875e-03],
        [9.1126e-01, 8.7397e-02, 1.3393e-03],
        [8.9691e-01, 1.0204e-01, 1.0530e-03],
        [8.2505e-01, 1.7359e-01, 1.3571e-03],
        [9.2100e-01, 7.7975e-02, 1.0213e-03]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #3150: tensor([[9.6296e-01, 3.6382e-02, 6.6135e-04],
        [9.6880e-01, 3.0503e-02, 6.9714e-04],
        [9.7048e-01, 2.8897e-02, 6.2361e-04],
        [9.7402e-01, 2.5069e-02, 9.1428e-04],
        [8.8187e-01, 1.1693e-01, 1.2001e-03],
        [7.6506e-04, 1.9672e-02, 9.7956e-01],
        [9.6670e-01, 3.2567e-02, 7.3390e-04],
        [9.7024e-01, 2.9112e-02, 6.4829e-04],
        [9.7525e-01, 2.3982e-02, 7.7251e-04],
        [9.7165e-01, 2.7773e-02, 5.7677e-04],
        [9.5989e-01, 3.9284e-02, 8.2187e-04],
        [9.7161e-01, 2.7784e-02, 6.0495e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #3200: tensor([[9.6804e-01, 3.1265e-02, 6.9213e-04],
        [9.7046e-01, 2.8989e-02, 5.4659e-04],
        [9.7058e-01, 2.8829e-02, 5.9075e-04],
        [9.6921e-01, 3.0058e-02, 7.2863e-04],
        [9.6155e-01, 3.7671e-02, 7.7959e-04],
        [9.5761e-01, 4.1627e-02, 7.5953e-04],
        [9.5642e-01, 4.2982e-02, 6.0044e-04],
        [9.6174e-01, 3.7582e-02, 6.8168e-04],
        [9.5458e-01, 4.4780e-02, 6.4273e-04],
        [9.5439e-01, 4.5045e-02, 5.6498e-04],
        [9.5021e-01, 4.9075e-02, 7.1582e-04],
        [9.5316e-01, 4.6261e-02, 5.7883e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #3250: tensor([[9.6195e-01, 3.7463e-02, 5.8793e-04],
        [9.5387e-01, 4.5485e-02, 6.4082e-04],
        [9.6069e-01, 3.8677e-02, 6.3206e-04],
        [9.6063e-01, 3.8742e-02, 6.2779e-04],
        [9.6198e-01, 3.7524e-02, 4.9866e-04],
        [9.6118e-01, 3.8116e-02, 7.0157e-04],
        [9.5706e-01, 4.2282e-02, 6.6184e-04],
        [9.6274e-01, 3.6508e-02, 7.4953e-04],
        [9.4567e-01, 5.3643e-02, 6.8735e-04],
        [7.9778e-01, 2.0134e-01, 8.8284e-04],
        [1.0305e-03, 4.1856e-02, 9.5711e-01],
        [9.4812e-01, 5.1256e-02, 6.2690e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #3300: tensor([[9.2107e-01, 7.8338e-02, 5.9025e-04],
        [9.2707e-01, 7.2339e-02, 5.8687e-04],
        [9.1932e-01, 7.9966e-02, 7.1763e-04],
        [8.9898e-01, 1.0034e-01, 6.7916e-04],
        [9.0581e-01, 9.3506e-02, 6.8433e-04],
        [9.1607e-01, 8.3231e-02, 7.0018e-04],
        [9.2078e-01, 7.8483e-02, 7.3712e-04],
        [9.1530e-01, 8.3990e-02, 7.1463e-04],
        [9.1836e-01, 8.1086e-02, 5.5187e-04],
        [9.1863e-01, 8.0662e-02, 7.1143e-04],
        [8.0242e-01, 1.9645e-01, 1.1328e-03],
        [8.6639e-01, 1.3283e-01, 7.7468e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #3350: tensor([[1.1741e-03, 3.6784e-03, 9.9515e-01],
        [9.7895e-04, 2.5850e-03, 9.9644e-01],
        [3.8418e-03, 9.9113e-01, 5.0239e-03],
        [8.4145e-04, 1.2024e-02, 9.8713e-01],
        [1.6121e-03, 1.4070e-03, 9.9698e-01],
        [2.3549e-03, 1.9605e-03, 9.9568e-01],
        [2.2656e-03, 2.1280e-03, 9.9561e-01],
        [2.1669e-03, 1.7108e-03, 9.9612e-01],
        [1.5042e-03, 1.9863e-03, 9.9651e-01],
        [2.2179e-03, 1.6502e-03, 9.9613e-01],
        [2.0691e-03, 4.7052e-03, 9.9323e-01],
        [1.2572e-03, 2.0437e-03, 9.9670e-01]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Iter #3400: tensor([[0.0022, 0.0013, 0.9965],
        [0.0021, 0.0018, 0.9961],
        [0.0018, 0.0012, 0.9970],
        [0.0028, 0.0014, 0.9958],
        [0.0024, 0.0013, 0.9963],
        [0.0022, 0.0015, 0.9964],
        [0.0022, 0.0012, 0.9966],
        [0.0025, 0.0016, 0.9960],
        [0.0024, 0.0016, 0.9960],
        [0.0025, 0.0017, 0.9958],
        [0.0023, 0.0015, 0.9962],
        [0.0025, 0.0015, 0.9959]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3450: tensor([[0.0023, 0.0019, 0.9958],
        [0.0026, 0.0018, 0.9956],
        [0.0021, 0.0011, 0.9968],
        [0.0027, 0.0019, 0.9954],
        [0.0020, 0.0012, 0.9968],
        [0.0016, 0.0011, 0.9973],
        [0.0024, 0.0013, 0.9963],
        [0.0023, 0.0012, 0.9965],
        [0.0025, 0.0013, 0.9962],
        [0.0027, 0.0017, 0.9957],
        [0.0021, 0.0018, 0.9961],
        [0.0019, 0.0013, 0.9969]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3500: tensor([[0.0020, 0.0011, 0.9970],
        [0.0024, 0.0012, 0.9964],
        [0.0026, 0.0016, 0.9958],
        [0.0024, 0.0015, 0.9961],
        [0.0015, 0.0012, 0.9973],
        [0.0020, 0.0014, 0.9967],
        [0.0021, 0.0011, 0.9967],
        [0.0023, 0.0012, 0.9965],
        [0.0027, 0.0012, 0.9960],
        [0.0023, 0.0013, 0.9964],
        [0.0025, 0.0014, 0.9961],
        [0.0017, 0.0014, 0.9968]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3550: tensor([[0.0023, 0.0016, 0.9961],
        [0.0028, 0.0015, 0.9957],
        [0.0017, 0.0012, 0.9971],
        [0.0029, 0.0015, 0.9956],
        [0.0024, 0.0016, 0.9960],
        [0.0019, 0.0014, 0.9967],
        [0.0019, 0.0011, 0.9971],
        [0.0021, 0.0014, 0.9965],
        [0.0024, 0.0013, 0.9963],
        [0.0020, 0.0016, 0.9964],
        [0.0020, 0.0010, 0.9970],
        [0.0025, 0.0015, 0.9960]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3600: tensor([[0.0029, 0.0013, 0.9958],
        [0.0022, 0.0013, 0.9965],
        [0.0026, 0.0018, 0.9956],
        [0.0022, 0.0011, 0.9966],
        [0.0018, 0.0011, 0.9971],
        [0.0026, 0.0014, 0.9960],
        [0.0019, 0.0016, 0.9965],
        [0.0020, 0.0013, 0.9968],
        [0.0022, 0.0014, 0.9964],
        [0.0019, 0.0011, 0.9970],
        [0.0020, 0.0012, 0.9968],
        [0.0023, 0.0015, 0.9962]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3650: tensor([[0.0024, 0.0017, 0.9959],
        [0.0027, 0.0014, 0.9959],
        [0.0017, 0.0011, 0.9972],
        [0.0019, 0.0016, 0.9965],
        [0.0019, 0.0012, 0.9969],
        [0.0020, 0.0016, 0.9964],
        [0.0028, 0.0013, 0.9959],
        [0.0021, 0.0017, 0.9962],
        [0.0022, 0.0014, 0.9964],
        [0.0019, 0.0016, 0.9965],
        [0.0027, 0.0015, 0.9959],
        [0.0026, 0.0013, 0.9961]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3700: tensor([[0.0023, 0.0014, 0.9962],
        [0.0019, 0.0014, 0.9967],
        [0.0022, 0.0016, 0.9962],
        [0.0016, 0.0012, 0.9971],
        [0.0025, 0.0013, 0.9962],
        [0.0026, 0.0018, 0.9956],
        [0.0022, 0.0013, 0.9965],
        [0.0026, 0.0017, 0.9957],
        [0.0027, 0.0016, 0.9957],
        [0.0020, 0.0014, 0.9965],
        [0.0023, 0.0013, 0.9964],
        [0.0025, 0.0014, 0.9961]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3750: tensor([[0.0017, 0.0012, 0.9971],
        [0.0020, 0.0012, 0.9968],
        [0.0027, 0.0016, 0.9957],
        [0.0019, 0.0016, 0.9965],
        [0.0018, 0.0015, 0.9967],
        [0.0020, 0.0011, 0.9969],
        [0.0023, 0.0012, 0.9965],
        [0.0016, 0.0012, 0.9972],
        [0.0022, 0.0012, 0.9965],
        [0.0017, 0.0011, 0.9971],
        [0.0017, 0.0015, 0.9968],
        [0.0023, 0.0014, 0.9963]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3800: tensor([[0.0018, 0.0011, 0.9971],
        [0.0025, 0.0013, 0.9962],
        [0.0025, 0.0012, 0.9963],
        [0.0030, 0.0013, 0.9957],
        [0.0020, 0.0013, 0.9968],
        [0.0019, 0.0015, 0.9965],
        [0.0028, 0.0013, 0.9959],
        [0.0019, 0.0015, 0.9966],
        [0.0022, 0.0012, 0.9966],
        [0.0031, 0.0017, 0.9953],
        [0.0025, 0.0016, 0.9959],
        [0.0017, 0.0016, 0.9967]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3850: tensor([[0.0023, 0.0019, 0.9958],
        [0.0029, 0.0016, 0.9955],
        [0.0026, 0.0015, 0.9958],
        [0.0023, 0.0016, 0.9961],
        [0.0026, 0.0015, 0.9959],
        [0.0022, 0.0012, 0.9966],
        [0.0023, 0.0012, 0.9965],
        [0.0025, 0.0013, 0.9962],
        [0.0021, 0.0015, 0.9963],
        [0.0032, 0.0018, 0.9949],
        [0.0021, 0.0015, 0.9963],
        [0.0024, 0.0012, 0.9964]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3900: tensor([[0.0020, 0.0012, 0.9968],
        [0.0023, 0.0013, 0.9964],
        [0.0024, 0.0017, 0.9958],
        [0.0030, 0.0016, 0.9954],
        [0.0024, 0.0014, 0.9962],
        [0.0021, 0.0011, 0.9967],
        [0.0025, 0.0014, 0.9961],
        [0.0029, 0.0016, 0.9955],
        [0.0022, 0.0014, 0.9964],
        [0.0020, 0.0012, 0.9967],
        [0.0029, 0.0015, 0.9955],
        [0.0023, 0.0013, 0.9964]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3950: tensor([[0.0025, 0.0014, 0.9961],
        [0.0022, 0.0013, 0.9964],
        [0.0034, 0.0015, 0.9951],
        [0.0024, 0.0017, 0.9959],
        [0.0028, 0.0013, 0.9959],
        [0.0033, 0.0017, 0.9950],
        [0.0027, 0.0016, 0.9957],
        [0.0023, 0.0012, 0.9965],
        [0.0020, 0.0016, 0.9965],
        [0.0023, 0.0015, 0.9962],
        [0.0021, 0.0013, 0.9966],
        [0.0032, 0.0016, 0.9952]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4000: tensor([[0.0026, 0.0013, 0.9961],
        [0.0022, 0.0012, 0.9967],
        [0.0017, 0.0013, 0.9971],
        [0.0025, 0.0017, 0.9958],
        [0.0024, 0.0016, 0.9960],
        [0.0022, 0.0015, 0.9964],
        [0.0021, 0.0013, 0.9966],
        [0.0025, 0.0018, 0.9957],
        [0.0023, 0.0013, 0.9964],
        [0.0018, 0.0014, 0.9968],
        [0.0026, 0.0015, 0.9959],
        [0.0022, 0.0012, 0.9966]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4050: tensor([[0.0020, 0.0012, 0.9968],
        [0.0021, 0.0015, 0.9964],
        [0.0026, 0.0015, 0.9958],
        [0.0027, 0.0014, 0.9959],
        [0.0019, 0.0013, 0.9968],
        [0.0022, 0.0014, 0.9964],
        [0.0032, 0.0016, 0.9952],
        [0.0020, 0.0016, 0.9964],
        [0.0025, 0.0016, 0.9958],
        [0.0028, 0.0013, 0.9959],
        [0.0022, 0.0013, 0.9965],
        [0.0020, 0.0014, 0.9966]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4100: tensor([[0.0025, 0.0014, 0.9961],
        [0.0025, 0.0016, 0.9959],
        [0.0022, 0.0013, 0.9965],
        [0.0045, 0.0017, 0.9938],
        [0.0020, 0.0011, 0.9968],
        [0.0027, 0.0014, 0.9959],
        [0.0024, 0.0013, 0.9963],
        [0.0020, 0.0020, 0.9960],
        [0.0023, 0.0015, 0.9963],
        [0.0023, 0.0013, 0.9964],
        [0.0022, 0.0013, 0.9965],
        [0.0022, 0.0012, 0.9966]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4150: tensor([[0.0024, 0.0020, 0.9955],
        [0.0019, 0.0012, 0.9969],
        [0.0021, 0.0014, 0.9965],
        [0.0032, 0.0014, 0.9953],
        [0.0020, 0.0010, 0.9970],
        [0.0026, 0.0016, 0.9958],
        [0.0031, 0.0017, 0.9952],
        [0.0023, 0.0014, 0.9963],
        [0.0025, 0.0012, 0.9963],
        [0.0026, 0.0014, 0.9961],
        [0.0032, 0.0018, 0.9950],
        [0.0025, 0.0014, 0.9961]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4200: tensor([[0.0020, 0.0016, 0.9964],
        [0.0022, 0.0014, 0.9964],
        [0.0032, 0.0017, 0.9951],
        [0.0024, 0.0012, 0.9964],
        [0.0023, 0.0012, 0.9965],
        [0.0033, 0.0020, 0.9947],
        [0.0026, 0.0012, 0.9962],
        [0.0021, 0.0012, 0.9967],
        [0.0027, 0.0017, 0.9956],
        [0.0018, 0.0013, 0.9969],
        [0.0026, 0.0017, 0.9957],
        [0.0018, 0.0012, 0.9969]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4250: tensor([[0.0023, 0.0014, 0.9964],
        [0.0021, 0.0013, 0.9966],
        [0.0022, 0.0015, 0.9962],
        [0.0026, 0.0016, 0.9958],
        [0.0024, 0.0016, 0.9960],
        [0.0027, 0.0012, 0.9961],
        [0.0021, 0.0011, 0.9968],
        [0.0027, 0.0013, 0.9960],
        [0.0034, 0.0015, 0.9951],
        [0.0037, 0.0018, 0.9946],
        [0.0037, 0.0019, 0.9943],
        [0.0023, 0.0016, 0.9961]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4300: tensor([[0.0029, 0.0018, 0.9953],
        [0.0033, 0.0016, 0.9951],
        [0.0029, 0.0018, 0.9953],
        [0.0024, 0.0015, 0.9962],
        [0.0028, 0.0013, 0.9959],
        [0.0030, 0.0014, 0.9956],
        [0.0022, 0.0014, 0.9963],
        [0.0034, 0.0016, 0.9950],
        [0.0030, 0.0012, 0.9958],
        [0.0036, 0.0013, 0.9950],
        [0.0027, 0.0014, 0.9959],
        [0.0032, 0.0013, 0.9955]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4350: tensor([[0.0034, 0.0019, 0.9947],
        [0.0019, 0.0016, 0.9965],
        [0.0019, 0.0014, 0.9966],
        [0.0020, 0.0015, 0.9966],
        [0.0023, 0.0019, 0.9958],
        [0.0036, 0.0016, 0.9948],
        [0.0025, 0.0015, 0.9960],
        [0.0025, 0.0011, 0.9965],
        [0.0023, 0.0019, 0.9958],
        [0.0027, 0.0015, 0.9958],
        [0.0027, 0.0012, 0.9961],
        [0.0028, 0.0015, 0.9957]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4400: tensor([[0.0023, 0.0014, 0.9962],
        [0.0022, 0.0014, 0.9965],
        [0.0029, 0.0012, 0.9959],
        [0.0026, 0.0017, 0.9957],
        [0.0020, 0.0013, 0.9967],
        [0.0022, 0.0014, 0.9964],
        [0.0035, 0.0019, 0.9946],
        [0.0021, 0.0016, 0.9964],
        [0.0023, 0.0016, 0.9962],
        [0.0029, 0.0013, 0.9959],
        [0.0033, 0.0017, 0.9950],
        [0.0028, 0.0013, 0.9959]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4450: tensor([[0.0023, 0.0014, 0.9963],
        [0.0025, 0.0014, 0.9961],
        [0.0030, 0.0021, 0.9950],
        [0.0025, 0.0013, 0.9962],
        [0.0048, 0.0016, 0.9936],
        [0.0032, 0.0016, 0.9951],
        [0.0027, 0.0015, 0.9959],
        [0.0036, 0.0018, 0.9946],
        [0.0027, 0.0012, 0.9962],
        [0.0026, 0.0015, 0.9960],
        [0.0020, 0.0012, 0.9968],
        [0.0038, 0.0024, 0.9938]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4500: tensor([[0.0029, 0.0016, 0.9955],
        [0.0031, 0.0017, 0.9952],
        [0.0029, 0.0011, 0.9961],
        [0.0028, 0.0014, 0.9957],
        [0.0031, 0.0014, 0.9955],
        [0.0035, 0.0021, 0.9945],
        [0.0024, 0.0011, 0.9965],
        [0.0022, 0.0012, 0.9966],
        [0.0028, 0.0015, 0.9957],
        [0.0027, 0.0013, 0.9960],
        [0.0029, 0.0013, 0.9959],
        [0.0021, 0.0012, 0.9967]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4550: tensor([[0.0028, 0.0014, 0.9959],
        [0.0022, 0.0012, 0.9966],
        [0.0023, 0.0014, 0.9963],
        [0.0035, 0.0013, 0.9952],
        [0.0031, 0.0016, 0.9952],
        [0.0027, 0.0016, 0.9956],
        [0.0023, 0.0013, 0.9964],
        [0.0022, 0.0020, 0.9958],
        [0.0032, 0.0014, 0.9954],
        [0.0036, 0.0019, 0.9945],
        [0.0032, 0.0013, 0.9955],
        [0.0027, 0.0018, 0.9955]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4600: tensor([[0.0031, 0.0014, 0.9955],
        [0.0028, 0.0015, 0.9957],
        [0.0031, 0.0016, 0.9953],
        [0.0034, 0.0016, 0.9950],
        [0.0027, 0.0014, 0.9960],
        [0.0025, 0.0013, 0.9962],
        [0.0032, 0.0014, 0.9955],
        [0.0024, 0.0011, 0.9964],
        [0.0031, 0.0016, 0.9953],
        [0.0037, 0.0019, 0.9944],
        [0.0023, 0.0014, 0.9963],
        [0.0035, 0.0015, 0.9950]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4650: tensor([[0.0032, 0.0018, 0.9950],
        [0.0066, 0.0026, 0.9908],
        [0.0027, 0.0014, 0.9958],
        [0.0032, 0.0014, 0.9954],
        [0.0024, 0.0013, 0.9963],
        [0.0025, 0.0014, 0.9960],
        [0.0038, 0.0015, 0.9947],
        [0.0024, 0.0014, 0.9963],
        [0.0029, 0.0017, 0.9954],
        [0.0040, 0.0019, 0.9941],
        [0.0027, 0.0015, 0.9958],
        [0.0038, 0.0018, 0.9944]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4700: tensor([[0.0041, 0.0021, 0.9938],
        [0.0024, 0.0015, 0.9961],
        [0.0030, 0.0022, 0.9948],
        [0.0030, 0.0016, 0.9954],
        [0.0031, 0.0013, 0.9956],
        [0.0024, 0.0016, 0.9959],
        [0.0038, 0.0017, 0.9946],
        [0.0030, 0.0018, 0.9952],
        [0.0039, 0.0016, 0.9945],
        [0.0022, 0.0015, 0.9963],
        [0.0023, 0.0012, 0.9965],
        [0.0029, 0.0015, 0.9956]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4750: tensor([[0.0044, 0.0017, 0.9940],
        [0.0033, 0.0013, 0.9953],
        [0.0033, 0.0012, 0.9955],
        [0.0032, 0.0015, 0.9953],
        [0.0037, 0.0015, 0.9948],
        [0.0032, 0.0019, 0.9948],
        [0.0025, 0.0018, 0.9956],
        [0.0040, 0.0015, 0.9945],
        [0.0024, 0.0015, 0.9961],
        [0.0034, 0.0014, 0.9952],
        [0.0024, 0.0012, 0.9964],
        [0.0028, 0.0016, 0.9956]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4800: tensor([[0.0035, 0.0012, 0.9952],
        [0.0044, 0.0015, 0.9941],
        [0.0021, 0.0016, 0.9963],
        [0.0036, 0.0014, 0.9949],
        [0.0029, 0.0013, 0.9958],
        [0.0022, 0.0015, 0.9963],
        [0.0026, 0.0014, 0.9960],
        [0.0026, 0.0014, 0.9960],
        [0.0037, 0.0018, 0.9945],
        [0.0031, 0.0016, 0.9953],
        [0.0038, 0.0018, 0.9944],
        [0.0031, 0.0015, 0.9953]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4850: tensor([[0.0043, 0.0016, 0.9941],
        [0.0028, 0.0014, 0.9958],
        [0.0031, 0.0015, 0.9954],
        [0.0020, 0.0014, 0.9966],
        [0.0030, 0.0022, 0.9948],
        [0.0023, 0.0014, 0.9963],
        [0.0023, 0.0012, 0.9965],
        [0.0035, 0.0018, 0.9947],
        [0.0028, 0.0016, 0.9956],
        [0.0032, 0.0013, 0.9956],
        [0.0031, 0.0012, 0.9957],
        [0.0030, 0.0016, 0.9954]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4900: tensor([[0.0027, 0.0012, 0.9962],
        [0.0032, 0.0015, 0.9954],
        [0.0028, 0.0019, 0.9953],
        [0.0027, 0.0015, 0.9958],
        [0.0032, 0.0014, 0.9954],
        [0.0030, 0.0014, 0.9956],
        [0.0037, 0.0013, 0.9950],
        [0.0031, 0.0015, 0.9954],
        [0.0032, 0.0015, 0.9953],
        [0.0029, 0.0011, 0.9960],
        [0.0034, 0.0017, 0.9949],
        [0.0035, 0.0016, 0.9948]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #4950: tensor([[0.0042, 0.0018, 0.9940],
        [0.0034, 0.0013, 0.9953],
        [0.0043, 0.0015, 0.9942],
        [0.0028, 0.0013, 0.9959],
        [0.0031, 0.0015, 0.9954],
        [0.0033, 0.0015, 0.9951],
        [0.0028, 0.0017, 0.9955],
        [0.0028, 0.0014, 0.9958],
        [0.0032, 0.0016, 0.9952],
        [0.0039, 0.0016, 0.9945],
        [0.0027, 0.0017, 0.9956],
        [0.0030, 0.0016, 0.9954]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5000: tensor([[0.0033, 0.0018, 0.9949],
        [0.0025, 0.0013, 0.9961],
        [0.0037, 0.0014, 0.9949],
        [0.0039, 0.0016, 0.9944],
        [0.0032, 0.0014, 0.9954],
        [0.0034, 0.0020, 0.9946],
        [0.0036, 0.0014, 0.9950],
        [0.0031, 0.0017, 0.9952],
        [0.0038, 0.0016, 0.9946],
        [0.0040, 0.0015, 0.9946],
        [0.0027, 0.0015, 0.9958],
        [0.0040, 0.0015, 0.9945]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5050: tensor([[0.0029, 0.0015, 0.9957],
        [0.0037, 0.0017, 0.9946],
        [0.0025, 0.0013, 0.9962],
        [0.0032, 0.0015, 0.9952],
        [0.0032, 0.0015, 0.9953],
        [0.0038, 0.0014, 0.9948],
        [0.0041, 0.0015, 0.9944],
        [0.0050, 0.0022, 0.9927],
        [0.0028, 0.0015, 0.9958],
        [0.0041, 0.0016, 0.9944],
        [0.0038, 0.0015, 0.9947],
        [0.0037, 0.0017, 0.9946]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5100: tensor([[0.0034, 0.0018, 0.9949],
        [0.0032, 0.0014, 0.9954],
        [0.0036, 0.0013, 0.9951],
        [0.0034, 0.0016, 0.9950],
        [0.0060, 0.0020, 0.9919],
        [0.0044, 0.0016, 0.9940],
        [0.0034, 0.0019, 0.9946],
        [0.0033, 0.0013, 0.9954],
        [0.0040, 0.0019, 0.9941],
        [0.0047, 0.0018, 0.9935],
        [0.0033, 0.0015, 0.9952],
        [0.0031, 0.0015, 0.9954]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5150: tensor([[0.0032, 0.0015, 0.9953],
        [0.0041, 0.0014, 0.9945],
        [0.0038, 0.0015, 0.9947],
        [0.0048, 0.0020, 0.9932],
        [0.0033, 0.0015, 0.9951],
        [0.0037, 0.0013, 0.9949],
        [0.0050, 0.0022, 0.9928],
        [0.0055, 0.0017, 0.9927],
        [0.0046, 0.0017, 0.9937],
        [0.0041, 0.0019, 0.9941],
        [0.0033, 0.0014, 0.9953],
        [0.0037, 0.0019, 0.9945]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5200: tensor([[0.0040, 0.0016, 0.9944],
        [0.0035, 0.0016, 0.9949],
        [0.0036, 0.0014, 0.9951],
        [0.0069, 0.0021, 0.9909],
        [0.0042, 0.0013, 0.9944],
        [0.0035, 0.0017, 0.9948],
        [0.0039, 0.0016, 0.9945],
        [0.0034, 0.0014, 0.9951],
        [0.0029, 0.0013, 0.9958],
        [0.0038, 0.0018, 0.9944],
        [0.0041, 0.0013, 0.9946],
        [0.0036, 0.0015, 0.9949]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5250: tensor([[0.0037, 0.0014, 0.9950],
        [0.0032, 0.0017, 0.9950],
        [0.0040, 0.0015, 0.9944],
        [0.0032, 0.0014, 0.9954],
        [0.0038, 0.0013, 0.9949],
        [0.0033, 0.0019, 0.9948],
        [0.0031, 0.0014, 0.9955],
        [0.0041, 0.0018, 0.9941],
        [0.0036, 0.0015, 0.9949],
        [0.0039, 0.0020, 0.9941],
        [0.0045, 0.0014, 0.9940],
        [0.0039, 0.0015, 0.9946]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #5300: tensor([[0.0043, 0.0014, 0.9943],
        [0.0038, 0.0015, 0.9948],
        [0.0052, 0.0016, 0.9932],
        [0.0031, 0.0019, 0.9950],
        [0.0039, 0.0014, 0.9947],
        [0.0042, 0.0014, 0.9945],
        [0.0040, 0.0019, 0.9941],
        [0.0045, 0.0016, 0.9939],
        [0.0032, 0.0017, 0.9951],
        [0.0037, 0.0015, 0.9948],
        [0.0050, 0.0016, 0.9934],
        [0.0031, 0.0016, 0.9953]], device='cuda:0', grad_fn=<SoftmaxBackward>)
