Iter #50: tensor([[0.1684, 0.3484, 0.4832],
        [0.1713, 0.4224, 0.4063],
        [0.1317, 0.3999, 0.4684],
        [0.1399, 0.3447, 0.5154],
        [0.1130, 0.3769, 0.5101],
        [0.1560, 0.4574, 0.3866],
        [0.1807, 0.3684, 0.4509],
        [0.1550, 0.5121, 0.3329],
        [0.1404, 0.4337, 0.4259],
        [0.1431, 0.3836, 0.4733],
        [0.1285, 0.3920, 0.4795],
        [0.1050, 0.4012, 0.4938]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1580, 0.4163, 0.4257],
        [0.1354, 0.3992, 0.4655],
        [0.1484, 0.4127, 0.4389],
        [0.1327, 0.3022, 0.5651],
        [0.1342, 0.4081, 0.4578],
        [0.1441, 0.3095, 0.5464],
        [0.1478, 0.4320, 0.4202],
        [0.1763, 0.4360, 0.3877],
        [0.1515, 0.4358, 0.4127],
        [0.1380, 0.3347, 0.5274],
        [0.1567, 0.4006, 0.4427],
        [0.1497, 0.4362, 0.4141]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1082, 0.2864, 0.6054],
        [0.1172, 0.3697, 0.5130],
        [0.1128, 0.3194, 0.5678],
        [0.1011, 0.2775, 0.6213],
        [0.1104, 0.3776, 0.5119],
        [0.1133, 0.2805, 0.6062],
        [0.1381, 0.3836, 0.4783],
        [0.1284, 0.3167, 0.5549],
        [0.1159, 0.3391, 0.5450],
        [0.1364, 0.3659, 0.4977],
        [0.1447, 0.3836, 0.4717],
        [0.1285, 0.4475, 0.4240]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1393, 0.6572, 0.2034],
        [0.1864, 0.4810, 0.3326],
        [0.1454, 0.6179, 0.2367],
        [0.1517, 0.4231, 0.4252],
        [0.1352, 0.3386, 0.5262],
        [0.1299, 0.3073, 0.5628],
        [0.1434, 0.3277, 0.5289],
        [0.1311, 0.3337, 0.5351],
        [0.1232, 0.3433, 0.5335],
        [0.0991, 0.1768, 0.7241],
        [0.1170, 0.2538, 0.6293],
        [0.1188, 0.2729, 0.6083]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1151, 0.4782, 0.4067],
        [0.1854, 0.5090, 0.3055],
        [0.1069, 0.2145, 0.6787],
        [0.1244, 0.2412, 0.6343],
        [0.1242, 0.2229, 0.6529],
        [0.1462, 0.6529, 0.2008],
        [0.1444, 0.6985, 0.1571],
        [0.1661, 0.6128, 0.2212],
        [0.1472, 0.2717, 0.5810],
        [0.1535, 0.3138, 0.5326],
        [0.1351, 0.2899, 0.5750],
        [0.1274, 0.2050, 0.6676]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.1828, 0.3491, 0.4681],
        [0.1554, 0.2805, 0.5641],
        [0.1199, 0.3584, 0.5217],
        [0.1375, 0.3249, 0.5375],
        [0.1435, 0.3907, 0.4659],
        [0.1566, 0.3190, 0.5243],
        [0.1222, 0.4000, 0.4778],
        [0.1548, 0.3621, 0.4831],
        [0.1712, 0.3966, 0.4322],
        [0.1459, 0.2955, 0.5587],
        [0.1176, 0.2236, 0.6588],
        [0.1481, 0.2101, 0.6418]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.1625, 0.5518, 0.2857],
        [0.1706, 0.4489, 0.3806],
        [0.1550, 0.3227, 0.5222],
        [0.1320, 0.2305, 0.6375],
        [0.1732, 0.3818, 0.4450],
        [0.1437, 0.7553, 0.1010],
        [0.1607, 0.7326, 0.1067],
        [0.1974, 0.6704, 0.1322],
        [0.1693, 0.5153, 0.3154],
        [0.1920, 0.4987, 0.3093],
        [0.1753, 0.3442, 0.4804],
        [0.1647, 0.6829, 0.1524]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.1512, 0.2771, 0.5718],
        [0.1577, 0.2204, 0.6218],
        [0.1854, 0.4340, 0.3805],
        [0.1687, 0.2963, 0.5350],
        [0.1769, 0.5677, 0.2554],
        [0.2245, 0.4471, 0.3284],
        [0.2222, 0.4212, 0.3566],
        [0.2062, 0.4811, 0.3127],
        [0.1875, 0.3833, 0.4292],
        [0.1469, 0.3597, 0.4933],
        [0.1672, 0.4260, 0.4068],
        [0.1661, 0.3405, 0.4934]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.1586, 0.2927, 0.5487],
        [0.1712, 0.3197, 0.5091],
        [0.2062, 0.3052, 0.4886],
        [0.1664, 0.3161, 0.5175],
        [0.1698, 0.3042, 0.5259],
        [0.1673, 0.2349, 0.5977],
        [0.1640, 0.2377, 0.5983],
        [0.1585, 0.3165, 0.5250],
        [0.1898, 0.3568, 0.4534],
        [0.2108, 0.3757, 0.4135],
        [0.1950, 0.5062, 0.2988],
        [0.2449, 0.3538, 0.4013]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.2048, 0.4098, 0.3854],
        [0.1746, 0.5407, 0.2848],
        [0.1690, 0.5365, 0.2944],
        [0.2059, 0.4351, 0.3590],
        [0.1846, 0.4202, 0.3952],
        [0.1850, 0.2339, 0.5811],
        [0.1644, 0.3097, 0.5260],
        [0.1637, 0.2332, 0.6031],
        [0.1857, 0.4546, 0.3597],
        [0.1843, 0.4200, 0.3956],
        [0.1589, 0.5312, 0.3099],
        [0.1957, 0.3539, 0.4503]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1641, 0.6951, 0.1407],
        [0.1832, 0.5084, 0.3084],
        [0.1909, 0.3942, 0.4149],
        [0.2132, 0.4196, 0.3672],
        [0.1876, 0.4174, 0.3950],
        [0.1678, 0.1803, 0.6518],
        [0.1792, 0.2175, 0.6033],
        [0.2155, 0.3678, 0.4167],
        [0.1591, 0.2739, 0.5670],
        [0.2040, 0.2657, 0.5304],
        [0.1959, 0.3653, 0.4388],
        [0.2605, 0.3741, 0.3654]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #600: tensor([[0.2179, 0.3472, 0.4348],
        [0.2063, 0.4240, 0.3698],
        [0.2397, 0.3547, 0.4055],
        [0.1988, 0.3648, 0.4363],
        [0.2205, 0.2036, 0.5759],
        [0.2026, 0.4221, 0.3753],
        [0.2404, 0.3742, 0.3854],
        [0.2320, 0.3836, 0.3844],
        [0.2395, 0.4606, 0.2998],
        [0.1909, 0.3258, 0.4834],
        [0.2099, 0.4503, 0.3398],
        [0.2180, 0.4178, 0.3642]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #650: tensor([[0.2432, 0.3600, 0.3968],
        [0.2142, 0.3736, 0.4122],
        [0.2613, 0.3627, 0.3760],
        [0.2166, 0.3702, 0.4131],
        [0.2442, 0.5467, 0.2091],
        [0.2248, 0.5169, 0.2583],
        [0.2292, 0.4828, 0.2879],
        [0.2558, 0.4582, 0.2860],
        [0.2601, 0.4721, 0.2678],
        [0.2686, 0.3845, 0.3469],
        [0.2253, 0.5160, 0.2587],
        [0.2317, 0.3257, 0.4427]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #700: tensor([[0.2228, 0.3906, 0.3866],
        [0.2919, 0.3759, 0.3322],
        [0.2104, 0.2795, 0.5101],
        [0.2603, 0.3353, 0.4044],
        [0.2490, 0.2777, 0.4734],
        [0.2543, 0.2509, 0.4949],
        [0.2385, 0.3407, 0.4208],
        [0.2096, 0.4101, 0.3803],
        [0.2138, 0.6163, 0.1699],
        [0.2194, 0.4763, 0.3043],
        [0.2077, 0.3834, 0.4089],
        [0.2093, 0.6196, 0.1711]], device='cuda:0', grad_fn=<SoftmaxBackward>)
