Iter #50: tensor([[0.3950, 0.3999, 0.2051],
        [0.3589, 0.4364, 0.2048],
        [0.5008, 0.3336, 0.1657],
        [0.3664, 0.4072, 0.2264],
        [0.4145, 0.3797, 0.2057],
        [0.3247, 0.4525, 0.2228],
        [0.4099, 0.3400, 0.2501],
        [0.4633, 0.3573, 0.1793],
        [0.4084, 0.3773, 0.2143],
        [0.4918, 0.3144, 0.1938],
        [0.4463, 0.3638, 0.1899],
        [0.4523, 0.3630, 0.1847]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4251, 0.4295, 0.1454],
        [0.4834, 0.3978, 0.1188],
        [0.5152, 0.3482, 0.1367],
        [0.4711, 0.3655, 0.1633],
        [0.4214, 0.4203, 0.1583],
        [0.4358, 0.4244, 0.1398],
        [0.5604, 0.2981, 0.1415],
        [0.4971, 0.3739, 0.1290],
        [0.4978, 0.3402, 0.1620],
        [0.4683, 0.4050, 0.1266],
        [0.4922, 0.3606, 0.1472],
        [0.4860, 0.3591, 0.1549]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3453, 0.5316, 0.1231],
        [0.3521, 0.5003, 0.1477],
        [0.3607, 0.5054, 0.1338],
        [0.4614, 0.4115, 0.1271],
        [0.3187, 0.5449, 0.1363],
        [0.3224, 0.5360, 0.1416],
        [0.4185, 0.4477, 0.1338],
        [0.4131, 0.4626, 0.1244],
        [0.3798, 0.4849, 0.1353],
        [0.4390, 0.4331, 0.1279],
        [0.5653, 0.3068, 0.1280],
        [0.4314, 0.4269, 0.1417]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4952, 0.3591, 0.1457],
        [0.4073, 0.4233, 0.1694],
        [0.3388, 0.5007, 0.1605],
        [0.3237, 0.5184, 0.1579],
        [0.3329, 0.5494, 0.1178],
        [0.3018, 0.5695, 0.1287],
        [0.2048, 0.6266, 0.1686],
        [0.3123, 0.5510, 0.1367],
        [0.2504, 0.6105, 0.1390],
        [0.3621, 0.4843, 0.1537],
        [0.2877, 0.5636, 0.1487],
        [0.5442, 0.3192, 0.1365]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4215, 0.4454, 0.1331],
        [0.3014, 0.5572, 0.1414],
        [0.4413, 0.4088, 0.1499],
        [0.2967, 0.5196, 0.1836],
        [0.3487, 0.5174, 0.1339],
        [0.3455, 0.4947, 0.1598],
        [0.3968, 0.4400, 0.1632],
        [0.2792, 0.5770, 0.1437],
        [0.4087, 0.4709, 0.1204],
        [0.2649, 0.6010, 0.1341],
        [0.2729, 0.6015, 0.1256],
        [0.3299, 0.5268, 0.1433]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5247, 0.3308, 0.1445],
        [0.3413, 0.4509, 0.2078],
        [0.4575, 0.3796, 0.1629],
        [0.4637, 0.3952, 0.1412],
        [0.2743, 0.5816, 0.1441],
        [0.5644, 0.2944, 0.1412],
        [0.3699, 0.5053, 0.1247],
        [0.4581, 0.3849, 0.1570],
        [0.4451, 0.4245, 0.1303],
        [0.4089, 0.4502, 0.1409],
        [0.4412, 0.4193, 0.1395],
        [0.4321, 0.4039, 0.1640]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3232, 0.5292, 0.1475],
        [0.3985, 0.4600, 0.1415],
        [0.4042, 0.4463, 0.1495],
        [0.3285, 0.5344, 0.1372],
        [0.3255, 0.5558, 0.1186],
        [0.4396, 0.4177, 0.1428],
        [0.3517, 0.5085, 0.1398],
        [0.4855, 0.3800, 0.1345],
        [0.2134, 0.6623, 0.1243],
        [0.3078, 0.5539, 0.1383],
        [0.2315, 0.6361, 0.1324],
        [0.2736, 0.5819, 0.1444]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5553, 0.2621, 0.1826],
        [0.5743, 0.2886, 0.1370],
        [0.3358, 0.4715, 0.1927],
        [0.4009, 0.4568, 0.1423],
        [0.3488, 0.5091, 0.1421],
        [0.6758, 0.1817, 0.1424],
        [0.5951, 0.2685, 0.1364],
        [0.5717, 0.2717, 0.1566],
        [0.4954, 0.3414, 0.1632],
        [0.4811, 0.3591, 0.1598],
        [0.4457, 0.3736, 0.1808],
        [0.6521, 0.2139, 0.1340]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.2942, 0.5351, 0.1707],
        [0.3227, 0.5069, 0.1704],
        [0.4225, 0.4321, 0.1455],
        [0.6461, 0.2108, 0.1430],
        [0.6382, 0.2052, 0.1566],
        [0.7070, 0.1638, 0.1292],
        [0.3025, 0.5372, 0.1603],
        [0.2952, 0.5386, 0.1662],
        [0.2473, 0.6076, 0.1451],
        [0.4063, 0.3968, 0.1968],
        [0.4773, 0.3088, 0.2139],
        [0.3679, 0.4732, 0.1589]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4211, 0.4153, 0.1636],
        [0.2465, 0.6055, 0.1480],
        [0.5639, 0.2737, 0.1624],
        [0.3423, 0.4864, 0.1713],
        [0.3555, 0.4651, 0.1794],
        [0.2681, 0.5716, 0.1603],
        [0.3831, 0.4129, 0.2040],
        [0.2276, 0.5865, 0.1859],
        [0.3065, 0.5289, 0.1646],
        [0.5163, 0.3057, 0.1780],
        [0.3019, 0.5575, 0.1406],
        [0.2827, 0.5444, 0.1729]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.6665, 0.1597, 0.1738],
        [0.3886, 0.4144, 0.1970],
        [0.3049, 0.5197, 0.1754],
        [0.3836, 0.3938, 0.2225],
        [0.4185, 0.3576, 0.2239],
        [0.1998, 0.6001, 0.2001],
        [0.2989, 0.5124, 0.1887],
        [0.4753, 0.3092, 0.2155],
        [0.4067, 0.4032, 0.1901],
        [0.2933, 0.5181, 0.1886],
        [0.5528, 0.2332, 0.2140],
        [0.5187, 0.3013, 0.1800]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #600: tensor([[0.4897, 0.2366, 0.2737],
        [0.4990, 0.2821, 0.2189],
        [0.5475, 0.2534, 0.1991],
        [0.4841, 0.3391, 0.1768],
        [0.3421, 0.4440, 0.2139],
        [0.4215, 0.3420, 0.2364],
        [0.5284, 0.2592, 0.2124],
        [0.3648, 0.4576, 0.1776],
        [0.5982, 0.2099, 0.1919],
        [0.4882, 0.3054, 0.2064],
        [0.5102, 0.2811, 0.2087],
        [0.4202, 0.3248, 0.2550]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #650: tensor([[0.5414, 0.2191, 0.2395],
        [0.4032, 0.3563, 0.2405],
        [0.3126, 0.4582, 0.2292],
        [0.4562, 0.3042, 0.2396],
        [0.3698, 0.4047, 0.2255],
        [0.3141, 0.4103, 0.2756],
        [0.3053, 0.4526, 0.2421],
        [0.2968, 0.4917, 0.2115],
        [0.2393, 0.5514, 0.2093],
        [0.3322, 0.4397, 0.2281],
        [0.4410, 0.3220, 0.2370],
        [0.3263, 0.4623, 0.2114]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #700: tensor([[0.4615, 0.2791, 0.2594],
        [0.5099, 0.2451, 0.2450],
        [0.4469, 0.3140, 0.2391],
        [0.5381, 0.2267, 0.2353],
        [0.4803, 0.2463, 0.2734],
        [0.4224, 0.3283, 0.2493],
        [0.2674, 0.5354, 0.1973],
        [0.3161, 0.4492, 0.2347],
        [0.3660, 0.3836, 0.2504],
        [0.4590, 0.2600, 0.2810],
        [0.4511, 0.2765, 0.2723],
        [0.3184, 0.3931, 0.2885]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #750: tensor([[0.4365, 0.3429, 0.2206],
        [0.4662, 0.2851, 0.2487],
        [0.3955, 0.3088, 0.2957],
        [0.4855, 0.2424, 0.2721],
        [0.2885, 0.4730, 0.2386],
        [0.3007, 0.4577, 0.2415],
        [0.3923, 0.3514, 0.2563],
        [0.2782, 0.4190, 0.3028],
        [0.4646, 0.3016, 0.2338],
        [0.2684, 0.4924, 0.2391],
        [0.3478, 0.3606, 0.2917],
        [0.2280, 0.5575, 0.2145]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #800: tensor([[0.4061, 0.3523, 0.2417],
        [0.3906, 0.3477, 0.2617],
        [0.2971, 0.4284, 0.2745],
        [0.4912, 0.2641, 0.2447],
        [0.3874, 0.3194, 0.2931],
        [0.3135, 0.3868, 0.2997],
        [0.2357, 0.4749, 0.2894],
        [0.2698, 0.4319, 0.2983],
        [0.2807, 0.4886, 0.2307],
        [0.3501, 0.3602, 0.2896],
        [0.3264, 0.4117, 0.2619],
        [0.4091, 0.3080, 0.2830]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #850: tensor([[0.5107, 0.1962, 0.2931],
        [0.5166, 0.2110, 0.2723],
        [0.5575, 0.1804, 0.2621],
        [0.2614, 0.4978, 0.2408],
        [0.2762, 0.4586, 0.2652],
        [0.3571, 0.3442, 0.2986],
        [0.3485, 0.3659, 0.2856],
        [0.1648, 0.5842, 0.2510],
        [0.3095, 0.4131, 0.2773],
        [0.3461, 0.3755, 0.2783],
        [0.3713, 0.3567, 0.2720],
        [0.2448, 0.5015, 0.2537]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #900: tensor([[0.3868, 0.3001, 0.3130],
        [0.2361, 0.4556, 0.3083],
        [0.2637, 0.4935, 0.2428],
        [0.1835, 0.5416, 0.2749],
        [0.3328, 0.3890, 0.2782],
        [0.1887, 0.5742, 0.2372],
        [0.3490, 0.3754, 0.2755],
        [0.7126, 0.0824, 0.2050],
        [0.6441, 0.1122, 0.2437],
        [0.7124, 0.0789, 0.2086],
        [0.3430, 0.3848, 0.2721],
        [0.2932, 0.3664, 0.3404]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #950: tensor([[0.2128, 0.5212, 0.2659],
        [0.2096, 0.5546, 0.2359],
        [0.1931, 0.4888, 0.3181],
        [0.2574, 0.4593, 0.2833],
        [0.1730, 0.5858, 0.2412],
        [0.1880, 0.5868, 0.2252],
        [0.4707, 0.2226, 0.3067],
        [0.4603, 0.2316, 0.3081],
        [0.5331, 0.1917, 0.2752],
        [0.1667, 0.5844, 0.2488],
        [0.2183, 0.4777, 0.3040],
        [0.2567, 0.4139, 0.3295]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1000: tensor([[0.4187, 0.2496, 0.3317],
        [0.4977, 0.1589, 0.3434],
        [0.4219, 0.2236, 0.3545],
        [0.5008, 0.1797, 0.3195],
        [0.3992, 0.2230, 0.3778],
        [0.4075, 0.2558, 0.3368],
        [0.5234, 0.1785, 0.2981],
        [0.3838, 0.2568, 0.3594],
        [0.4997, 0.2176, 0.2826],
        [0.3583, 0.2990, 0.3427],
        [0.5354, 0.1748, 0.2897],
        [0.3763, 0.2891, 0.3346]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1050: tensor([[0.2531, 0.4061, 0.3408],
        [0.1398, 0.6137, 0.2465],
        [0.2059, 0.4568, 0.3373],
        [0.2307, 0.4311, 0.3382],
        [0.2432, 0.5073, 0.2495],
        [0.1767, 0.5354, 0.2879],
        [0.2284, 0.4957, 0.2758],
        [0.1633, 0.5186, 0.3181],
        [0.2004, 0.4902, 0.3093],
        [0.2215, 0.4634, 0.3151],
        [0.1543, 0.5748, 0.2708],
        [0.1650, 0.5428, 0.2922]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1100: tensor([[0.1948, 0.4780, 0.3272],
        [0.3480, 0.2974, 0.3546],
        [0.2372, 0.4169, 0.3459],
        [0.2087, 0.4752, 0.3161],
        [0.2114, 0.4636, 0.3250],
        [0.2580, 0.4060, 0.3360],
        [0.2572, 0.4284, 0.3144],
        [0.4965, 0.1575, 0.3460],
        [0.2488, 0.3837, 0.3675],
        [0.6099, 0.1038, 0.2862],
        [0.4348, 0.2037, 0.3615],
        [0.5145, 0.1332, 0.3523]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1150: tensor([[0.1500, 0.5561, 0.2939],
        [0.1049, 0.6454, 0.2497],
        [0.1465, 0.5681, 0.2854],
        [0.2941, 0.3051, 0.4008],
        [0.2786, 0.3455, 0.3759],
        [0.2142, 0.3671, 0.4187],
        [0.5045, 0.1343, 0.3612],
        [0.3935, 0.1677, 0.4388],
        [0.4609, 0.1439, 0.3952],
        [0.1949, 0.4511, 0.3541],
        [0.1811, 0.4288, 0.3901],
        [0.1820, 0.5172, 0.3008]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1200: tensor([[0.2271, 0.3994, 0.3735],
        [0.1973, 0.4600, 0.3427],
        [0.2351, 0.3177, 0.4473],
        [0.2428, 0.3787, 0.3786],
        [0.1558, 0.5027, 0.3415],
        [0.1845, 0.4651, 0.3505],
        [0.2932, 0.2907, 0.4161],
        [0.2264, 0.4095, 0.3641],
        [0.2338, 0.3738, 0.3924],
        [0.2377, 0.3832, 0.3791],
        [0.1803, 0.4638, 0.3559],
        [0.1745, 0.5040, 0.3215]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1250: tensor([[0.7739, 0.0297, 0.1963],
        [0.7341, 0.0325, 0.2334],
        [0.7796, 0.0232, 0.1972],
        [0.7746, 0.0220, 0.2034],
        [0.7915, 0.0219, 0.1866],
        [0.7897, 0.0203, 0.1900],
        [0.7765, 0.0201, 0.2034],
        [0.7848, 0.0224, 0.1928],
        [0.7642, 0.0239, 0.2119],
        [0.8009, 0.0263, 0.1728],
        [0.7940, 0.0230, 0.1830],
        [0.7771, 0.0230, 0.1999]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1300: tensor([[0.0522, 0.6352, 0.3125],
        [0.0567, 0.6653, 0.2781],
        [0.0865, 0.5429, 0.3706],
        [0.0926, 0.4981, 0.4093],
        [0.1316, 0.3907, 0.4778],
        [0.2606, 0.1880, 0.5514],
        [0.2727, 0.1518, 0.5755],
        [0.3446, 0.1636, 0.4919],
        [0.1157, 0.3957, 0.4886],
        [0.1219, 0.3767, 0.5014],
        [0.2294, 0.2559, 0.5148],
        [0.1538, 0.3215, 0.5247]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1350: tensor([[0.0464, 0.7993, 0.1542],
        [0.0352, 0.8142, 0.1506],
        [0.0421, 0.7848, 0.1731],
        [0.0397, 0.8106, 0.1498],
        [0.0405, 0.7739, 0.1857],
        [0.0622, 0.6880, 0.2498],
        [0.0707, 0.6566, 0.2726],
        [0.0653, 0.7109, 0.2238],
        [0.0840, 0.6986, 0.2174],
        [0.0609, 0.7019, 0.2371],
        [0.0492, 0.7498, 0.2011],
        [0.0413, 0.7945, 0.1642]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1400: tensor([[0.0496, 0.7411, 0.2093],
        [0.0495, 0.7465, 0.2040],
        [0.0411, 0.7848, 0.1741],
        [0.0451, 0.7750, 0.1799],
        [0.0641, 0.6718, 0.2641],
        [0.0370, 0.7667, 0.1963],
        [0.0793, 0.5688, 0.3519],
        [0.0581, 0.6942, 0.2477],
        [0.0534, 0.7008, 0.2458],
        [0.0556, 0.7274, 0.2170],
        [0.0489, 0.7273, 0.2238],
        [0.0469, 0.7235, 0.2295]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1450: tensor([[0.0612, 0.6772, 0.2616],
        [0.0663, 0.6473, 0.2864],
        [0.0581, 0.6370, 0.3050],
        [0.0684, 0.4696, 0.4619],
        [0.0735, 0.3958, 0.5307],
        [0.0750, 0.4358, 0.4892],
        [0.0782, 0.5345, 0.3873],
        [0.0787, 0.5603, 0.3610],
        [0.0636, 0.6340, 0.3024],
        [0.0720, 0.5286, 0.3994],
        [0.1161, 0.2834, 0.6005],
        [0.0905, 0.3986, 0.5109]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1500: tensor([[0.0715, 0.5049, 0.4236],
        [0.0724, 0.5039, 0.4237],
        [0.0735, 0.5390, 0.3875],
        [0.0736, 0.4963, 0.4301],
        [0.0856, 0.5030, 0.4115],
        [0.0663, 0.5809, 0.3528],
        [0.0805, 0.5467, 0.3728],
        [0.0848, 0.4763, 0.4388],
        [0.0675, 0.6048, 0.3277],
        [0.1184, 0.2430, 0.6386],
        [0.1651, 0.2038, 0.6311],
        [0.1425, 0.2219, 0.6356]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1550: tensor([[0.0824, 0.4719, 0.4457],
        [0.0789, 0.4743, 0.4467],
        [0.0815, 0.4090, 0.5095],
        [0.0939, 0.4344, 0.4716],
        [0.0911, 0.5078, 0.4011],
        [0.0776, 0.4853, 0.4371],
        [0.0757, 0.5703, 0.3540],
        [0.0988, 0.4169, 0.4843],
        [0.0774, 0.4719, 0.4507],
        [0.0760, 0.5520, 0.3720],
        [0.0677, 0.5160, 0.4163],
        [0.0659, 0.4444, 0.4897]], device='cuda:0', grad_fn=<SoftmaxBackward>)
