Iter #50: tensor([[0.1993, 0.4558, 0.3449],
        [0.2904, 0.3691, 0.3405],
        [0.2648, 0.3483, 0.3869],
        [0.2264, 0.4391, 0.3344],
        [0.3079, 0.3803, 0.3118],
        [0.3182, 0.3385, 0.3432],
        [0.2836, 0.3411, 0.3753],
        [0.2804, 0.4132, 0.3064],
        [0.2517, 0.4167, 0.3316],
        [0.3299, 0.3358, 0.3343],
        [0.2228, 0.4247, 0.3526],
        [0.2494, 0.4955, 0.2551]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1942, 0.4274, 0.3784],
        [0.2469, 0.4086, 0.3445],
        [0.2082, 0.4970, 0.2949],
        [0.2238, 0.4505, 0.3257],
        [0.2133, 0.4460, 0.3407],
        [0.1755, 0.5648, 0.2597],
        [0.1901, 0.5008, 0.3090],
        [0.2320, 0.4091, 0.3590],
        [0.2287, 0.3832, 0.3881],
        [0.2173, 0.5470, 0.2357],
        [0.1961, 0.5343, 0.2696],
        [0.2167, 0.4646, 0.3187]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1456, 0.5131, 0.3413],
        [0.1660, 0.4897, 0.3443],
        [0.1835, 0.5110, 0.3055],
        [0.1734, 0.4503, 0.3763],
        [0.2147, 0.4358, 0.3494],
        [0.1770, 0.4788, 0.3443],
        [0.1792, 0.5296, 0.2912],
        [0.1350, 0.5717, 0.2933],
        [0.1631, 0.5252, 0.3117],
        [0.1724, 0.4739, 0.3537],
        [0.1968, 0.4242, 0.3790],
        [0.1618, 0.4610, 0.3772]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1034, 0.6317, 0.2649],
        [0.1237, 0.5958, 0.2804],
        [0.1127, 0.6209, 0.2665],
        [0.1065, 0.5889, 0.3046],
        [0.1257, 0.5623, 0.3120],
        [0.1270, 0.6314, 0.2416],
        [0.1762, 0.5235, 0.3003],
        [0.1504, 0.4729, 0.3767],
        [0.1420, 0.5273, 0.3307],
        [0.1466, 0.5413, 0.3121],
        [0.1167, 0.6092, 0.2741],
        [0.1256, 0.6234, 0.2510]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1301, 0.5772, 0.2927],
        [0.1389, 0.5427, 0.3184],
        [0.1280, 0.5173, 0.3547],
        [0.1337, 0.5581, 0.3082],
        [0.1391, 0.4660, 0.3950],
        [0.1208, 0.6481, 0.2311],
        [0.1185, 0.6226, 0.2589],
        [0.1288, 0.6477, 0.2235],
        [0.0961, 0.6580, 0.2459],
        [0.1340, 0.6057, 0.2603],
        [0.1614, 0.3978, 0.4408],
        [0.1870, 0.5074, 0.3056]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.1335, 0.5660, 0.3005],
        [0.1196, 0.6623, 0.2181],
        [0.1968, 0.4121, 0.3911],
        [0.1378, 0.4213, 0.4410],
        [0.1796, 0.4585, 0.3620],
        [0.1173, 0.5626, 0.3200],
        [0.1637, 0.4629, 0.3733],
        [0.1308, 0.5452, 0.3240],
        [0.1584, 0.4004, 0.4413],
        [0.1194, 0.4663, 0.4142],
        [0.1737, 0.4960, 0.3303],
        [0.1252, 0.5900, 0.2848]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.1508, 0.5139, 0.3353],
        [0.1528, 0.5305, 0.3167],
        [0.1167, 0.6114, 0.2719],
        [0.1408, 0.5788, 0.2804],
        [0.1411, 0.4907, 0.3682],
        [0.1354, 0.5084, 0.3562],
        [0.1178, 0.6162, 0.2660],
        [0.1385, 0.5094, 0.3520],
        [0.1216, 0.4720, 0.4064],
        [0.1526, 0.5140, 0.3333],
        [0.1303, 0.5592, 0.3104],
        [0.1316, 0.5119, 0.3565]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.1662, 0.4468, 0.3870],
        [0.1326, 0.5528, 0.3146],
        [0.1268, 0.5723, 0.3009],
        [0.2276, 0.2424, 0.5300],
        [0.1881, 0.1976, 0.6144],
        [0.2019, 0.2334, 0.5647],
        [0.1804, 0.1536, 0.6660],
        [0.1968, 0.1680, 0.6352],
        [0.2153, 0.1760, 0.6086],
        [0.1376, 0.5345, 0.3279],
        [0.1133, 0.5612, 0.3255],
        [0.1157, 0.5529, 0.3313]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.1259, 0.5839, 0.2903],
        [0.1699, 0.4557, 0.3744],
        [0.1692, 0.4749, 0.3559],
        [0.1655, 0.5024, 0.3321],
        [0.1588, 0.5715, 0.2697],
        [0.1395, 0.6321, 0.2283],
        [0.1429, 0.6113, 0.2458],
        [0.1448, 0.4954, 0.3598],
        [0.1449, 0.4617, 0.3934],
        [0.1513, 0.5925, 0.2562],
        [0.1721, 0.4412, 0.3866],
        [0.1373, 0.4841, 0.3786]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1460, 0.4983, 0.3557],
        [0.1801, 0.5320, 0.2879],
        [0.1396, 0.5454, 0.3150],
        [0.1899, 0.4220, 0.3881],
        [0.1344, 0.5286, 0.3371],
        [0.1761, 0.4674, 0.3564],
        [0.1271, 0.5538, 0.3192],
        [0.1577, 0.5646, 0.2777],
        [0.1147, 0.5869, 0.2984],
        [0.2455, 0.2452, 0.5092],
        [0.1477, 0.4451, 0.4071],
        [0.1671, 0.5026, 0.3303]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1987, 0.4070, 0.3943],
        [0.1868, 0.4374, 0.3758],
        [0.2239, 0.2748, 0.5014],
        [0.1759, 0.2819, 0.5422],
        [0.2010, 0.2864, 0.5126],
        [0.1602, 0.4556, 0.3843],
        [0.1276, 0.5340, 0.3384],
        [0.1435, 0.5069, 0.3496],
        [0.1675, 0.3946, 0.4379],
        [0.1837, 0.2853, 0.5310],
        [0.1457, 0.5334, 0.3209],
        [0.1374, 0.4812, 0.3814]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #600: tensor([[0.2072, 0.1999, 0.5929],
        [0.1573, 0.4214, 0.4213],
        [0.1365, 0.5413, 0.3222],
        [0.1625, 0.3176, 0.5199],
        [0.1936, 0.1981, 0.6083],
        [0.1974, 0.3540, 0.4486],
        [0.1693, 0.3555, 0.4751],
        [0.2060, 0.3471, 0.4469],
        [0.2039, 0.2089, 0.5872],
        [0.2204, 0.4747, 0.3049],
        [0.2068, 0.1910, 0.6022],
        [0.1868, 0.3033, 0.5099]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #650: tensor([[0.2055, 0.4508, 0.3437],
        [0.1739, 0.5112, 0.3149],
        [0.1743, 0.5124, 0.3133],
        [0.1666, 0.4969, 0.3365],
        [0.2029, 0.2631, 0.5339],
        [0.2015, 0.1471, 0.6514],
        [0.1779, 0.2913, 0.5309],
        [0.2032, 0.3323, 0.4645],
        [0.1932, 0.2910, 0.5158],
        [0.1892, 0.3573, 0.4535],
        [0.1926, 0.5197, 0.2877],
        [0.1878, 0.4189, 0.3933]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #700: tensor([[0.1937, 0.3942, 0.4121],
        [0.2052, 0.4076, 0.3871],
        [0.2184, 0.3747, 0.4069],
        [0.1821, 0.5795, 0.2384],
        [0.1778, 0.4314, 0.3908],
        [0.2009, 0.4802, 0.3189],
        [0.2319, 0.3095, 0.4586],
        [0.2193, 0.3170, 0.4637],
        [0.2168, 0.3757, 0.4076],
        [0.1863, 0.4251, 0.3886],
        [0.2075, 0.3934, 0.3991],
        [0.2055, 0.2048, 0.5897]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #750: tensor([[0.2172, 0.0913, 0.6915],
        [0.2168, 0.1585, 0.6247],
        [0.2190, 0.1143, 0.6668],
        [0.1892, 0.3898, 0.4210],
        [0.2089, 0.3110, 0.4801],
        [0.2183, 0.3034, 0.4783],
        [0.2210, 0.4404, 0.3386],
        [0.1974, 0.4710, 0.3316],
        [0.2269, 0.3520, 0.4210],
        [0.1734, 0.3709, 0.4557],
        [0.2375, 0.2037, 0.5588],
        [0.2132, 0.4167, 0.3700]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #800: tensor([[0.1960, 0.5021, 0.3019],
        [0.1851, 0.4802, 0.3347],
        [0.1887, 0.4744, 0.3369],
        [0.1607, 0.5937, 0.2456],
        [0.1623, 0.5272, 0.3105],
        [0.1608, 0.5488, 0.2904],
        [0.1811, 0.5449, 0.2740],
        [0.2398, 0.4405, 0.3197],
        [0.2359, 0.3648, 0.3993],
        [0.1817, 0.4865, 0.3318],
        [0.2255, 0.2938, 0.4808],
        [0.2252, 0.3346, 0.4402]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #850: tensor([[0.2286, 0.3543, 0.4171],
        [0.2227, 0.4106, 0.3667],
        [0.1835, 0.5923, 0.2242],
        [0.2078, 0.4803, 0.3118],
        [0.2204, 0.3526, 0.4270],
        [0.2134, 0.3990, 0.3876],
        [0.1974, 0.5083, 0.2943],
        [0.2397, 0.3086, 0.4517],
        [0.2585, 0.2924, 0.4492],
        [0.2020, 0.2067, 0.5913],
        [0.2614, 0.2879, 0.4507],
        [0.2286, 0.3625, 0.4089]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #900: tensor([[0.2110, 0.5517, 0.2373],
        [0.2529, 0.3959, 0.3512],
        [0.2326, 0.4306, 0.3369],
        [0.2443, 0.3108, 0.4449],
        [0.2021, 0.4441, 0.3538],
        [0.2687, 0.4319, 0.2994],
        [0.2061, 0.5213, 0.2726],
        [0.2314, 0.4508, 0.3178],
        [0.2481, 0.4136, 0.3383],
        [0.2156, 0.4607, 0.3237],
        [0.2341, 0.4309, 0.3350],
        [0.2372, 0.2680, 0.4948]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #950: tensor([[0.2351, 0.3957, 0.3692],
        [0.2098, 0.3872, 0.4030],
        [0.2241, 0.4839, 0.2920],
        [0.2092, 0.5091, 0.2817],
        [0.2116, 0.3382, 0.4502],
        [0.2604, 0.2795, 0.4601],
        [0.2133, 0.4407, 0.3460],
        [0.2110, 0.5566, 0.2325],
        [0.2392, 0.4446, 0.3162],
        [0.3191, 0.2818, 0.3991],
        [0.2607, 0.4604, 0.2789],
        [0.1902, 0.4449, 0.3649]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1000: tensor([[0.2204, 0.4535, 0.3261],
        [0.2378, 0.3624, 0.3998],
        [0.2079, 0.5640, 0.2282],
        [0.2279, 0.4188, 0.3533],
        [0.2286, 0.5261, 0.2453],
        [0.2737, 0.4301, 0.2962],
        [0.2455, 0.4777, 0.2769],
        [0.2325, 0.3630, 0.4045],
        [0.2209, 0.4964, 0.2826],
        [0.2240, 0.4091, 0.3669],
        [0.2317, 0.4872, 0.2812],
        [0.2593, 0.2948, 0.4459]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1050: tensor([[0.2474, 0.4355, 0.3171],
        [0.3095, 0.3299, 0.3607],
        [0.2829, 0.4085, 0.3086],
        [0.2465, 0.3014, 0.4521],
        [0.2782, 0.1367, 0.5851],
        [0.2352, 0.1014, 0.6634],
        [0.2403, 0.1400, 0.6197],
        [0.2826, 0.1855, 0.5318],
        [0.2978, 0.2429, 0.4593],
        [0.2375, 0.3180, 0.4445],
        [0.2495, 0.3393, 0.4111],
        [0.2162, 0.5975, 0.1863]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1100: tensor([[0.2693, 0.2963, 0.4344],
        [0.2786, 0.3295, 0.3919],
        [0.2863, 0.1756, 0.5381],
        [0.2778, 0.3322, 0.3900],
        [0.2687, 0.1862, 0.5451],
        [0.2902, 0.3589, 0.3509],
        [0.2746, 0.3845, 0.3410],
        [0.2707, 0.3910, 0.3383],
        [0.2503, 0.4528, 0.2969],
        [0.3238, 0.2312, 0.4449],
        [0.2622, 0.2074, 0.5305],
        [0.2800, 0.3251, 0.3949]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1150: tensor([[0.1984, 0.5825, 0.2191],
        [0.2435, 0.5281, 0.2285],
        [0.2712, 0.3431, 0.3857],
        [0.2667, 0.2498, 0.4835],
        [0.2763, 0.3707, 0.3530],
        [0.2258, 0.6305, 0.1437],
        [0.2560, 0.4237, 0.3202],
        [0.2630, 0.3581, 0.3789],
        [0.2603, 0.5350, 0.2046],
        [0.2897, 0.3402, 0.3701],
        [0.2553, 0.4404, 0.3043],
        [0.2427, 0.3796, 0.3777]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1200: tensor([[0.3298, 0.2948, 0.3754],
        [0.2939, 0.3889, 0.3172],
        [0.3011, 0.3321, 0.3668],
        [0.2594, 0.3967, 0.3439],
        [0.2741, 0.4656, 0.2602],
        [0.2696, 0.4819, 0.2484],
        [0.3428, 0.2669, 0.3903],
        [0.3065, 0.3242, 0.3693],
        [0.2932, 0.3682, 0.3387],
        [0.2727, 0.3877, 0.3396],
        [0.2663, 0.3632, 0.3705],
        [0.2954, 0.4353, 0.2694]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1250: tensor([[0.3237, 0.3619, 0.3144],
        [0.2480, 0.4908, 0.2611],
        [0.2401, 0.5127, 0.2472],
        [0.2681, 0.2565, 0.4754],
        [0.2754, 0.4978, 0.2268],
        [0.3026, 0.2744, 0.4230],
        [0.3001, 0.3379, 0.3620],
        [0.2969, 0.4858, 0.2173],
        [0.2783, 0.1382, 0.5835],
        [0.3247, 0.3929, 0.2824],
        [0.2974, 0.3029, 0.3997],
        [0.2708, 0.3929, 0.3363]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1300: tensor([[0.3019, 0.2846, 0.4135],
        [0.2916, 0.1455, 0.5629],
        [0.3006, 0.3679, 0.3315],
        [0.3084, 0.1704, 0.5212],
        [0.3177, 0.2762, 0.4061],
        [0.2777, 0.2716, 0.4508],
        [0.2944, 0.3774, 0.3282],
        [0.3158, 0.3299, 0.3543],
        [0.3479, 0.3706, 0.2814],
        [0.2859, 0.2190, 0.4951],
        [0.3530, 0.3788, 0.2682],
        [0.3166, 0.2212, 0.4621]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1350: tensor([[0.2852, 0.4600, 0.2548],
        [0.2843, 0.5133, 0.2025],
        [0.2660, 0.4695, 0.2645],
        [0.2850, 0.4222, 0.2928],
        [0.3194, 0.3753, 0.3052],
        [0.2957, 0.4773, 0.2270],
        [0.2817, 0.5411, 0.1772],
        [0.3695, 0.4049, 0.2256],
        [0.2903, 0.4135, 0.2962],
        [0.2698, 0.4849, 0.2453],
        [0.2282, 0.5543, 0.2175],
        [0.2805, 0.4468, 0.2726]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1400: tensor([[0.2235, 0.6015, 0.1750],
        [0.3233, 0.3676, 0.3091],
        [0.3103, 0.3905, 0.2992],
        [0.3442, 0.3730, 0.2828],
        [0.3407, 0.4148, 0.2445],
        [0.3374, 0.4168, 0.2458],
        [0.3759, 0.2730, 0.3511],
        [0.3413, 0.3721, 0.2866],
        [0.3160, 0.3355, 0.3485],
        [0.3731, 0.3419, 0.2851],
        [0.3224, 0.3782, 0.2994],
        [0.3144, 0.3942, 0.2914]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1450: tensor([[0.2831, 0.5690, 0.1479],
        [0.3523, 0.4086, 0.2391],
        [0.2806, 0.5363, 0.1831],
        [0.3166, 0.5057, 0.1777],
        [0.3339, 0.4759, 0.1902],
        [0.2612, 0.5343, 0.2044],
        [0.3006, 0.4869, 0.2125],
        [0.2848, 0.5288, 0.1863],
        [0.2565, 0.5516, 0.1919],
        [0.2273, 0.5743, 0.1983],
        [0.3309, 0.4564, 0.2127],
        [0.3052, 0.4715, 0.2233]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1500: tensor([[0.1843, 0.6960, 0.1197],
        [0.2624, 0.5407, 0.1969],
        [0.2601, 0.5802, 0.1597],
        [0.2671, 0.5625, 0.1704],
        [0.1972, 0.6822, 0.1206],
        [0.2289, 0.6295, 0.1416],
        [0.2674, 0.5644, 0.1682],
        [0.3243, 0.4456, 0.2301],
        [0.2649, 0.5411, 0.1940],
        [0.2126, 0.6458, 0.1416],
        [0.3077, 0.5187, 0.1737],
        [0.2178, 0.6295, 0.1526]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1550: tensor([[0.3929, 0.3479, 0.2592],
        [0.2858, 0.5597, 0.1545],
        [0.3893, 0.4001, 0.2105],
        [0.4267, 0.3329, 0.2405],
        [0.3471, 0.4555, 0.1974],
        [0.3011, 0.5158, 0.1831],
        [0.3710, 0.4187, 0.2103],
        [0.3517, 0.4014, 0.2469],
        [0.3379, 0.4412, 0.2209],
        [0.3791, 0.3835, 0.2374],
        [0.2944, 0.5178, 0.1878],
        [0.3239, 0.4860, 0.1901]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1600: tensor([[0.3022, 0.4687, 0.2291],
        [0.5196, 0.1174, 0.3630],
        [0.4497, 0.3126, 0.2377],
        [0.5413, 0.1203, 0.3384],
        [0.3801, 0.3623, 0.2576],
        [0.4519, 0.2241, 0.3240],
        [0.3144, 0.5041, 0.1814],
        [0.4169, 0.3542, 0.2289],
        [0.3910, 0.3729, 0.2361],
        [0.4126, 0.3404, 0.2470],
        [0.5396, 0.0893, 0.3710],
        [0.4475, 0.2710, 0.2816]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1650: tensor([[0.4138, 0.0695, 0.5167],
        [0.4304, 0.3283, 0.2413],
        [0.4258, 0.3539, 0.2203],
        [0.4617, 0.3215, 0.2168],
        [0.3128, 0.4795, 0.2077],
        [0.3060, 0.5290, 0.1650],
        [0.3085, 0.5481, 0.1434],
        [0.3072, 0.5520, 0.1408],
        [0.3868, 0.4335, 0.1797],
        [0.3332, 0.4733, 0.1935],
        [0.3380, 0.4543, 0.2077],
        [0.4455, 0.3437, 0.2109]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1700: tensor([[0.2808, 0.5930, 0.1262],
        [0.3365, 0.5358, 0.1277],
        [0.4008, 0.4130, 0.1862],
        [0.3807, 0.4537, 0.1656],
        [0.3740, 0.4537, 0.1723],
        [0.2420, 0.6439, 0.1141],
        [0.2350, 0.6557, 0.1093],
        [0.2470, 0.6341, 0.1189],
        [0.4408, 0.0917, 0.4674],
        [0.4766, 0.1883, 0.3351],
        [0.4515, 0.2512, 0.2973],
        [0.3516, 0.3935, 0.2550]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1750: tensor([[0.2012, 0.6795, 0.1193],
        [0.2857, 0.5734, 0.1409],
        [0.3356, 0.5004, 0.1640],
        [0.3347, 0.4917, 0.1736],
        [0.2700, 0.6095, 0.1206],
        [0.2523, 0.6261, 0.1216],
        [0.3037, 0.5787, 0.1176],
        [0.3079, 0.5524, 0.1397],
        [0.3167, 0.5131, 0.1701],
        [0.3802, 0.4486, 0.1712],
        [0.3100, 0.5653, 0.1248],
        [0.2672, 0.5800, 0.1528]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1800: tensor([[0.3680, 0.4578, 0.1742],
        [0.3260, 0.5435, 0.1305],
        [0.3760, 0.4837, 0.1403],
        [0.2806, 0.5794, 0.1400],
        [0.3163, 0.5680, 0.1157],
        [0.4035, 0.4399, 0.1567],
        [0.4227, 0.3925, 0.1848],
        [0.4128, 0.3919, 0.1954],
        [0.2898, 0.5852, 0.1250],
        [0.3716, 0.4847, 0.1437],
        [0.3389, 0.5130, 0.1481],
        [0.2987, 0.5497, 0.1516]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1850: tensor([[0.3304, 0.5139, 0.1557],
        [0.4367, 0.3821, 0.1811],
        [0.3028, 0.5583, 0.1389],
        [0.3498, 0.4899, 0.1603],
        [0.4141, 0.4287, 0.1573],
        [0.3001, 0.5143, 0.1856],
        [0.3362, 0.5239, 0.1399],
        [0.3266, 0.4788, 0.1946],
        [0.4501, 0.3153, 0.2347],
        [0.4118, 0.3543, 0.2339],
        [0.4438, 0.3130, 0.2432],
        [0.3150, 0.5418, 0.1432]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1900: tensor([[0.3945, 0.3767, 0.2288],
        [0.3305, 0.5217, 0.1478],
        [0.2958, 0.5360, 0.1683],
        [0.3868, 0.3831, 0.2301],
        [0.4331, 0.0616, 0.5053],
        [0.4417, 0.3467, 0.2116],
        [0.3947, 0.4510, 0.1543],
        [0.5124, 0.1981, 0.2895],
        [0.3900, 0.4289, 0.1811],
        [0.4048, 0.4000, 0.1953],
        [0.4100, 0.4024, 0.1877],
        [0.3960, 0.3910, 0.2131]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1950: tensor([[0.3667, 0.4039, 0.2294],
        [0.3451, 0.4865, 0.1684],
        [0.4118, 0.3479, 0.2403],
        [0.3428, 0.4794, 0.1779],
        [0.3267, 0.5493, 0.1240],
        [0.3140, 0.5112, 0.1748],
        [0.4326, 0.3537, 0.2137],
        [0.3722, 0.4027, 0.2250],
        [0.4087, 0.3769, 0.2144],
        [0.3673, 0.4515, 0.1812],
        [0.3217, 0.4966, 0.1817],
        [0.2706, 0.5712, 0.1582]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2000: tensor([[0.4094, 0.3899, 0.2006],
        [0.4791, 0.2177, 0.3031],
        [0.5316, 0.1045, 0.3638],
        [0.4193, 0.2085, 0.3722],
        [0.3780, 0.4256, 0.1964],
        [0.3779, 0.4414, 0.1807],
        [0.3277, 0.4840, 0.1883],
        [0.3231, 0.5179, 0.1590],
        [0.3570, 0.4624, 0.1807],
        [0.3105, 0.5096, 0.1798],
        [0.3781, 0.4582, 0.1637],
        [0.2905, 0.5345, 0.1750]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2050: tensor([[0.3434, 0.4817, 0.1748],
        [0.3755, 0.4091, 0.2153],
        [0.3901, 0.3671, 0.2428],
        [0.4721, 0.2309, 0.2970],
        [0.2851, 0.5167, 0.1983],
        [0.3126, 0.5031, 0.1842],
        [0.2912, 0.5520, 0.1568],
        [0.2867, 0.5385, 0.1749],
        [0.3038, 0.5139, 0.1823],
        [0.2641, 0.5746, 0.1613],
        [0.2952, 0.5388, 0.1660],
        [0.3064, 0.5412, 0.1524]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2100: tensor([[0.3978, 0.3573, 0.2450],
        [0.2812, 0.5564, 0.1624],
        [0.4299, 0.3099, 0.2602],
        [0.3666, 0.4433, 0.1901],
        [0.3773, 0.4338, 0.1889],
        [0.4171, 0.3359, 0.2470],
        [0.3657, 0.4362, 0.1982],
        [0.4091, 0.3369, 0.2540],
        [0.4714, 0.2127, 0.3159],
        [0.5402, 0.1404, 0.3194],
        [0.3344, 0.4228, 0.2428],
        [0.3516, 0.4350, 0.2135]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2150: tensor([[0.3661, 0.3859, 0.2480],
        [0.4326, 0.3340, 0.2333],
        [0.3957, 0.3549, 0.2493],
        [0.4003, 0.3805, 0.2192],
        [0.3632, 0.4372, 0.1996],
        [0.3519, 0.4641, 0.1840],
        [0.3253, 0.4994, 0.1754],
        [0.3859, 0.3855, 0.2286],
        [0.3798, 0.3944, 0.2258],
        [0.3246, 0.4713, 0.2042],
        [0.3613, 0.4398, 0.1989],
        [0.3185, 0.5026, 0.1789]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2200: tensor([[0.3572, 0.4554, 0.1874],
        [0.3544, 0.4276, 0.2180],
        [0.3843, 0.3654, 0.2503],
        [0.4214, 0.3579, 0.2207],
        [0.3623, 0.4101, 0.2276],
        [0.4325, 0.3314, 0.2360],
        [0.3410, 0.3852, 0.2738],
        [0.3619, 0.4311, 0.2070],
        [0.3843, 0.3774, 0.2383],
        [0.4443, 0.2730, 0.2827],
        [0.3721, 0.3742, 0.2537],
        [0.4453, 0.3160, 0.2387]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2250: tensor([[0.3791, 0.3901, 0.2308],
        [0.3112, 0.4523, 0.2365],
        [0.3261, 0.4275, 0.2464],
        [0.4351, 0.2954, 0.2695],
        [0.3371, 0.4527, 0.2103],
        [0.3306, 0.4857, 0.1836],
        [0.3248, 0.4598, 0.2154],
        [0.3612, 0.4049, 0.2339],
        [0.3079, 0.4816, 0.2105],
        [0.2934, 0.5058, 0.2007],
        [0.2785, 0.5425, 0.1790],
        [0.3649, 0.4009, 0.2342]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2300: tensor([[0.2423, 0.5803, 0.1774],
        [0.2016, 0.6185, 0.1799],
        [0.1955, 0.6293, 0.1751],
        [0.2278, 0.5708, 0.2014],
        [0.3182, 0.4465, 0.2353],
        [0.2436, 0.5310, 0.2255],
        [0.1621, 0.6620, 0.1759],
        [0.1975, 0.6352, 0.1672],
        [0.2244, 0.5651, 0.2105],
        [0.2217, 0.6089, 0.1694],
        [0.2196, 0.6183, 0.1621],
        [0.2322, 0.5762, 0.1915]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2350: tensor([[0.3435, 0.4223, 0.2342],
        [0.4118, 0.3567, 0.2315],
        [0.3629, 0.3959, 0.2412],
        [0.3275, 0.4288, 0.2437],
        [0.4555, 0.2998, 0.2446],
        [0.3341, 0.4096, 0.2563],
        [0.4410, 0.3210, 0.2380],
        [0.3702, 0.3955, 0.2344],
        [0.2986, 0.4910, 0.2105],
        [0.3792, 0.3644, 0.2564],
        [0.4490, 0.3078, 0.2432],
        [0.3716, 0.3617, 0.2667]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2400: tensor([[0.1734, 0.5724, 0.2542],
        [0.1489, 0.6462, 0.2049],
        [0.1176, 0.6796, 0.2027],
        [0.1667, 0.6227, 0.2106],
        [0.1780, 0.6048, 0.2172],
        [0.1983, 0.5915, 0.2102],
        [0.1729, 0.5637, 0.2634],
        [0.2121, 0.5572, 0.2307],
        [0.1529, 0.6669, 0.1802],
        [0.1212, 0.6740, 0.2048],
        [0.1870, 0.5580, 0.2551],
        [0.1484, 0.6378, 0.2138]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2450: tensor([[0.2669, 0.4561, 0.2770],
        [0.2413, 0.4837, 0.2749],
        [0.2110, 0.5350, 0.2540],
        [0.2200, 0.5116, 0.2684],
        [0.2078, 0.5332, 0.2589],
        [0.2347, 0.5100, 0.2554],
        [0.2474, 0.4747, 0.2778],
        [0.2964, 0.4318, 0.2718],
        [0.2334, 0.4963, 0.2703],
        [0.2545, 0.4559, 0.2896],
        [0.2641, 0.4451, 0.2907],
        [0.2406, 0.5254, 0.2340]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2500: tensor([[0.2557, 0.4411, 0.3033],
        [0.2996, 0.4263, 0.2741],
        [0.2741, 0.4542, 0.2717],
        [0.3389, 0.3870, 0.2742],
        [0.2830, 0.4179, 0.2991],
        [0.2139, 0.4762, 0.3098],
        [0.2895, 0.4343, 0.2762],
        [0.2512, 0.4576, 0.2912],
        [0.3571, 0.3553, 0.2876],
        [0.3131, 0.4109, 0.2761],
        [0.3593, 0.3351, 0.3056],
        [0.2875, 0.3936, 0.3188]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2550: tensor([[0.2289, 0.4751, 0.2960],
        [0.2938, 0.4318, 0.2743],
        [0.2363, 0.4968, 0.2669],
        [0.2301, 0.4638, 0.3061],
        [0.3109, 0.4136, 0.2755],
        [0.3401, 0.3683, 0.2916],
        [0.2702, 0.4110, 0.3188],
        [0.2969, 0.4249, 0.2782],
        [0.2917, 0.4011, 0.3072],
        [0.2476, 0.4849, 0.2675],
        [0.3153, 0.4104, 0.2743],
        [0.3170, 0.3736, 0.3095]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2600: tensor([[0.3053, 0.4129, 0.2818],
        [0.2801, 0.4397, 0.2801],
        [0.2722, 0.4279, 0.3000],
        [0.3043, 0.3990, 0.2967],
        [0.3053, 0.4296, 0.2651],
        [0.2906, 0.3973, 0.3121],
        [0.3654, 0.3529, 0.2817],
        [0.3043, 0.3859, 0.3098],
        [0.2800, 0.4326, 0.2874],
        [0.3067, 0.3403, 0.3530],
        [0.3208, 0.3231, 0.3560],
        [0.3888, 0.3072, 0.3040]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2650: tensor([[0.3146, 0.3891, 0.2963],
        [0.2916, 0.4281, 0.2803],
        [0.3002, 0.4060, 0.2938],
        [0.2972, 0.4217, 0.2812],
        [0.2294, 0.4957, 0.2749],
        [0.3498, 0.3389, 0.3113],
        [0.2411, 0.4896, 0.2693],
        [0.2554, 0.4510, 0.2936],
        [0.2697, 0.4581, 0.2722],
        [0.2308, 0.4972, 0.2720],
        [0.2744, 0.4255, 0.3001],
        [0.2777, 0.3887, 0.3336]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2700: tensor([[0.3141, 0.3510, 0.3349],
        [0.2315, 0.4559, 0.3126],
        [0.2586, 0.4607, 0.2808],
        [0.2278, 0.5190, 0.2532],
        [0.4459, 0.2336, 0.3205],
        [0.2221, 0.4897, 0.2882],
        [0.2529, 0.4411, 0.3060],
        [0.3227, 0.3227, 0.3546],
        [0.3239, 0.3897, 0.2864],
        [0.3623, 0.3034, 0.3343],
        [0.2006, 0.5263, 0.2731],
        [0.2191, 0.5050, 0.2760]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2750: tensor([[0.2784, 0.4082, 0.3134],
        [0.3290, 0.4033, 0.2677],
        [0.2953, 0.3968, 0.3079],
        [0.2718, 0.4546, 0.2737],
        [0.3201, 0.3552, 0.3247],
        [0.2551, 0.4341, 0.3108],
        [0.3123, 0.3772, 0.3105],
        [0.2807, 0.3865, 0.3328],
        [0.3481, 0.3364, 0.3155],
        [0.4049, 0.2819, 0.3132],
        [0.3296, 0.3502, 0.3202],
        [0.3664, 0.3137, 0.3199]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2800: tensor([[0.2739, 0.3901, 0.3359],
        [0.2679, 0.4208, 0.3113],
        [0.2941, 0.4428, 0.2631],
        [0.2704, 0.3901, 0.3395],
        [0.2762, 0.3752, 0.3486],
        [0.3403, 0.3031, 0.3566],
        [0.2704, 0.4295, 0.3002],
        [0.2375, 0.4443, 0.3181],
        [0.3383, 0.3264, 0.3353],
        [0.2528, 0.4314, 0.3158],
        [0.2759, 0.4364, 0.2877],
        [0.2776, 0.4043, 0.3181]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2850: tensor([[0.2655, 0.4409, 0.2936],
        [0.2582, 0.4301, 0.3117],
        [0.3305, 0.3278, 0.3416],
        [0.3005, 0.4021, 0.2974],
        [0.1979, 0.4967, 0.3054],
        [0.2888, 0.3747, 0.3365],
        [0.7000, 0.0620, 0.2380],
        [0.2788, 0.3948, 0.3264],
        [0.2813, 0.4175, 0.3012],
        [0.2617, 0.3984, 0.3398],
        [0.2841, 0.4189, 0.2970],
        [0.3292, 0.3445, 0.3263]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2900: tensor([[0.2359, 0.4352, 0.3288],
        [0.2109, 0.5060, 0.2831],
        [0.2039, 0.5079, 0.2882],
        [0.2601, 0.4532, 0.2867],
        [0.2645, 0.3998, 0.3357],
        [0.2298, 0.5070, 0.2632],
        [0.6761, 0.0471, 0.2768],
        [0.3198, 0.3332, 0.3471],
        [0.6968, 0.0424, 0.2608],
        [0.2866, 0.3927, 0.3207],
        [0.3887, 0.3513, 0.2600],
        [0.3625, 0.3333, 0.3042]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2950: tensor([[0.6535, 0.0443, 0.3022],
        [0.3137, 0.3755, 0.3108],
        [0.3028, 0.3405, 0.3568],
        [0.2712, 0.3857, 0.3431],
        [0.2780, 0.3871, 0.3349],
        [0.2452, 0.4658, 0.2890],
        [0.2856, 0.4316, 0.2828],
        [0.2882, 0.3964, 0.3154],
        [0.2642, 0.4581, 0.2777],
        [0.3208, 0.3908, 0.2884],
        [0.3005, 0.3716, 0.3279],
        [0.2606, 0.4379, 0.3015]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3000: tensor([[0.3367, 0.3483, 0.3150],
        [0.3350, 0.3122, 0.3528],
        [0.2817, 0.3660, 0.3523],
        [0.2808, 0.3291, 0.3900],
        [0.2270, 0.4902, 0.2828],
        [0.2677, 0.3994, 0.3328],
        [0.2285, 0.4912, 0.2802],
        [0.3726, 0.2998, 0.3277],
        [0.2571, 0.4277, 0.3153],
        [0.2950, 0.3694, 0.3356],
        [0.2859, 0.4048, 0.3094],
        [0.2267, 0.4629, 0.3104]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3050: tensor([[0.2921, 0.3519, 0.3559],
        [0.6164, 0.0565, 0.3270],
        [0.2956, 0.4039, 0.3005],
        [0.2446, 0.4638, 0.2915],
        [0.2695, 0.3788, 0.3517],
        [0.2836, 0.3640, 0.3524],
        [0.2542, 0.4101, 0.3357],
        [0.3274, 0.3658, 0.3068],
        [0.2401, 0.4605, 0.2994],
        [0.2826, 0.3552, 0.3622],
        [0.2926, 0.3715, 0.3359],
        [0.2895, 0.3817, 0.3288]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3100: tensor([[0.2716, 0.3971, 0.3313],
        [0.2642, 0.4219, 0.3140],
        [0.2268, 0.4697, 0.3035],
        [0.2744, 0.4354, 0.2902],
        [0.2885, 0.3885, 0.3231],
        [0.2837, 0.3944, 0.3219],
        [0.3032, 0.3661, 0.3307],
        [0.2839, 0.3927, 0.3233],
        [0.2760, 0.4331, 0.2909],
        [0.2528, 0.4293, 0.3179],
        [0.2427, 0.4131, 0.3442],
        [0.2451, 0.4381, 0.3168]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3150: tensor([[0.2465, 0.4178, 0.3357],
        [0.2904, 0.3673, 0.3423],
        [0.2248, 0.4266, 0.3485],
        [0.2259, 0.4531, 0.3209],
        [0.2724, 0.3531, 0.3745],
        [0.2500, 0.4094, 0.3406],
        [0.2837, 0.3355, 0.3808],
        [0.2498, 0.4232, 0.3270],
        [0.3520, 0.3295, 0.3185],
        [0.2740, 0.4081, 0.3179],
        [0.3165, 0.2923, 0.3912],
        [0.2212, 0.4679, 0.3109]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3200: tensor([[0.2837, 0.3873, 0.3290],
        [0.2283, 0.4873, 0.2844],
        [0.2320, 0.4563, 0.3117],
        [0.2803, 0.4100, 0.3098],
        [0.2857, 0.3861, 0.3282],
        [0.2871, 0.3798, 0.3331],
        [0.3213, 0.3030, 0.3757],
        [0.3008, 0.3459, 0.3533],
        [0.2563, 0.4318, 0.3120],
        [0.3306, 0.2776, 0.3918],
        [0.2623, 0.4201, 0.3177],
        [0.2298, 0.4761, 0.2942]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #3250: tensor([[0.2486, 0.4094, 0.3421],
        [0.2686, 0.3776, 0.3538],
        [0.2934, 0.3514, 0.3552],
        [0.2023, 0.4968, 0.3010],
        [0.3451, 0.3073, 0.3476],
        [0.2682, 0.4276, 0.3043],
        [0.2548, 0.4570, 0.2881],
        [0.2423, 0.4580, 0.2997],
        [0.2286, 0.4691, 0.3023],
        [0.3083, 0.3508, 0.3409],
        [0.2604, 0.4066, 0.3329],
        [0.2816, 0.3377, 0.3807]], device='cuda:0', grad_fn=<SoftmaxBackward>)
