Iter #50: tensor([[0.1577, 0.4232, 0.4191],
        [0.1497, 0.3800, 0.4703],
        [0.1604, 0.4674, 0.3722],
        [0.1458, 0.4183, 0.4359],
        [0.1606, 0.4441, 0.3954],
        [0.1646, 0.4844, 0.3510],
        [0.1653, 0.4711, 0.3635],
        [0.1724, 0.4716, 0.3560],
        [0.1629, 0.5300, 0.3072],
        [0.1457, 0.5114, 0.3430],
        [0.1368, 0.4288, 0.4345],
        [0.1752, 0.5031, 0.3217]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1169, 0.4968, 0.3863],
        [0.1202, 0.4658, 0.4140],
        [0.1169, 0.4758, 0.4073],
        [0.1321, 0.4525, 0.4154],
        [0.1497, 0.4931, 0.3572],
        [0.1156, 0.5456, 0.3388],
        [0.1147, 0.4665, 0.4188],
        [0.1296, 0.4755, 0.3948],
        [0.1175, 0.4965, 0.3860],
        [0.1375, 0.4462, 0.4164],
        [0.1224, 0.4989, 0.3786],
        [0.1200, 0.5451, 0.3349]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1521, 0.3075, 0.5404],
        [0.1178, 0.3625, 0.5197],
        [0.1467, 0.2841, 0.5692],
        [0.1324, 0.3810, 0.4865],
        [0.1770, 0.3717, 0.4513],
        [0.1375, 0.2984, 0.5641],
        [0.1279, 0.3733, 0.4988],
        [0.1919, 0.3015, 0.5066],
        [0.1429, 0.3143, 0.5428],
        [0.1249, 0.3192, 0.5558],
        [0.1517, 0.4269, 0.4214],
        [0.1227, 0.3524, 0.5248]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1459, 0.4950, 0.3591],
        [0.1537, 0.4621, 0.3841],
        [0.1382, 0.4970, 0.3648],
        [0.1474, 0.4408, 0.4118],
        [0.1254, 0.5251, 0.3495],
        [0.1506, 0.4884, 0.3610],
        [0.1647, 0.4326, 0.4026],
        [0.1568, 0.4419, 0.4012],
        [0.1367, 0.4104, 0.4529],
        [0.1700, 0.4276, 0.4024],
        [0.1459, 0.3740, 0.4801],
        [0.1452, 0.5535, 0.3013]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1710, 0.5728, 0.2562],
        [0.1333, 0.4978, 0.3689],
        [0.1454, 0.4332, 0.4214],
        [0.1433, 0.4819, 0.3749],
        [0.1411, 0.4433, 0.4155],
        [0.1833, 0.4538, 0.3629],
        [0.1693, 0.3273, 0.5035],
        [0.1483, 0.4398, 0.4119],
        [0.1560, 0.3586, 0.4854],
        [0.1334, 0.4468, 0.4198],
        [0.2025, 0.4149, 0.3826],
        [0.1771, 0.3718, 0.4511]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.1370, 0.2788, 0.5842],
        [0.1362, 0.4053, 0.4585],
        [0.1737, 0.5402, 0.2861],
        [0.1749, 0.4521, 0.3730],
        [0.1564, 0.4214, 0.4222],
        [0.1605, 0.4853, 0.3542],
        [0.1345, 0.5482, 0.3173],
        [0.1477, 0.5536, 0.2987],
        [0.1795, 0.4592, 0.3613],
        [0.1576, 0.4657, 0.3768],
        [0.1427, 0.5670, 0.2903],
        [0.1650, 0.4717, 0.3633]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.1729, 0.5499, 0.2772],
        [0.1617, 0.4587, 0.3796],
        [0.1608, 0.3753, 0.4639],
        [0.1608, 0.3831, 0.4561],
        [0.1799, 0.3834, 0.4367],
        [0.1402, 0.3672, 0.4926],
        [0.1705, 0.3299, 0.4995],
        [0.1846, 0.3123, 0.5031],
        [0.1680, 0.5894, 0.2427],
        [0.1869, 0.3689, 0.4442],
        [0.1544, 0.4905, 0.3552],
        [0.1665, 0.4049, 0.4285]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.1816, 0.5818, 0.2366],
        [0.1660, 0.4319, 0.4022],
        [0.1873, 0.4293, 0.3834],
        [0.2470, 0.4709, 0.2820],
        [0.1861, 0.4490, 0.3648],
        [0.1766, 0.4655, 0.3579],
        [0.1888, 0.5395, 0.2717],
        [0.2018, 0.4792, 0.3190],
        [0.1544, 0.4615, 0.3841],
        [0.1886, 0.4830, 0.3284],
        [0.1689, 0.4468, 0.3842],
        [0.1728, 0.4630, 0.3642]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.1947, 0.3312, 0.4741],
        [0.1670, 0.4305, 0.4026],
        [0.1513, 0.5817, 0.2670],
        [0.1716, 0.3562, 0.4722],
        [0.1876, 0.5605, 0.2519],
        [0.2087, 0.4713, 0.3200],
        [0.2278, 0.4639, 0.3082],
        [0.1849, 0.4757, 0.3394],
        [0.1551, 0.3510, 0.4938],
        [0.2209, 0.3655, 0.4136],
        [0.1748, 0.3797, 0.4455],
        [0.1828, 0.4525, 0.3647]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1470, 0.2498, 0.6032],
        [0.1735, 0.1948, 0.6317],
        [0.1628, 0.2580, 0.5791],
        [0.1733, 0.2952, 0.5315],
        [0.1772, 0.1953, 0.6275],
        [0.1721, 0.2836, 0.5443],
        [0.1808, 0.2683, 0.5509],
        [0.2075, 0.3632, 0.4292],
        [0.1965, 0.2198, 0.5837],
        [0.2062, 0.3244, 0.4694],
        [0.2187, 0.2447, 0.5367],
        [0.1894, 0.4121, 0.3985]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2080, 0.2852, 0.5068],
        [0.2214, 0.3675, 0.4110],
        [0.2224, 0.5091, 0.2684],
        [0.2424, 0.2408, 0.5168],
        [0.2187, 0.5438, 0.2375],
        [0.1904, 0.2903, 0.5193],
        [0.2103, 0.4587, 0.3309],
        [0.1968, 0.2926, 0.5107],
        [0.2101, 0.3617, 0.4282],
        [0.2360, 0.3579, 0.4061],
        [0.2084, 0.2882, 0.5035],
        [0.1818, 0.6119, 0.2063]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #600: tensor([[0.2177, 0.2877, 0.4946],
        [0.1708, 0.2680, 0.5612],
        [0.2071, 0.2434, 0.5494],
        [0.1953, 0.3135, 0.4912],
        [0.2338, 0.2966, 0.4696],
        [0.1881, 0.5942, 0.2177],
        [0.2604, 0.3082, 0.4315],
        [0.2526, 0.4321, 0.3153],
        [0.2294, 0.5034, 0.2672],
        [0.2454, 0.3823, 0.3723],
        [0.2263, 0.4649, 0.3088],
        [0.2341, 0.3805, 0.3854]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #650: tensor([[0.2691, 0.3761, 0.3548],
        [0.2640, 0.4593, 0.2767],
        [0.2470, 0.3724, 0.3806],
        [0.2682, 0.3982, 0.3336],
        [0.2217, 0.2695, 0.5088],
        [0.2118, 0.3105, 0.4778],
        [0.2253, 0.4209, 0.3538],
        [0.2547, 0.3156, 0.4297],
        [0.1862, 0.3629, 0.4509],
        [0.1894, 0.2369, 0.5737],
        [0.2605, 0.4471, 0.2924],
        [0.2828, 0.3836, 0.3336]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #700: tensor([[0.2452, 0.4944, 0.2604],
        [0.2856, 0.4250, 0.2895],
        [0.2522, 0.5707, 0.1772],
        [0.2785, 0.3586, 0.3629],
        [0.2407, 0.5138, 0.2455],
        [0.2827, 0.3850, 0.3322],
        [0.2569, 0.2801, 0.4630],
        [0.2499, 0.3745, 0.3756],
        [0.2809, 0.4128, 0.3063],
        [0.2832, 0.4237, 0.2931],
        [0.2648, 0.3839, 0.3512],
        [0.2525, 0.2705, 0.4770]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #750: tensor([[0.2536, 0.5100, 0.2365],
        [0.2910, 0.5003, 0.2087],
        [0.2593, 0.4951, 0.2456],
        [0.2637, 0.4760, 0.2603],
        [0.2662, 0.4927, 0.2411],
        [0.2128, 0.1637, 0.6236],
        [0.2632, 0.1764, 0.5604],
        [0.2320, 0.2547, 0.5133],
        [0.2807, 0.4342, 0.2851],
        [0.2570, 0.3142, 0.4289],
        [0.2586, 0.2964, 0.4450],
        [0.2462, 0.5002, 0.2536]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #800: tensor([[0.2341, 0.4946, 0.2713],
        [0.3044, 0.3914, 0.3042],
        [0.3367, 0.3452, 0.3181],
        [0.2923, 0.4500, 0.2577],
        [0.2559, 0.3868, 0.3573],
        [0.2890, 0.4321, 0.2789],
        [0.2941, 0.3983, 0.3076],
        [0.2815, 0.2998, 0.4187],
        [0.2697, 0.3720, 0.3583],
        [0.2581, 0.4373, 0.3045],
        [0.3225, 0.3756, 0.3020],
        [0.2668, 0.5508, 0.1824]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #850: tensor([[0.2913, 0.3281, 0.3806],
        [0.3021, 0.3505, 0.3474],
        [0.2879, 0.4767, 0.2354],
        [0.2977, 0.5256, 0.1767],
        [0.2878, 0.2524, 0.4599],
        [0.3143, 0.3190, 0.3667],
        [0.3107, 0.4012, 0.2881],
        [0.2872, 0.3883, 0.3244],
        [0.3058, 0.3192, 0.3750],
        [0.3095, 0.3836, 0.3069],
        [0.3085, 0.3913, 0.3003],
        [0.2904, 0.2696, 0.4401]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #900: tensor([[0.3445, 0.2812, 0.3743],
        [0.2751, 0.3199, 0.4050],
        [0.2770, 0.3416, 0.3813],
        [0.2366, 0.3462, 0.4172],
        [0.2665, 0.2342, 0.4993],
        [0.3003, 0.3019, 0.3978],
        [0.3087, 0.2622, 0.4291],
        [0.2912, 0.3238, 0.3849],
        [0.3106, 0.3392, 0.3502],
        [0.3053, 0.4011, 0.2937],
        [0.2876, 0.2776, 0.4348],
        [0.2767, 0.2621, 0.4612]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #950: tensor([[0.1977, 0.0825, 0.7197],
        [0.1858, 0.1045, 0.7097],
        [0.2068, 0.1169, 0.6762],
        [0.2416, 0.1244, 0.6340],
        [0.1552, 0.0839, 0.7609],
        [0.2208, 0.1416, 0.6377],
        [0.1695, 0.1198, 0.7108],
        [0.2499, 0.1594, 0.5907],
        [0.2555, 0.1696, 0.5750],
        [0.1845, 0.1398, 0.6756],
        [0.2160, 0.1692, 0.6148],
        [0.2167, 0.1091, 0.6742]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1000: tensor([[0.2648, 0.2672, 0.4680],
        [0.2038, 0.1357, 0.6606],
        [0.3230, 0.2310, 0.4459],
        [0.3342, 0.3104, 0.3553],
        [0.3340, 0.3146, 0.3513],
        [0.2634, 0.1817, 0.5549],
        [0.3035, 0.2697, 0.4268],
        [0.2688, 0.2294, 0.5018],
        [0.2264, 0.1359, 0.6378],
        [0.3061, 0.2041, 0.4898],
        [0.1965, 0.1497, 0.6538],
        [0.2467, 0.1534, 0.5999]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1050: tensor([[0.3716, 0.2537, 0.3747],
        [0.3080, 0.3425, 0.3495],
        [0.3026, 0.1951, 0.5023],
        [0.3151, 0.2624, 0.4225],
        [0.2511, 0.1851, 0.5638],
        [0.2684, 0.2126, 0.5190],
        [0.2662, 0.2460, 0.4878],
        [0.3237, 0.2426, 0.4337],
        [0.3388, 0.2506, 0.4106],
        [0.2506, 0.2149, 0.5345],
        [0.3411, 0.2802, 0.3787],
        [0.2905, 0.2294, 0.4801]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1100: tensor([[0.3430, 0.2385, 0.4184],
        [0.3408, 0.3107, 0.3485],
        [0.3745, 0.3007, 0.3248],
        [0.3612, 0.3165, 0.3223],
        [0.3062, 0.2156, 0.4782],
        [0.3689, 0.3140, 0.3171],
        [0.3350, 0.2684, 0.3966],
        [0.3235, 0.3074, 0.3692],
        [0.3555, 0.2658, 0.3787],
        [0.3111, 0.2209, 0.4680],
        [0.3176, 0.2633, 0.4191],
        [0.3738, 0.3207, 0.3055]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1150: tensor([[0.2823, 0.2117, 0.5061],
        [0.4061, 0.3763, 0.2175],
        [0.3154, 0.2835, 0.4011],
        [0.3730, 0.2772, 0.3498],
        [0.3172, 0.2430, 0.4398],
        [0.2878, 0.1945, 0.5177],
        [0.2779, 0.1946, 0.5274],
        [0.3542, 0.2362, 0.4096],
        [0.3574, 0.2677, 0.3750],
        [0.2730, 0.1920, 0.5350],
        [0.3023, 0.2575, 0.4402],
        [0.3281, 0.3218, 0.3501]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1200: tensor([[0.3004, 0.2653, 0.4343],
        [0.2435, 0.1955, 0.5610],
        [0.3157, 0.2518, 0.4325],
        [0.3039, 0.2457, 0.4505],
        [0.3208, 0.2494, 0.4297],
        [0.4046, 0.2819, 0.3135],
        [0.3423, 0.2514, 0.4063],
        [0.3382, 0.3023, 0.3596],
        [0.2457, 0.1894, 0.5649],
        [0.2768, 0.2019, 0.5213],
        [0.3102, 0.2769, 0.4130],
        [0.3662, 0.3134, 0.3204]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1250: tensor([[0.3634, 0.2475, 0.3892],
        [0.3032, 0.2250, 0.4718],
        [0.3562, 0.2410, 0.4027],
        [0.4252, 0.3257, 0.2491],
        [0.3323, 0.2989, 0.3688],
        [0.2885, 0.2688, 0.4427],
        [0.3486, 0.2381, 0.4133],
        [0.3462, 0.2728, 0.3810],
        [0.2814, 0.2592, 0.4593],
        [0.3451, 0.2921, 0.3627],
        [0.2027, 0.1526, 0.6447],
        [0.2680, 0.2755, 0.4566]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1300: tensor([[0.3771, 0.3031, 0.3198],
        [0.3780, 0.3159, 0.3061],
        [0.3666, 0.3250, 0.3085],
        [0.3042, 0.3048, 0.3910],
        [0.4130, 0.3880, 0.1990],
        [0.3145, 0.2318, 0.4538],
        [0.3214, 0.2447, 0.4339],
        [0.2766, 0.2277, 0.4957],
        [0.3728, 0.2805, 0.3467],
        [0.3441, 0.2733, 0.3826],
        [0.3416, 0.2562, 0.4022],
        [0.2765, 0.1903, 0.5332]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1350: tensor([[0.2531, 0.2161, 0.5308],
        [0.2200, 0.1589, 0.6211],
        [0.3153, 0.3282, 0.3565],
        [0.3421, 0.3038, 0.3541],
        [0.4284, 0.3642, 0.2074],
        [0.3968, 0.2919, 0.3113],
        [0.3876, 0.3290, 0.2833],
        [0.3475, 0.2747, 0.3778],
        [0.3371, 0.3480, 0.3149],
        [0.3423, 0.3194, 0.3383],
        [0.3664, 0.3367, 0.2969],
        [0.2857, 0.3070, 0.4073]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1400: tensor([[0.3295, 0.2635, 0.4070],
        [0.3714, 0.2922, 0.3364],
        [0.2664, 0.2061, 0.5275],
        [0.2651, 0.1991, 0.5358],
        [0.2919, 0.2466, 0.4616],
        [0.2988, 0.2196, 0.4815],
        [0.2389, 0.2081, 0.5531],
        [0.3053, 0.2153, 0.4794],
        [0.2494, 0.2280, 0.5226],
        [0.2853, 0.2457, 0.4690],
        [0.3134, 0.2142, 0.4725],
        [0.2199, 0.2294, 0.5507]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1450: tensor([[0.3498, 0.2728, 0.3775],
        [0.3472, 0.2844, 0.3684],
        [0.2272, 0.2140, 0.5588],
        [0.3307, 0.2979, 0.3714],
        [0.3735, 0.3248, 0.3017],
        [0.3886, 0.3299, 0.2815],
        [0.3692, 0.3231, 0.3078],
        [0.4536, 0.3664, 0.1800],
        [0.3759, 0.3187, 0.3055],
        [0.4421, 0.3354, 0.2225],
        [0.3935, 0.4301, 0.1764],
        [0.3466, 0.2864, 0.3670]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1500: tensor([[0.3136, 0.3036, 0.3827],
        [0.3025, 0.2723, 0.4252],
        [0.2903, 0.3018, 0.4078],
        [0.3959, 0.3275, 0.2767],
        [0.3172, 0.3156, 0.3672],
        [0.3364, 0.3419, 0.3216],
        [0.3906, 0.3697, 0.2397],
        [0.3004, 0.2671, 0.4325],
        [0.3097, 0.3850, 0.3053],
        [0.3938, 0.3151, 0.2911],
        [0.3689, 0.3035, 0.3276],
        [0.3438, 0.3106, 0.3456]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1550: tensor([[0.3295, 0.2991, 0.3714],
        [0.3862, 0.3330, 0.2809],
        [0.2613, 0.2498, 0.4889],
        [0.3946, 0.3988, 0.2065],
        [0.3175, 0.2941, 0.3883],
        [0.2954, 0.3229, 0.3817],
        [0.3150, 0.2997, 0.3854],
        [0.2936, 0.2498, 0.4566],
        [0.3513, 0.2756, 0.3732],
        [0.3015, 0.2575, 0.4410],
        [0.4507, 0.3755, 0.1738],
        [0.2808, 0.3076, 0.4116]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1600: tensor([[0.3032, 0.3095, 0.3873],
        [0.3637, 0.3641, 0.2723],
        [0.1898, 0.1984, 0.6118],
        [0.2232, 0.2419, 0.5349],
        [0.2615, 0.2680, 0.4704],
        [0.3430, 0.3498, 0.3072],
        [0.4166, 0.4000, 0.1833],
        [0.4525, 0.4373, 0.1102],
        [0.1973, 0.1958, 0.6070],
        [0.3134, 0.2624, 0.4242],
        [0.1677, 0.1773, 0.6550],
        [0.1501, 0.1440, 0.7058]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1650: tensor([[0.4181, 0.3815, 0.2004],
        [0.4362, 0.4028, 0.1610],
        [0.3805, 0.3831, 0.2364],
        [0.3621, 0.3342, 0.3038],
        [0.3452, 0.2755, 0.3793],
        [0.3117, 0.3348, 0.3535],
        [0.4051, 0.4004, 0.1945],
        [0.3343, 0.3124, 0.3533],
        [0.4028, 0.4010, 0.1963],
        [0.3772, 0.3263, 0.2966],
        [0.4302, 0.3451, 0.2247],
        [0.3023, 0.2783, 0.4194]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1700: tensor([[0.2995, 0.3666, 0.3339],
        [0.3231, 0.3014, 0.3756],
        [0.3500, 0.3500, 0.3000],
        [0.3037, 0.2756, 0.4208],
        [0.3426, 0.3784, 0.2789],
        [0.2485, 0.2330, 0.5185],
        [0.2454, 0.2638, 0.4908],
        [0.2317, 0.2359, 0.5324],
        [0.2283, 0.2138, 0.5579],
        [0.3123, 0.3161, 0.3715],
        [0.3278, 0.3276, 0.3446],
        [0.3814, 0.3064, 0.3122]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1750: tensor([[0.3157, 0.3429, 0.3414],
        [0.3792, 0.3408, 0.2799],
        [0.2781, 0.2626, 0.4593],
        [0.3528, 0.3816, 0.2656],
        [0.3704, 0.3339, 0.2957],
        [0.3195, 0.3167, 0.3639],
        [0.2676, 0.2558, 0.4766],
        [0.3786, 0.3617, 0.2598],
        [0.4415, 0.3879, 0.1706],
        [0.3727, 0.3373, 0.2899],
        [0.2667, 0.2656, 0.4677],
        [0.3343, 0.3247, 0.3410]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1800: tensor([[0.3547, 0.3398, 0.3055],
        [0.2712, 0.2889, 0.4399],
        [0.3813, 0.3519, 0.2668],
        [0.3807, 0.3254, 0.2939],
        [0.3612, 0.3880, 0.2509],
        [0.3215, 0.3264, 0.3521],
        [0.3442, 0.3908, 0.2650],
        [0.3225, 0.3108, 0.3667],
        [0.3857, 0.3327, 0.2816],
        [0.3747, 0.3748, 0.2505],
        [0.4251, 0.3566, 0.2183],
        [0.3810, 0.3422, 0.2768]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1850: tensor([[0.4070, 0.3726, 0.2203],
        [0.3435, 0.3608, 0.2956],
        [0.3641, 0.3893, 0.2466],
        [0.2994, 0.2761, 0.4245],
        [0.3423, 0.3227, 0.3351],
        [0.3112, 0.3165, 0.3723],
        [0.3862, 0.3294, 0.2844],
        [0.3649, 0.3745, 0.2606],
        [0.2455, 0.2722, 0.4823],
        [0.3371, 0.3587, 0.3043],
        [0.2765, 0.3358, 0.3877],
        [0.3443, 0.3236, 0.3320]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1900: tensor([[0.2922, 0.3794, 0.3284],
        [0.2733, 0.2706, 0.4561],
        [0.3119, 0.3303, 0.3578],
        [0.3815, 0.3757, 0.2429],
        [0.3902, 0.3742, 0.2356],
        [0.3452, 0.3344, 0.3205],
        [0.3221, 0.3614, 0.3164],
        [0.2804, 0.3312, 0.3884],
        [0.2409, 0.2578, 0.5013],
        [0.3435, 0.3587, 0.2978],
        [0.3902, 0.3591, 0.2507],
        [0.3263, 0.3087, 0.3650]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1950: tensor([[0.3228, 0.3200, 0.3572],
        [0.3574, 0.3707, 0.2719],
        [0.3735, 0.3787, 0.2478],
        [0.3742, 0.3460, 0.2798],
        [0.3591, 0.3397, 0.3012],
        [0.2981, 0.3141, 0.3878],
        [0.3732, 0.3853, 0.2415],
        [0.3360, 0.2925, 0.3716],
        [0.3536, 0.4027, 0.2437],
        [0.3795, 0.4033, 0.2172],
        [0.3506, 0.3355, 0.3139],
        [0.3640, 0.3828, 0.2532]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2000: tensor([[0.3357, 0.3327, 0.3316],
        [0.2554, 0.2818, 0.4628],
        [0.2591, 0.2771, 0.4638],
        [0.2874, 0.3020, 0.4107],
        [0.3687, 0.3536, 0.2778],
        [0.3513, 0.3393, 0.3094],
        [0.4181, 0.4150, 0.1668],
        [0.3533, 0.3494, 0.2973],
        [0.3433, 0.3351, 0.3216],
        [0.3631, 0.3511, 0.2858],
        [0.3010, 0.3179, 0.3812],
        [0.4228, 0.3509, 0.2262]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2050: tensor([[0.3474, 0.3582, 0.2944],
        [0.3765, 0.3535, 0.2700],
        [0.2468, 0.2506, 0.5026],
        [0.2612, 0.2977, 0.4412],
        [0.2630, 0.3167, 0.4203],
        [0.2846, 0.2992, 0.4162],
        [0.3109, 0.3170, 0.3721],
        [0.2850, 0.2871, 0.4279],
        [0.3208, 0.2981, 0.3812],
        [0.3260, 0.3158, 0.3582],
        [0.3273, 0.3550, 0.3177],
        [0.3802, 0.3280, 0.2918]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2100: tensor([[0.3753, 0.3767, 0.2479],
        [0.3797, 0.3546, 0.2657],
        [0.3968, 0.4119, 0.1913],
        [0.3158, 0.3117, 0.3724],
        [0.3080, 0.2999, 0.3921],
        [0.3737, 0.3956, 0.2307],
        [0.2998, 0.2762, 0.4240],
        [0.2993, 0.3308, 0.3699],
        [0.2129, 0.2109, 0.5762],
        [0.3800, 0.3510, 0.2690],
        [0.3353, 0.3683, 0.2964],
        [0.3147, 0.3466, 0.3387]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2150: tensor([[0.2470, 0.2657, 0.4872],
        [0.2944, 0.2511, 0.4545],
        [0.2804, 0.2928, 0.4267],
        [0.3159, 0.3394, 0.3447],
        [0.3554, 0.3644, 0.2802],
        [0.3153, 0.3109, 0.3738],
        [0.3493, 0.3161, 0.3346],
        [0.3133, 0.3015, 0.3852],
        [0.3673, 0.3712, 0.2615],
        [0.3822, 0.3508, 0.2670],
        [0.2389, 0.2827, 0.4784],
        [0.3459, 0.3067, 0.3475]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2200: tensor([[0.3409, 0.3304, 0.3287],
        [0.3107, 0.3402, 0.3490],
        [0.3588, 0.3647, 0.2764],
        [0.4231, 0.3864, 0.1905],
        [0.3500, 0.2940, 0.3560],
        [0.3434, 0.3340, 0.3226],
        [0.3428, 0.3364, 0.3208],
        [0.2852, 0.2697, 0.4451],
        [0.3544, 0.3767, 0.2689],
        [0.2947, 0.3229, 0.3824],
        [0.3564, 0.4161, 0.2275],
        [0.3308, 0.3498, 0.3193]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2250: tensor([[0.2379, 0.2315, 0.5306],
        [0.2455, 0.2785, 0.4761],
        [0.2023, 0.2152, 0.5825],
        [0.2141, 0.2199, 0.5660],
        [0.2900, 0.3019, 0.4081],
        [0.3500, 0.3285, 0.3215],
        [0.2691, 0.3014, 0.4295],
        [0.2777, 0.2897, 0.4327],
        [0.2774, 0.2949, 0.4277],
        [0.2646, 0.2856, 0.4498],
        [0.4164, 0.4113, 0.1723],
        [0.4096, 0.3846, 0.2058]], device='cuda:0', grad_fn=<SoftmaxBackward>)
