Iter #50: tensor([[0.3486, 0.2132, 0.4382],
        [0.3690, 0.2035, 0.4275],
        [0.3780, 0.2050, 0.4170],
        [0.3718, 0.2256, 0.4026],
        [0.3543, 0.1949, 0.4508],
        [0.3870, 0.1812, 0.4319],
        [0.3084, 0.1553, 0.5363],
        [0.3608, 0.1950, 0.4442],
        [0.2783, 0.1436, 0.5781],
        [0.3621, 0.1822, 0.4557],
        [0.3323, 0.1398, 0.5280],
        [0.3634, 0.1759, 0.4607]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3516, 0.1729, 0.4755],
        [0.3833, 0.1738, 0.4429],
        [0.3741, 0.1666, 0.4594],
        [0.3198, 0.1779, 0.5023],
        [0.2989, 0.1344, 0.5667],
        [0.3128, 0.1372, 0.5500],
        [0.3054, 0.1422, 0.5524],
        [0.3225, 0.1728, 0.5048],
        [0.3239, 0.1590, 0.5171],
        [0.3126, 0.1971, 0.4903],
        [0.2441, 0.1915, 0.5644],
        [0.3249, 0.1282, 0.5469]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3114, 0.1347, 0.5539],
        [0.3406, 0.1402, 0.5192],
        [0.3198, 0.1336, 0.5467],
        [0.3274, 0.1517, 0.5208],
        [0.3490, 0.1451, 0.5059],
        [0.3823, 0.1559, 0.4618],
        [0.3943, 0.1439, 0.4618],
        [0.3773, 0.1564, 0.4663],
        [0.2854, 0.1209, 0.5937],
        [0.3799, 0.1603, 0.4598],
        [0.4097, 0.1500, 0.4403],
        [0.3959, 0.1392, 0.4649]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3828, 0.1605, 0.4566],
        [0.3798, 0.1459, 0.4743],
        [0.4259, 0.1335, 0.4406],
        [0.3411, 0.1296, 0.5292],
        [0.3939, 0.1339, 0.4722],
        [0.3349, 0.1085, 0.5565],
        [0.4256, 0.1402, 0.4341],
        [0.3464, 0.1693, 0.4843],
        [0.3879, 0.1350, 0.4771],
        [0.3508, 0.1256, 0.5236],
        [0.3364, 0.1030, 0.5606],
        [0.4364, 0.1311, 0.4325]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3405, 0.1507, 0.5089],
        [0.4278, 0.1785, 0.3938],
        [0.2903, 0.1194, 0.5903],
        [0.4672, 0.1864, 0.3464],
        [0.3991, 0.1440, 0.4570],
        [0.3677, 0.1785, 0.4538],
        [0.2918, 0.1376, 0.5706],
        [0.3471, 0.1343, 0.5185],
        [0.4447, 0.1614, 0.3939],
        [0.3765, 0.1744, 0.4490],
        [0.4333, 0.1500, 0.4167],
        [0.3793, 0.1522, 0.4685]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3453, 0.1476, 0.5072],
        [0.3037, 0.1458, 0.5505],
        [0.4257, 0.1837, 0.3906],
        [0.3879, 0.1561, 0.4560],
        [0.3535, 0.1602, 0.4862],
        [0.4683, 0.1790, 0.3527],
        [0.3759, 0.1842, 0.4400],
        [0.3385, 0.1588, 0.5027],
        [0.3307, 0.1607, 0.5086],
        [0.3863, 0.2078, 0.4059],
        [0.3784, 0.1690, 0.4525],
        [0.3468, 0.1620, 0.4912]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3974, 0.1908, 0.4119],
        [0.3973, 0.1604, 0.4423],
        [0.3665, 0.1709, 0.4626],
        [0.4213, 0.1862, 0.3926],
        [0.4700, 0.2176, 0.3124],
        [0.4854, 0.1707, 0.3439],
        [0.4261, 0.1982, 0.3757],
        [0.4643, 0.1756, 0.3601],
        [0.4862, 0.1759, 0.3379],
        [0.4356, 0.1831, 0.3813],
        [0.5230, 0.1725, 0.3046],
        [0.4183, 0.1639, 0.4178]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3941, 0.2019, 0.4040],
        [0.4475, 0.1711, 0.3815],
        [0.4317, 0.1740, 0.3942],
        [0.4268, 0.2127, 0.3605],
        [0.3885, 0.1912, 0.4203],
        [0.4181, 0.1967, 0.3852],
        [0.4575, 0.1904, 0.3521],
        [0.4186, 0.1860, 0.3954],
        [0.2088, 0.1367, 0.6545],
        [0.2393, 0.1709, 0.5898],
        [0.3704, 0.1876, 0.4419],
        [0.4897, 0.1944, 0.3159]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4636, 0.2421, 0.2944],
        [0.3849, 0.2305, 0.3846],
        [0.4229, 0.2023, 0.3748],
        [0.4604, 0.2176, 0.3220],
        [0.3642, 0.2033, 0.4326],
        [0.4072, 0.2215, 0.3714],
        [0.3763, 0.2093, 0.4144],
        [0.3752, 0.2309, 0.3939],
        [0.4400, 0.1909, 0.3691],
        [0.2688, 0.1987, 0.5325],
        [0.3635, 0.2248, 0.4117],
        [0.4871, 0.2390, 0.2739]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3337, 0.2695, 0.3968],
        [0.4199, 0.2207, 0.3593],
        [0.4240, 0.2620, 0.3139],
        [0.3981, 0.2204, 0.3815],
        [0.3644, 0.2206, 0.4151],
        [0.2849, 0.2315, 0.4836],
        [0.4231, 0.2649, 0.3120],
        [0.2778, 0.2112, 0.5110],
        [0.3510, 0.2118, 0.4371],
        [0.4053, 0.2071, 0.3876],
        [0.3716, 0.2927, 0.3357],
        [0.4877, 0.2328, 0.2796]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3159, 0.2176, 0.4665],
        [0.3921, 0.2486, 0.3593],
        [0.4088, 0.2276, 0.3636],
        [0.4301, 0.2479, 0.3220],
        [0.3879, 0.2434, 0.3687],
        [0.3982, 0.2317, 0.3701],
        [0.4134, 0.2565, 0.3301],
        [0.4519, 0.2109, 0.3372],
        [0.3627, 0.2172, 0.4201],
        [0.4275, 0.2074, 0.3651],
        [0.3478, 0.2402, 0.4120],
        [0.3370, 0.2360, 0.4270]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #600: tensor([[0.3746, 0.2593, 0.3661],
        [0.3358, 0.1998, 0.4644],
        [0.3266, 0.2204, 0.4531],
        [0.3970, 0.2340, 0.3690],
        [0.3999, 0.2431, 0.3570],
        [0.4351, 0.2764, 0.2885],
        [0.4263, 0.2260, 0.3477],
        [0.3002, 0.2644, 0.4354],
        [0.3685, 0.2482, 0.3833],
        [0.3595, 0.2908, 0.3497],
        [0.3462, 0.2200, 0.4338],
        [0.4590, 0.2223, 0.3187]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #650: tensor([[0.2695, 0.2322, 0.4983],
        [0.3527, 0.2486, 0.3987],
        [0.2043, 0.2495, 0.5463],
        [0.4196, 0.2520, 0.3284],
        [0.3888, 0.2806, 0.3307],
        [0.4442, 0.2717, 0.2841],
        [0.4825, 0.2557, 0.2618],
        [0.4378, 0.2625, 0.2996],
        [0.3082, 0.2297, 0.4621],
        [0.4287, 0.2814, 0.2898],
        [0.3309, 0.2544, 0.4146],
        [0.3638, 0.2829, 0.3533]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #700: tensor([[0.4538, 0.2756, 0.2707],
        [0.3256, 0.2640, 0.4104],
        [0.3955, 0.2459, 0.3587],
        [0.3895, 0.2849, 0.3256],
        [0.3926, 0.2442, 0.3633],
        [0.3428, 0.2882, 0.3689],
        [0.3603, 0.2343, 0.4053],
        [0.1452, 0.1915, 0.6633],
        [0.3195, 0.2768, 0.4038],
        [0.3853, 0.3181, 0.2966],
        [0.4842, 0.2713, 0.2445],
        [0.3679, 0.2867, 0.3454]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #750: tensor([[0.3714, 0.2729, 0.3556],
        [0.3570, 0.2721, 0.3709],
        [0.2460, 0.2164, 0.5375],
        [0.4714, 0.2565, 0.2721],
        [0.2769, 0.2640, 0.4592],
        [0.3181, 0.2814, 0.4005],
        [0.4412, 0.2672, 0.2917],
        [0.3949, 0.2843, 0.3208],
        [0.3140, 0.2485, 0.4375],
        [0.3651, 0.2869, 0.3479],
        [0.3710, 0.3289, 0.3000],
        [0.4755, 0.2763, 0.2481]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #800: tensor([[0.1688, 0.2062, 0.6250],
        [0.3630, 0.2833, 0.3537],
        [0.2608, 0.3058, 0.4334],
        [0.2219, 0.2453, 0.5328],
        [0.3329, 0.2860, 0.3811],
        [0.2930, 0.2747, 0.4324],
        [0.2418, 0.2991, 0.4591],
        [0.3178, 0.3093, 0.3729],
        [0.3978, 0.2803, 0.3219],
        [0.3808, 0.2850, 0.3342],
        [0.4082, 0.2273, 0.3645],
        [0.2711, 0.2696, 0.4594]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #850: tensor([[0.4196, 0.2941, 0.2863],
        [0.4082, 0.3272, 0.2645],
        [0.3069, 0.3058, 0.3873],
        [0.3855, 0.3381, 0.2763],
        [0.3506, 0.2764, 0.3731],
        [0.4733, 0.3117, 0.2150],
        [0.1584, 0.2016, 0.6400],
        [0.4282, 0.3220, 0.2498],
        [0.1475, 0.1940, 0.6585],
        [0.1631, 0.2063, 0.6306],
        [0.1257, 0.1924, 0.6818],
        [0.4360, 0.3313, 0.2327]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #900: tensor([[0.2650, 0.2967, 0.4383],
        [0.2096, 0.2722, 0.5183],
        [0.2471, 0.2912, 0.4616],
        [0.2518, 0.2795, 0.4688],
        [0.2067, 0.2194, 0.5739],
        [0.3392, 0.3459, 0.3149],
        [0.2410, 0.2794, 0.4796],
        [0.2878, 0.2936, 0.4186],
        [0.2478, 0.2716, 0.4806],
        [0.3644, 0.3185, 0.3171],
        [0.3441, 0.2892, 0.3667],
        [0.2527, 0.3341, 0.4132]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #950: tensor([[0.3281, 0.3116, 0.3603],
        [0.2344, 0.2658, 0.4998],
        [0.4105, 0.2778, 0.3117],
        [0.3435, 0.3219, 0.3346],
        [0.1558, 0.1819, 0.6623],
        [0.3037, 0.3016, 0.3947],
        [0.2807, 0.3655, 0.3538],
        [0.2340, 0.2737, 0.4923],
        [0.3133, 0.2924, 0.3943],
        [0.3087, 0.3363, 0.3549],
        [0.3083, 0.3222, 0.3694],
        [0.1093, 0.1794, 0.7112]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1000: tensor([[0.2775, 0.2902, 0.4323],
        [0.3020, 0.3285, 0.3695],
        [0.3656, 0.4192, 0.2152],
        [0.3147, 0.3628, 0.3225],
        [0.2529, 0.2761, 0.4710],
        [0.3959, 0.3594, 0.2447],
        [0.2635, 0.2850, 0.4516],
        [0.3250, 0.3903, 0.2846],
        [0.3166, 0.3264, 0.3570],
        [0.3024, 0.3356, 0.3621],
        [0.2662, 0.3045, 0.4293],
        [0.3083, 0.2963, 0.3954]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1050: tensor([[0.2757, 0.2955, 0.4288],
        [0.2411, 0.2457, 0.5133],
        [0.3023, 0.3192, 0.3785],
        [0.2301, 0.2315, 0.5384],
        [0.3463, 0.3431, 0.3107],
        [0.1649, 0.1969, 0.6382],
        [0.3275, 0.3310, 0.3414],
        [0.2807, 0.3372, 0.3821],
        [0.2816, 0.3213, 0.3971],
        [0.2762, 0.3241, 0.3997],
        [0.3046, 0.3336, 0.3618],
        [0.2711, 0.2968, 0.4320]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1100: tensor([[0.1915, 0.2465, 0.5620],
        [0.3250, 0.3490, 0.3261],
        [0.3301, 0.3060, 0.3639],
        [0.2179, 0.2591, 0.5230],
        [0.1899, 0.2857, 0.5244],
        [0.3180, 0.3058, 0.3762],
        [0.2354, 0.2728, 0.4918],
        [0.2463, 0.3260, 0.4277],
        [0.1785, 0.2619, 0.5596],
        [0.2067, 0.2347, 0.5586],
        [0.2683, 0.3047, 0.4270],
        [0.2606, 0.3763, 0.3630]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1150: tensor([[0.1334, 0.1605, 0.7061],
        [0.2293, 0.3389, 0.4318],
        [0.2705, 0.3222, 0.4073],
        [0.3210, 0.3133, 0.3657],
        [0.2848, 0.3331, 0.3821],
        [0.3740, 0.3495, 0.2765],
        [0.2866, 0.3661, 0.3472],
        [0.3002, 0.3835, 0.3163],
        [0.2449, 0.2619, 0.4932],
        [0.3500, 0.3139, 0.3361],
        [0.3349, 0.3403, 0.3248],
        [0.3039, 0.3102, 0.3859]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1200: tensor([[0.3991, 0.3945, 0.2063],
        [0.3024, 0.3448, 0.3527],
        [0.2789, 0.3348, 0.3863],
        [0.2790, 0.3496, 0.3714],
        [0.2735, 0.3333, 0.3932],
        [0.2723, 0.3237, 0.4040],
        [0.3296, 0.3865, 0.2840],
        [0.2236, 0.2735, 0.5029],
        [0.2812, 0.3350, 0.3838],
        [0.3741, 0.3899, 0.2360],
        [0.2758, 0.3839, 0.3403],
        [0.1393, 0.1695, 0.6912]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1250: tensor([[0.3853, 0.4273, 0.1874],
        [0.1123, 0.1146, 0.7731],
        [0.2909, 0.3316, 0.3775],
        [0.2295, 0.3436, 0.4268],
        [0.3283, 0.3826, 0.2891],
        [0.2485, 0.3027, 0.4488],
        [0.2894, 0.3461, 0.3645],
        [0.2272, 0.2638, 0.5090],
        [0.2559, 0.3229, 0.4212],
        [0.2835, 0.3185, 0.3980],
        [0.2811, 0.3346, 0.3843],
        [0.3316, 0.3597, 0.3087]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1300: tensor([[0.3742, 0.4012, 0.2246],
        [0.3036, 0.3944, 0.3020],
        [0.3720, 0.4180, 0.2100],
        [0.2730, 0.3886, 0.3384],
        [0.2494, 0.3059, 0.4448],
        [0.2816, 0.3112, 0.4072],
        [0.2106, 0.2552, 0.5341],
        [0.3629, 0.4012, 0.2359],
        [0.2790, 0.3369, 0.3841],
        [0.3456, 0.3942, 0.2602],
        [0.2425, 0.3404, 0.4171],
        [0.2441, 0.2948, 0.4611]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1350: tensor([[0.3176, 0.3868, 0.2956],
        [0.2461, 0.2594, 0.4945],
        [0.2671, 0.3230, 0.4099],
        [0.3526, 0.4318, 0.2156],
        [0.2596, 0.2469, 0.4935],
        [0.2149, 0.2183, 0.5669],
        [0.2964, 0.3331, 0.3705],
        [0.2855, 0.3268, 0.3877],
        [0.3463, 0.3971, 0.2566],
        [0.3184, 0.4342, 0.2474],
        [0.2997, 0.3197, 0.3806],
        [0.2607, 0.2950, 0.4443]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1400: tensor([[0.2502, 0.2331, 0.5167],
        [0.2799, 0.3078, 0.4123],
        [0.2958, 0.3804, 0.3238],
        [0.2911, 0.3331, 0.3758],
        [0.2629, 0.2889, 0.4483],
        [0.2846, 0.3673, 0.3481],
        [0.3622, 0.3860, 0.2518],
        [0.3266, 0.4391, 0.2343],
        [0.3264, 0.4450, 0.2287],
        [0.3161, 0.3682, 0.3157],
        [0.3144, 0.3796, 0.3059],
        [0.2316, 0.2348, 0.5335]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1450: tensor([[0.3272, 0.4161, 0.2567],
        [0.3813, 0.3851, 0.2336],
        [0.3482, 0.4534, 0.1983],
        [0.3424, 0.4047, 0.2529],
        [0.3235, 0.3916, 0.2849],
        [0.3348, 0.3646, 0.3006],
        [0.4034, 0.3714, 0.2253],
        [0.3492, 0.4197, 0.2310],
        [0.2980, 0.3700, 0.3320],
        [0.2912, 0.3960, 0.3129],
        [0.2855, 0.3392, 0.3753],
        [0.3613, 0.3534, 0.2853]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1500: tensor([[0.2588, 0.3409, 0.4002],
        [0.1236, 0.1396, 0.7368],
        [0.3302, 0.3253, 0.3444],
        [0.3447, 0.3931, 0.2621],
        [0.3818, 0.3507, 0.2675],
        [0.3172, 0.4382, 0.2446],
        [0.3590, 0.4089, 0.2322],
        [0.3524, 0.3610, 0.2866],
        [0.2824, 0.3416, 0.3760],
        [0.3206, 0.3107, 0.3687],
        [0.3131, 0.4096, 0.2772],
        [0.2908, 0.2693, 0.4400]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1550: tensor([[0.3076, 0.2986, 0.3939],
        [0.3157, 0.3983, 0.2860],
        [0.2818, 0.3101, 0.4081],
        [0.3718, 0.3889, 0.2393],
        [0.3240, 0.3998, 0.2761],
        [0.2731, 0.3176, 0.4093],
        [0.3124, 0.3371, 0.3505],
        [0.1792, 0.1828, 0.6379],
        [0.2417, 0.2299, 0.5284],
        [0.2676, 0.2454, 0.4870],
        [0.3705, 0.4361, 0.1934],
        [0.3090, 0.3056, 0.3854]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1600: tensor([[0.3112, 0.3687, 0.3201],
        [0.3369, 0.4038, 0.2592],
        [0.3571, 0.3755, 0.2674],
        [0.3639, 0.3587, 0.2775],
        [0.3336, 0.3656, 0.3008],
        [0.3015, 0.3403, 0.3582],
        [0.3148, 0.3693, 0.3158],
        [0.3516, 0.3802, 0.2682],
        [0.3687, 0.3621, 0.2692],
        [0.3637, 0.3986, 0.2377],
        [0.3098, 0.4229, 0.2673],
        [0.3420, 0.3933, 0.2647]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1650: tensor([[0.3126, 0.3859, 0.3015],
        [0.2865, 0.2535, 0.4601],
        [0.2688, 0.2523, 0.4788],
        [0.2805, 0.2877, 0.4318],
        [0.2943, 0.2765, 0.4292],
        [0.2993, 0.3365, 0.3641],
        [0.2129, 0.2254, 0.5617],
        [0.3069, 0.3016, 0.3914],
        [0.3367, 0.3060, 0.3573],
        [0.3376, 0.3424, 0.3200],
        [0.2937, 0.2800, 0.4263],
        [0.3171, 0.3910, 0.2919]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1700: tensor([[0.3856, 0.3461, 0.2683],
        [0.2961, 0.3473, 0.3566],
        [0.2510, 0.2866, 0.4625],
        [0.3368, 0.3402, 0.3230],
        [0.2190, 0.2168, 0.5642],
        [0.3065, 0.2947, 0.3987],
        [0.3257, 0.3566, 0.3177],
        [0.3507, 0.3841, 0.2652],
        [0.2572, 0.2472, 0.4956],
        [0.2934, 0.2785, 0.4281],
        [0.3661, 0.3803, 0.2536],
        [0.2502, 0.2405, 0.5093]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1750: tensor([[0.2683, 0.2882, 0.4435],
        [0.2901, 0.2762, 0.4337],
        [0.3273, 0.3119, 0.3608],
        [0.2950, 0.2798, 0.4252],
        [0.3018, 0.3605, 0.3377],
        [0.3037, 0.3415, 0.3548],
        [0.2869, 0.3495, 0.3635],
        [0.3405, 0.3704, 0.2892],
        [0.2908, 0.2861, 0.4231],
        [0.3138, 0.3279, 0.3584],
        [0.3072, 0.2461, 0.4467],
        [0.3403, 0.4096, 0.2501]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1800: tensor([[0.3211, 0.3144, 0.3645],
        [0.3348, 0.3535, 0.3117],
        [0.3607, 0.3212, 0.3181],
        [0.3310, 0.3982, 0.2708],
        [0.2298, 0.2198, 0.5504],
        [0.3068, 0.3661, 0.3271],
        [0.3493, 0.3533, 0.2975],
        [0.3724, 0.4026, 0.2250],
        [0.1425, 0.1418, 0.7157],
        [0.3674, 0.3817, 0.2509],
        [0.3273, 0.3321, 0.3406],
        [0.2856, 0.3319, 0.3825]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1850: tensor([[0.3453, 0.3104, 0.3444],
        [0.3858, 0.4150, 0.1993],
        [0.2352, 0.1942, 0.5705],
        [0.2458, 0.3197, 0.4344],
        [0.2235, 0.2078, 0.5687],
        [0.3430, 0.3524, 0.3046],
        [0.3210, 0.3025, 0.3765],
        [0.2611, 0.2647, 0.4742],
        [0.3216, 0.3067, 0.3717],
        [0.3484, 0.3364, 0.3152],
        [0.3406, 0.4688, 0.1906],
        [0.2511, 0.2272, 0.5217]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1900: tensor([[0.2738, 0.2973, 0.4289],
        [0.1353, 0.0985, 0.7662],
        [0.3628, 0.3449, 0.2923],
        [0.4063, 0.3835, 0.2102],
        [0.3446, 0.3431, 0.3123],
        [0.2401, 0.2210, 0.5389],
        [0.3998, 0.3822, 0.2180],
        [0.3577, 0.4034, 0.2390],
        [0.3606, 0.4083, 0.2310],
        [0.3371, 0.4202, 0.2428],
        [0.3076, 0.2849, 0.4075],
        [0.3869, 0.3631, 0.2500]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #1950: tensor([[0.3385, 0.3434, 0.3181],
        [0.2507, 0.2245, 0.5248],
        [0.3389, 0.3800, 0.2811],
        [0.3434, 0.3662, 0.2903],
        [0.3313, 0.3205, 0.3482],
        [0.3549, 0.3650, 0.2801],
        [0.3374, 0.2798, 0.3828],
        [0.3299, 0.3716, 0.2985],
        [0.3441, 0.3482, 0.3077],
        [0.2652, 0.3125, 0.4223],
        [0.3950, 0.3523, 0.2527],
        [0.3542, 0.3805, 0.2654]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2000: tensor([[0.3204, 0.2795, 0.4001],
        [0.3855, 0.3170, 0.2975],
        [0.3636, 0.3503, 0.2861],
        [0.3058, 0.3313, 0.3629],
        [0.3108, 0.2587, 0.4305],
        [0.3294, 0.3248, 0.3458],
        [0.3499, 0.3611, 0.2890],
        [0.3582, 0.3402, 0.3015],
        [0.3196, 0.2888, 0.3917],
        [0.3838, 0.3411, 0.2751],
        [0.3590, 0.3917, 0.2493],
        [0.3823, 0.3359, 0.2818]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2050: tensor([[0.3659, 0.3326, 0.3015],
        [0.3576, 0.3717, 0.2707],
        [0.2934, 0.2739, 0.4327],
        [0.2989, 0.3237, 0.3774],
        [0.3372, 0.2862, 0.3766],
        [0.2591, 0.2569, 0.4841],
        [0.3231, 0.3644, 0.3125],
        [0.1433, 0.1177, 0.7390],
        [0.3080, 0.2864, 0.4055],
        [0.2998, 0.3061, 0.3941],
        [0.3285, 0.3360, 0.3355],
        [0.3491, 0.3116, 0.3392]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2100: tensor([[0.4130, 0.3976, 0.1894],
        [0.3146, 0.3071, 0.3783],
        [0.3481, 0.3101, 0.3418],
        [0.2419, 0.1850, 0.5730],
        [0.3516, 0.3447, 0.3037],
        [0.2221, 0.2224, 0.5556],
        [0.2894, 0.2751, 0.4355],
        [0.3665, 0.3697, 0.2638],
        [0.3287, 0.2914, 0.3799],
        [0.2882, 0.3012, 0.4106],
        [0.3601, 0.3820, 0.2580],
        [0.3654, 0.3029, 0.3317]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2150: tensor([[0.2769, 0.2568, 0.4664],
        [0.3578, 0.3818, 0.2603],
        [0.3396, 0.2739, 0.3865],
        [0.2192, 0.2320, 0.5488],
        [0.3784, 0.3040, 0.3176],
        [0.3477, 0.3415, 0.3108],
        [0.3734, 0.3412, 0.2854],
        [0.3436, 0.3443, 0.3122],
        [0.3616, 0.3276, 0.3107],
        [0.3891, 0.3576, 0.2533],
        [0.2906, 0.2765, 0.4329],
        [0.1369, 0.1251, 0.7380]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2200: tensor([[0.3608, 0.3795, 0.2597],
        [0.1774, 0.1828, 0.6398],
        [0.2750, 0.3204, 0.4046],
        [0.3485, 0.3342, 0.3173],
        [0.3615, 0.3795, 0.2590],
        [0.3606, 0.3520, 0.2874],
        [0.3467, 0.3086, 0.3447],
        [0.3180, 0.2936, 0.3883],
        [0.3701, 0.3794, 0.2505],
        [0.3474, 0.3079, 0.3447],
        [0.3563, 0.3040, 0.3398],
        [0.3232, 0.3114, 0.3654]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2250: tensor([[0.3542, 0.3575, 0.2884],
        [0.4198, 0.3754, 0.2048],
        [0.3643, 0.3198, 0.3159],
        [0.3469, 0.3797, 0.2733],
        [0.4007, 0.3903, 0.2090],
        [0.3258, 0.3562, 0.3180],
        [0.3911, 0.3923, 0.2166],
        [0.3598, 0.3427, 0.2975],
        [0.4059, 0.4121, 0.1820],
        [0.3451, 0.3115, 0.3434],
        [0.3761, 0.3864, 0.2375],
        [0.3670, 0.4347, 0.1983]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #2300: tensor([[0.3300, 0.3444, 0.3255],
        [0.3920, 0.3893, 0.2187],
        [0.2847, 0.2541, 0.4612],
        [0.3201, 0.3432, 0.3368],
        [0.3013, 0.2738, 0.4249],
        [0.3251, 0.3463, 0.3286],
        [0.2705, 0.2419, 0.4877],
        [0.3510, 0.2742, 0.3748],
        [0.3512, 0.2917, 0.3571],
        [0.2646, 0.2913, 0.4441],
        [0.2154, 0.2113, 0.5732],
        [0.3298, 0.3515, 0.3187]], device='cuda:0', grad_fn=<SoftmaxBackward>)
