Iter #50: tensor([[0.4786, 0.5214],
        [0.4360, 0.5640],
        [0.4395, 0.5605],
        [0.5233, 0.4767],
        [0.5096, 0.4904],
        [0.5229, 0.4771],
        [0.4987, 0.5013],
        [0.5144, 0.4856],
        [0.4960, 0.5040],
        [0.5028, 0.4972],
        [0.4524, 0.5476],
        [0.4968, 0.5032]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4996, 0.5004],
        [0.5218, 0.4782],
        [0.5184, 0.4816],
        [0.4611, 0.5389],
        [0.5057, 0.4943],
        [0.5344, 0.4656],
        [0.5024, 0.4976],
        [0.5026, 0.4974],
        [0.4754, 0.5246],
        [0.4946, 0.5054],
        [0.5611, 0.4389],
        [0.5514, 0.4486]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.5138, 0.4862],
        [0.5142, 0.4858],
        [0.4685, 0.5315],
        [0.5218, 0.4782],
        [0.4729, 0.5271],
        [0.4428, 0.5572],
        [0.5206, 0.4794],
        [0.5348, 0.4652],
        [0.4906, 0.5094],
        [0.4820, 0.5180],
        [0.5118, 0.4882],
        [0.4599, 0.5401]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4956, 0.5044],
        [0.5097, 0.4903],
        [0.5640, 0.4360],
        [0.5210, 0.4790],
        [0.4748, 0.5252],
        [0.4986, 0.5014],
        [0.5334, 0.4666],
        [0.4633, 0.5367],
        [0.4779, 0.5221],
        [0.4846, 0.5154],
        [0.4965, 0.5035],
        [0.5193, 0.4807]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4992, 0.5008],
        [0.5099, 0.4901],
        [0.5698, 0.4302],
        [0.5265, 0.4735],
        [0.5159, 0.4841],
        [0.4998, 0.5002],
        [0.5794, 0.4206],
        [0.4746, 0.5254],
        [0.5449, 0.4551],
        [0.5178, 0.4822],
        [0.5002, 0.4998],
        [0.5023, 0.4977]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5236, 0.4764],
        [0.5329, 0.4671],
        [0.5940, 0.4060],
        [0.5117, 0.4883],
        [0.6004, 0.3996],
        [0.4477, 0.5523],
        [0.5609, 0.4391],
        [0.4906, 0.5094],
        [0.5033, 0.4967],
        [0.4942, 0.5058],
        [0.4954, 0.5046],
        [0.4575, 0.5425]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5111, 0.4889],
        [0.5707, 0.4293],
        [0.4601, 0.5399],
        [0.4949, 0.5051],
        [0.5224, 0.4776],
        [0.5436, 0.4564],
        [0.4750, 0.5250],
        [0.5434, 0.4566],
        [0.5367, 0.4633],
        [0.5094, 0.4906],
        [0.5325, 0.4675],
        [0.6222, 0.3778]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5326, 0.4674],
        [0.4151, 0.5849],
        [0.4548, 0.5452],
        [0.5412, 0.4588],
        [0.5297, 0.4703],
        [0.5397, 0.4603],
        [0.5756, 0.4244],
        [0.5415, 0.4585],
        [0.4919, 0.5081],
        [0.4886, 0.5114],
        [0.5849, 0.4151],
        [0.5211, 0.4789]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4688, 0.5312],
        [0.4068, 0.5932],
        [0.3832, 0.6168],
        [0.4963, 0.5037],
        [0.4576, 0.5424],
        [0.4895, 0.5105],
        [0.3989, 0.6011],
        [0.4769, 0.5231],
        [0.4973, 0.5027],
        [0.4924, 0.5076],
        [0.5100, 0.4900],
        [0.4869, 0.5131]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.5384, 0.4616],
        [0.4704, 0.5296],
        [0.4853, 0.5147],
        [0.4830, 0.5170],
        [0.4953, 0.5047],
        [0.5176, 0.4824],
        [0.4965, 0.5035],
        [0.4991, 0.5009],
        [0.4770, 0.5230],
        [0.4906, 0.5094],
        [0.4570, 0.5430],
        [0.5083, 0.4917]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4796, 0.5204],
        [0.5077, 0.4923],
        [0.4718, 0.5282],
        [0.5179, 0.4821],
        [0.5085, 0.4915],
        [0.4296, 0.5704],
        [0.4602, 0.5398],
        [0.4553, 0.5447],
        [0.5559, 0.4441],
        [0.5570, 0.4430],
        [0.4041, 0.5959],
        [0.5721, 0.4279]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4907, 0.5093],
        [0.5426, 0.4574],
        [0.5279, 0.4721],
        [0.4811, 0.5189],
        [0.5042, 0.4958],
        [0.5112, 0.4888],
        [0.5432, 0.4568],
        [0.5034, 0.4966],
        [0.4700, 0.5300],
        [0.4661, 0.5339],
        [0.4914, 0.5086],
        [0.4741, 0.5259]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3823, 0.6177],
        [0.4758, 0.5242],
        [0.4936, 0.5064],
        [0.5482, 0.4518],
        [0.5486, 0.4514],
        [0.5031, 0.4969],
        [0.5438, 0.4562],
        [0.5538, 0.4462],
        [0.5982, 0.4018],
        [0.5205, 0.4795],
        [0.5784, 0.4216],
        [0.6036, 0.3964]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4498, 0.5502],
        [0.4121, 0.5879],
        [0.4831, 0.5169],
        [0.5482, 0.4518],
        [0.5880, 0.4120],
        [0.5410, 0.4590],
        [0.5424, 0.4576],
        [0.6705, 0.3295],
        [0.4921, 0.5079],
        [0.5831, 0.4169],
        [0.4837, 0.5163],
        [0.3927, 0.6073]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4361, 0.5639],
        [0.5223, 0.4777],
        [0.5517, 0.4483],
        [0.5089, 0.4911],
        [0.5335, 0.4665],
        [0.3645, 0.6355],
        [0.5241, 0.4759],
        [0.5084, 0.4916],
        [0.5717, 0.4283],
        [0.4494, 0.5506],
        [0.4320, 0.5680],
        [0.4523, 0.5477]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.6349, 0.3651],
        [0.6014, 0.3986],
        [0.6138, 0.3862],
        [0.5280, 0.4720],
        [0.5010, 0.4990],
        [0.4107, 0.5893],
        [0.5120, 0.4880],
        [0.5835, 0.4165],
        [0.5989, 0.4011],
        [0.5205, 0.4795],
        [0.4012, 0.5988],
        [0.3927, 0.6073]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.6353, 0.3647],
        [0.4145, 0.5855],
        [0.5195, 0.4805],
        [0.4890, 0.5110],
        [0.6600, 0.3400],
        [0.5797, 0.4203],
        [0.6237, 0.3763],
        [0.5984, 0.4016],
        [0.6223, 0.3777],
        [0.4535, 0.5465],
        [0.4988, 0.5012],
        [0.5228, 0.4772]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.6203, 0.3797],
        [0.5423, 0.4577],
        [0.5197, 0.4803],
        [0.4649, 0.5351],
        [0.4917, 0.5083],
        [0.5830, 0.4170],
        [0.4307, 0.5693],
        [0.5523, 0.4477],
        [0.4206, 0.5794],
        [0.5684, 0.4316],
        [0.4160, 0.5840],
        [0.6800, 0.3200]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4149, 0.5851],
        [0.3883, 0.6117],
        [0.5929, 0.4071],
        [0.5484, 0.4516],
        [0.6153, 0.3847],
        [0.5340, 0.4660],
        [0.4459, 0.5541],
        [0.5069, 0.4931],
        [0.5765, 0.4235],
        [0.3338, 0.6662],
        [0.3811, 0.6189],
        [0.5318, 0.4682]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3937, 0.6063],
        [0.4449, 0.5551],
        [0.4380, 0.5620],
        [0.4605, 0.5395],
        [0.5519, 0.4481],
        [0.3449, 0.6551],
        [0.4862, 0.5138],
        [0.3946, 0.6054],
        [0.7167, 0.2833],
        [0.7253, 0.2747],
        [0.5975, 0.4025],
        [0.5092, 0.4908]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4138, 0.5862],
        [0.5166, 0.4834],
        [0.4924, 0.5076],
        [0.3948, 0.6052],
        [0.4950, 0.5050],
        [0.6951, 0.3049],
        [0.5686, 0.4314],
        [0.5423, 0.4577],
        [0.5180, 0.4820],
        [0.5697, 0.4303],
        [0.6926, 0.3074],
        [0.5592, 0.4408]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5375, 0.4625],
        [0.4791, 0.5209],
        [0.4972, 0.5028],
        [0.4019, 0.5981],
        [0.4419, 0.5581],
        [0.4244, 0.5756],
        [0.5580, 0.4420],
        [0.5518, 0.4482],
        [0.4368, 0.5632],
        [0.3970, 0.6030],
        [0.3804, 0.6196],
        [0.5199, 0.4801]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4595, 0.5405],
        [0.3835, 0.6165],
        [0.4409, 0.5591],
        [0.4251, 0.5749],
        [0.3510, 0.6490],
        [0.3357, 0.6643],
        [0.4650, 0.5350],
        [0.5081, 0.4919],
        [0.4953, 0.5047],
        [0.3689, 0.6311],
        [0.4687, 0.5313],
        [0.4910, 0.5090]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4511, 0.5489],
        [0.5383, 0.4617],
        [0.4924, 0.5076],
        [0.5693, 0.4307],
        [0.5180, 0.4820],
        [0.4882, 0.5118],
        [0.5004, 0.4996],
        [0.4739, 0.5261],
        [0.5208, 0.4792],
        [0.4924, 0.5076],
        [0.3490, 0.6510],
        [0.4869, 0.5131]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4248, 0.5752],
        [0.3555, 0.6445],
        [0.4821, 0.5179],
        [0.4804, 0.5196],
        [0.3921, 0.6079],
        [0.4231, 0.5769],
        [0.7015, 0.2985],
        [0.6515, 0.3485],
        [0.5759, 0.4241],
        [0.5850, 0.4150],
        [0.5871, 0.4129],
        [0.5235, 0.4765]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4482, 0.5518],
        [0.4660, 0.5340],
        [0.3741, 0.6259],
        [0.3471, 0.6529],
        [0.5596, 0.4404],
        [0.3623, 0.6377],
        [0.5905, 0.4095],
        [0.5034, 0.4966],
        [0.4098, 0.5902],
        [0.5246, 0.4754],
        [0.5825, 0.4175],
        [0.5448, 0.4552]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5740, 0.4260],
        [0.5491, 0.4509],
        [0.7467, 0.2533],
        [0.7074, 0.2926],
        [0.4945, 0.5055],
        [0.4204, 0.5796],
        [0.4108, 0.5892],
        [0.5506, 0.4494],
        [0.5246, 0.4754],
        [0.6131, 0.3869],
        [0.4191, 0.5809],
        [0.6775, 0.3225]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3964, 0.6036],
        [0.5575, 0.4425],
        [0.4529, 0.5471],
        [0.6168, 0.3832],
        [0.6926, 0.3074],
        [0.4123, 0.5877],
        [0.5242, 0.4758],
        [0.5510, 0.4490],
        [0.6053, 0.3947],
        [0.4444, 0.5556],
        [0.3300, 0.6700],
        [0.5657, 0.4343]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4984, 0.5016],
        [0.3891, 0.6109],
        [0.4070, 0.5930],
        [0.4929, 0.5071],
        [0.4773, 0.5227],
        [0.6126, 0.3874],
        [0.4021, 0.5979],
        [0.5373, 0.4627],
        [0.6085, 0.3915],
        [0.5922, 0.4078],
        [0.5347, 0.4653],
        [0.4518, 0.5482]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.6775, 0.3225],
        [0.6111, 0.3889],
        [0.4735, 0.5265],
        [0.7263, 0.2737],
        [0.4271, 0.5729],
        [0.4514, 0.5486],
        [0.5298, 0.4702],
        [0.4785, 0.5215],
        [0.4990, 0.5010],
        [0.3666, 0.6334],
        [0.4207, 0.5793],
        [0.5292, 0.4708]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4457, 0.5543],
        [0.5288, 0.4712],
        [0.2873, 0.7127],
        [0.5253, 0.4747],
        [0.3956, 0.6044],
        [0.4197, 0.5803],
        [0.5877, 0.4123],
        [0.3678, 0.6322],
        [0.5849, 0.4151],
        [0.6231, 0.3769],
        [0.5657, 0.4343],
        [0.4626, 0.5374]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.5751, 0.4249],
        [0.4192, 0.5808],
        [0.6655, 0.3345],
        [0.5050, 0.4950],
        [0.3603, 0.6397],
        [0.4343, 0.5657],
        [0.3501, 0.6499],
        [0.3538, 0.6462],
        [0.3009, 0.6991],
        [0.5643, 0.4357],
        [0.5511, 0.4489],
        [0.4428, 0.5572]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5923, 0.4077],
        [0.4635, 0.5365],
        [0.3925, 0.6075],
        [0.4627, 0.5373],
        [0.5646, 0.4354],
        [0.3929, 0.6071],
        [0.6037, 0.3963],
        [0.3316, 0.6684],
        [0.5533, 0.4467],
        [0.5288, 0.4712],
        [0.4731, 0.5269],
        [0.4207, 0.5793]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4890, 0.5110],
        [0.4361, 0.5639],
        [0.3276, 0.6724],
        [0.3891, 0.6109],
        [0.5837, 0.4163],
        [0.4754, 0.5246],
        [0.4915, 0.5085],
        [0.3231, 0.6769],
        [0.5285, 0.4715],
        [0.6129, 0.3871],
        [0.4566, 0.5434],
        [0.5108, 0.4892]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4508, 0.5492],
        [0.4463, 0.5537],
        [0.5597, 0.4403],
        [0.4408, 0.5592],
        [0.5892, 0.4108],
        [0.6314, 0.3686],
        [0.4692, 0.5308],
        [0.3520, 0.6480],
        [0.5319, 0.4681],
        [0.5087, 0.4913],
        [0.3458, 0.6542],
        [0.4992, 0.5008]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4030, 0.5970],
        [0.3519, 0.6481],
        [0.4980, 0.5020],
        [0.4531, 0.5469],
        [0.2944, 0.7056],
        [0.4518, 0.5482],
        [0.3970, 0.6030],
        [0.4898, 0.5102],
        [0.4892, 0.5108],
        [0.5955, 0.4045],
        [0.4944, 0.5056],
        [0.5772, 0.4228]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4831, 0.5169],
        [0.5422, 0.4578],
        [0.5268, 0.4732],
        [0.3386, 0.6614],
        [0.3270, 0.6730],
        [0.3242, 0.6758],
        [0.6772, 0.3228],
        [0.4299, 0.5701],
        [0.4176, 0.5824],
        [0.5670, 0.4330],
        [0.4812, 0.5188],
        [0.3698, 0.6302]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.6915, 0.3085],
        [0.5504, 0.4496],
        [0.5374, 0.4626],
        [0.6447, 0.3553],
        [0.4696, 0.5304],
        [0.4788, 0.5212],
        [0.3248, 0.6752],
        [0.4489, 0.5511],
        [0.4554, 0.5446],
        [0.6287, 0.3713],
        [0.5105, 0.4895],
        [0.4682, 0.5318]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5617, 0.4383],
        [0.5190, 0.4810],
        [0.3562, 0.6438],
        [0.5008, 0.4992],
        [0.4835, 0.5165],
        [0.6523, 0.3477],
        [0.4333, 0.5667],
        [0.6014, 0.3986],
        [0.6190, 0.3810],
        [0.4663, 0.5337],
        [0.5343, 0.4657],
        [0.4367, 0.5633]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5664, 0.4336],
        [0.5553, 0.4447],
        [0.3354, 0.6646],
        [0.6187, 0.3813],
        [0.5850, 0.4150],
        [0.5787, 0.4213],
        [0.3745, 0.6255],
        [0.4670, 0.5330],
        [0.4341, 0.5659],
        [0.4616, 0.5384],
        [0.4262, 0.5738],
        [0.4211, 0.5789]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5008, 0.4992],
        [0.6142, 0.3858],
        [0.5262, 0.4738],
        [0.6094, 0.3906],
        [0.6192, 0.3808],
        [0.4428, 0.5572],
        [0.4443, 0.5557],
        [0.5239, 0.4761],
        [0.5773, 0.4227],
        [0.3410, 0.6590],
        [0.4059, 0.5941],
        [0.5346, 0.4654]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.6546, 0.3454],
        [0.4585, 0.5415],
        [0.3807, 0.6193],
        [0.4148, 0.5852],
        [0.3933, 0.6067],
        [0.4710, 0.5290],
        [0.4281, 0.5719],
        [0.3699, 0.6301],
        [0.5913, 0.4087],
        [0.4417, 0.5583],
        [0.4790, 0.5210],
        [0.4136, 0.5864]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.5637, 0.4363],
        [0.1631, 0.8369],
        [0.4475, 0.5525],
        [0.5363, 0.4637],
        [0.6213, 0.3787],
        [0.5151, 0.4849],
        [0.3977, 0.6023],
        [0.5666, 0.4334],
        [0.4916, 0.5084],
        [0.5339, 0.4661],
        [0.3842, 0.6158],
        [0.3592, 0.6408]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5564, 0.4436],
        [0.4846, 0.5154],
        [0.5152, 0.4848],
        [0.4460, 0.5540],
        [0.3795, 0.6205],
        [0.5524, 0.4476],
        [0.5070, 0.4930],
        [0.3170, 0.6830],
        [0.4634, 0.5366],
        [0.4349, 0.5651],
        [0.5887, 0.4113],
        [0.5826, 0.4174]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5677, 0.4323],
        [0.6697, 0.3303],
        [0.4565, 0.5435],
        [0.4700, 0.5300],
        [0.6090, 0.3910],
        [0.5400, 0.4600],
        [0.5323, 0.4677],
        [0.5380, 0.4620],
        [0.4230, 0.5770],
        [0.7241, 0.2759],
        [0.4394, 0.5606],
        [0.3509, 0.6491]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3942, 0.6058],
        [0.5834, 0.4166],
        [0.3857, 0.6143],
        [0.6449, 0.3551],
        [0.6598, 0.3402],
        [0.6192, 0.3808],
        [0.5988, 0.4012],
        [0.4001, 0.5999],
        [0.5258, 0.4742],
        [0.5446, 0.4554],
        [0.4199, 0.5801],
        [0.5009, 0.4991]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.5452, 0.4548],
        [0.5751, 0.4249],
        [0.7049, 0.2951],
        [0.4148, 0.5852],
        [0.4411, 0.5589],
        [0.5037, 0.4963],
        [0.3633, 0.6367],
        [0.7481, 0.2519],
        [0.7178, 0.2822],
        [0.4277, 0.5723],
        [0.5215, 0.4785],
        [0.4473, 0.5527]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.6342, 0.3658],
        [0.7369, 0.2631],
        [0.4189, 0.5811],
        [0.5292, 0.4708],
        [0.3187, 0.6813],
        [0.5897, 0.4103],
        [0.8468, 0.1532],
        [0.6176, 0.3824],
        [0.4623, 0.5377],
        [0.5455, 0.4545],
        [0.6009, 0.3991],
        [0.4924, 0.5076]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5392, 0.4608],
        [0.6310, 0.3690],
        [0.5240, 0.4760],
        [0.6154, 0.3846],
        [0.5430, 0.4570],
        [0.4319, 0.5681],
        [0.3564, 0.6436],
        [0.3379, 0.6621],
        [0.3882, 0.6118],
        [0.3005, 0.6995],
        [0.4304, 0.5696],
        [0.4792, 0.5208]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4869, 0.5131],
        [0.3869, 0.6131],
        [0.2922, 0.7078],
        [0.4236, 0.5764],
        [0.4140, 0.5860],
        [0.6489, 0.3511],
        [0.3946, 0.6054],
        [0.4278, 0.5722],
        [0.5321, 0.4679],
        [0.5273, 0.4727],
        [0.4208, 0.5792],
        [0.2706, 0.7294]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.6843, 0.3157],
        [0.5712, 0.4288],
        [0.3206, 0.6794],
        [0.6065, 0.3935],
        [0.4443, 0.5557],
        [0.4828, 0.5172],
        [0.2838, 0.7162],
        [0.5899, 0.4101],
        [0.5008, 0.4992],
        [0.6996, 0.3004],
        [0.4525, 0.5475],
        [0.3554, 0.6446]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5283, 0.4717],
        [0.4721, 0.5279],
        [0.5985, 0.4015],
        [0.6208, 0.3792],
        [0.4951, 0.5049],
        [0.3663, 0.6337],
        [0.4232, 0.5768],
        [0.7260, 0.2740],
        [0.4158, 0.5842],
        [0.3411, 0.6589],
        [0.3960, 0.6040],
        [0.5162, 0.4838]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3135, 0.6865],
        [0.4237, 0.5763],
        [0.4506, 0.5494],
        [0.5904, 0.4096],
        [0.3048, 0.6952],
        [0.3250, 0.6750],
        [0.2986, 0.7014],
        [0.4376, 0.5624],
        [0.6437, 0.3563],
        [0.5286, 0.4714],
        [0.4842, 0.5158],
        [0.3976, 0.6024]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4641, 0.5359],
        [0.3294, 0.6706],
        [0.6428, 0.3572],
        [0.3678, 0.6322],
        [0.4006, 0.5994],
        [0.5736, 0.4264],
        [0.4787, 0.5213],
        [0.3651, 0.6349],
        [0.5205, 0.4795],
        [0.5337, 0.4663],
        [0.5818, 0.4182],
        [0.5349, 0.4651]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5202, 0.4798],
        [0.5932, 0.4068],
        [0.4790, 0.5210],
        [0.5127, 0.4873],
        [0.3732, 0.6268],
        [0.5263, 0.4737],
        [0.7859, 0.2141],
        [0.4832, 0.5168],
        [0.5640, 0.4360],
        [0.4759, 0.5241],
        [0.5259, 0.4741],
        [0.4629, 0.5371]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5544, 0.4456],
        [0.4691, 0.5309],
        [0.5446, 0.4554],
        [0.3954, 0.6046],
        [0.5101, 0.4899],
        [0.4078, 0.5922],
        [0.3973, 0.6027],
        [0.5289, 0.4711],
        [0.3400, 0.6600],
        [0.4683, 0.5317],
        [0.4810, 0.5190],
        [0.4051, 0.5949]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3706, 0.6294],
        [0.6225, 0.3775],
        [0.3798, 0.6202],
        [0.6695, 0.3305],
        [0.7059, 0.2941],
        [0.2812, 0.7188],
        [0.5430, 0.4570],
        [0.6655, 0.3345],
        [0.4171, 0.5829],
        [0.4533, 0.5467],
        [0.4439, 0.5561],
        [0.4791, 0.5209]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.5043, 0.4957],
        [0.2764, 0.7236],
        [0.7617, 0.2383],
        [0.5436, 0.4564],
        [0.4187, 0.5813],
        [0.4746, 0.5254],
        [0.4246, 0.5754],
        [0.5351, 0.4649],
        [0.6397, 0.3603],
        [0.7897, 0.2103],
        [0.3890, 0.6110],
        [0.4529, 0.5471]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.6980, 0.3020],
        [0.5277, 0.4723],
        [0.5258, 0.4742],
        [0.4843, 0.5157],
        [0.2164, 0.7836],
        [0.4265, 0.5735],
        [0.7673, 0.2327],
        [0.5268, 0.4732],
        [0.5718, 0.4282],
        [0.4596, 0.5404],
        [0.4816, 0.5184],
        [0.5122, 0.4878]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.6406, 0.3594],
        [0.5152, 0.4848],
        [0.5542, 0.4458],
        [0.4271, 0.5729],
        [0.6071, 0.3929],
        [0.6236, 0.3764],
        [0.3037, 0.6963],
        [0.6187, 0.3813],
        [0.3277, 0.6723],
        [0.5807, 0.4193],
        [0.6860, 0.3140],
        [0.5455, 0.4545]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4719, 0.5281],
        [0.4840, 0.5160],
        [0.5058, 0.4942],
        [0.2920, 0.7080],
        [0.5711, 0.4289],
        [0.6461, 0.3539],
        [0.6355, 0.3645],
        [0.5001, 0.4999],
        [0.4911, 0.5089],
        [0.4619, 0.5381],
        [0.5171, 0.4829],
        [0.2758, 0.7242]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5265, 0.4735],
        [0.7326, 0.2674],
        [0.2490, 0.7510],
        [0.6173, 0.3827],
        [0.5353, 0.4647],
        [0.5494, 0.4506],
        [0.5578, 0.4422],
        [0.6629, 0.3371],
        [0.5880, 0.4120],
        [0.5035, 0.4965],
        [0.5797, 0.4203],
        [0.4589, 0.5411]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5246, 0.4754],
        [0.5695, 0.4305],
        [0.5651, 0.4349],
        [0.5437, 0.4563],
        [0.4333, 0.5667],
        [0.2642, 0.7358],
        [0.3575, 0.6425],
        [0.4691, 0.5309],
        [0.4091, 0.5909],
        [0.3906, 0.6094],
        [0.3690, 0.6310],
        [0.3993, 0.6007]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4390, 0.5610],
        [0.6082, 0.3918],
        [0.3252, 0.6748],
        [0.4286, 0.5714],
        [0.4674, 0.5326],
        [0.5076, 0.4924],
        [0.4025, 0.5975],
        [0.4087, 0.5913],
        [0.6295, 0.3705],
        [0.5221, 0.4779],
        [0.5061, 0.4939],
        [0.7299, 0.2701]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3408, 0.6592],
        [0.4246, 0.5754],
        [0.4964, 0.5036],
        [0.6048, 0.3952],
        [0.6442, 0.3558],
        [0.4251, 0.5749],
        [0.4898, 0.5102],
        [0.5715, 0.4285],
        [0.4654, 0.5346],
        [0.6107, 0.3893],
        [0.6666, 0.3334],
        [0.5822, 0.4178]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4000, 0.6000],
        [0.6991, 0.3009],
        [0.4436, 0.5564],
        [0.5207, 0.4793],
        [0.5369, 0.4631],
        [0.6440, 0.3560],
        [0.4218, 0.5782],
        [0.3737, 0.6263],
        [0.6761, 0.3239],
        [0.5328, 0.4672],
        [0.7194, 0.2806],
        [0.5166, 0.4834]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.6148, 0.3852],
        [0.4582, 0.5418],
        [0.4722, 0.5278],
        [0.4423, 0.5577],
        [0.5608, 0.4392],
        [0.3631, 0.6369],
        [0.5592, 0.4408],
        [0.3684, 0.6316],
        [0.3657, 0.6343],
        [0.6321, 0.3679],
        [0.4463, 0.5537],
        [0.3777, 0.6223]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4453, 0.5547],
        [0.3229, 0.6771],
        [0.4980, 0.5020],
        [0.3912, 0.6088],
        [0.6692, 0.3308],
        [0.4458, 0.5542],
        [0.4819, 0.5181],
        [0.3587, 0.6413],
        [0.3767, 0.6233],
        [0.5685, 0.4315],
        [0.4464, 0.5536],
        [0.4271, 0.5729]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3937, 0.6063],
        [0.2536, 0.7464],
        [0.7415, 0.2585],
        [0.4426, 0.5574],
        [0.4751, 0.5249],
        [0.3261, 0.6739],
        [0.4230, 0.5770],
        [0.3664, 0.6336],
        [0.3707, 0.6293],
        [0.4813, 0.5187],
        [0.4958, 0.5042],
        [0.1853, 0.8147]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4452, 0.5548],
        [0.7740, 0.2260],
        [0.4792, 0.5208],
        [0.4140, 0.5860],
        [0.1894, 0.8106],
        [0.4423, 0.5577],
        [0.6670, 0.3330],
        [0.5661, 0.4339],
        [0.5982, 0.4018],
        [0.4435, 0.5565],
        [0.4173, 0.5827],
        [0.4924, 0.5076]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.6542, 0.3458],
        [0.5910, 0.4090],
        [0.6797, 0.3203],
        [0.5324, 0.4676],
        [0.5590, 0.4410],
        [0.4986, 0.5014],
        [0.4089, 0.5911],
        [0.6838, 0.3162],
        [0.5650, 0.4350],
        [0.5257, 0.4743],
        [0.5411, 0.4589],
        [0.6313, 0.3687]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4186, 0.5814],
        [0.5329, 0.4671],
        [0.3091, 0.6909],
        [0.2384, 0.7616],
        [0.4279, 0.5721],
        [0.7816, 0.2184],
        [0.5199, 0.4801],
        [0.3670, 0.6330],
        [0.5955, 0.4045],
        [0.4013, 0.5987],
        [0.5006, 0.4994],
        [0.2697, 0.7303]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5862, 0.4138],
        [0.6460, 0.3540],
        [0.4424, 0.5576],
        [0.6613, 0.3387],
        [0.4190, 0.5810],
        [0.6042, 0.3958],
        [0.5601, 0.4399],
        [0.5682, 0.4318],
        [0.3201, 0.6799],
        [0.6446, 0.3554],
        [0.5071, 0.4929],
        [0.2716, 0.7284]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3923, 0.6077],
        [0.5743, 0.4257],
        [0.4804, 0.5196],
        [0.7658, 0.2342],
        [0.5419, 0.4581],
        [0.4418, 0.5582],
        [0.2035, 0.7965],
        [0.7315, 0.2685],
        [0.4449, 0.5551],
        [0.3086, 0.6914],
        [0.3185, 0.6815],
        [0.6536, 0.3464]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5111, 0.4889],
        [0.5874, 0.4126],
        [0.5231, 0.4769],
        [0.5001, 0.4999],
        [0.5305, 0.4695],
        [0.6148, 0.3852],
        [0.3715, 0.6285],
        [0.3283, 0.6717],
        [0.7697, 0.2303],
        [0.6101, 0.3899],
        [0.4980, 0.5020],
        [0.6007, 0.3993]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3889, 0.6111],
        [0.5104, 0.4896],
        [0.3872, 0.6128],
        [0.7275, 0.2725],
        [0.6189, 0.3811],
        [0.4367, 0.5633],
        [0.3742, 0.6258],
        [0.5148, 0.4852],
        [0.3782, 0.6218],
        [0.4078, 0.5922],
        [0.7241, 0.2759],
        [0.6255, 0.3745]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5404, 0.4596],
        [0.4203, 0.5797],
        [0.4914, 0.5086],
        [0.3337, 0.6663],
        [0.4094, 0.5906],
        [0.4606, 0.5394],
        [0.6404, 0.3596],
        [0.4185, 0.5815],
        [0.6328, 0.3672],
        [0.4327, 0.5673],
        [0.3570, 0.6430],
        [0.3283, 0.6717]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5642, 0.4358],
        [0.6570, 0.3430],
        [0.3911, 0.6089],
        [0.4256, 0.5744],
        [0.4827, 0.5173],
        [0.3915, 0.6085],
        [0.3066, 0.6934],
        [0.4650, 0.5350],
        [0.3366, 0.6634],
        [0.4731, 0.5269],
        [0.5848, 0.4152],
        [0.3469, 0.6531]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3078, 0.6922],
        [0.4894, 0.5106],
        [0.4465, 0.5535],
        [0.5293, 0.4707],
        [0.7268, 0.2732],
        [0.2853, 0.7147],
        [0.4631, 0.5369],
        [0.6181, 0.3819],
        [0.5137, 0.4863],
        [0.6178, 0.3822],
        [0.2570, 0.7430],
        [0.3647, 0.6353]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4202, 0.5798],
        [0.4899, 0.5101],
        [0.6186, 0.3814],
        [0.3956, 0.6044],
        [0.3931, 0.6069],
        [0.5639, 0.4361],
        [0.4081, 0.5919],
        [0.3012, 0.6988],
        [0.3983, 0.6017],
        [0.5856, 0.4144],
        [0.4341, 0.5659],
        [0.2306, 0.7694]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3776, 0.6224],
        [0.6355, 0.3645],
        [0.4240, 0.5760],
        [0.3719, 0.6281],
        [0.5609, 0.4391],
        [0.4228, 0.5772],
        [0.6705, 0.3295],
        [0.4749, 0.5251],
        [0.5368, 0.4632],
        [0.8502, 0.1498],
        [0.5091, 0.4909],
        [0.5649, 0.4351]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.6162, 0.3838],
        [0.4547, 0.5453],
        [0.6769, 0.3231],
        [0.5400, 0.4600],
        [0.4284, 0.5716],
        [0.6634, 0.3366],
        [0.3900, 0.6100],
        [0.5114, 0.4886],
        [0.3871, 0.6129],
        [0.5241, 0.4759],
        [0.7387, 0.2613],
        [0.5047, 0.4953]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.8117, 0.1883],
        [0.4642, 0.5358],
        [0.3863, 0.6137],
        [0.1993, 0.8007],
        [0.7659, 0.2341],
        [0.7471, 0.2529],
        [0.4078, 0.5922],
        [0.4097, 0.5903],
        [0.5995, 0.4005],
        [0.5454, 0.4546],
        [0.5411, 0.4589],
        [0.4654, 0.5346]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.7810, 0.2190],
        [0.4511, 0.5489],
        [0.1997, 0.8003],
        [0.4457, 0.5543],
        [0.5811, 0.4189],
        [0.6432, 0.3568],
        [0.6095, 0.3905],
        [0.5811, 0.4189],
        [0.6055, 0.3945],
        [0.4483, 0.5517],
        [0.4916, 0.5084],
        [0.2591, 0.7409]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.7372, 0.2628],
        [0.5240, 0.4760],
        [0.5345, 0.4655],
        [0.7212, 0.2788],
        [0.5696, 0.4304],
        [0.4378, 0.5622],
        [0.4024, 0.5976],
        [0.6254, 0.3746],
        [0.4003, 0.5997],
        [0.3975, 0.6025],
        [0.3630, 0.6370],
        [0.4607, 0.5393]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5734, 0.4266],
        [0.5327, 0.4673],
        [0.2957, 0.7043],
        [0.5170, 0.4830],
        [0.4821, 0.5179],
        [0.6318, 0.3682],
        [0.5162, 0.4838],
        [0.4505, 0.5495],
        [0.7385, 0.2615],
        [0.4419, 0.5581],
        [0.7251, 0.2749],
        [0.6238, 0.3762]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.2823, 0.7177],
        [0.4022, 0.5978],
        [0.5722, 0.4278],
        [0.6153, 0.3847],
        [0.4475, 0.5525],
        [0.5439, 0.4561],
        [0.5486, 0.4514],
        [0.4833, 0.5167],
        [0.2926, 0.7074],
        [0.5292, 0.4708],
        [0.5167, 0.4833],
        [0.6640, 0.3360]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2082, 0.7918],
        [0.5660, 0.4340],
        [0.5070, 0.4930],
        [0.2229, 0.7771],
        [0.6296, 0.3704],
        [0.4567, 0.5433],
        [0.6858, 0.3142],
        [0.5973, 0.4027],
        [0.7174, 0.2826],
        [0.5178, 0.4822],
        [0.3833, 0.6167],
        [0.5437, 0.4563]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5692, 0.4308],
        [0.7409, 0.2591],
        [0.4468, 0.5532],
        [0.1650, 0.8350],
        [0.6086, 0.3914],
        [0.4208, 0.5792],
        [0.3687, 0.6313],
        [0.3822, 0.6178],
        [0.4214, 0.5786],
        [0.5943, 0.4057],
        [0.3857, 0.6143],
        [0.3957, 0.6043]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3481, 0.6519],
        [0.4543, 0.5457],
        [0.5589, 0.4411],
        [0.6007, 0.3993],
        [0.6812, 0.3188],
        [0.5287, 0.4713],
        [0.5708, 0.4292],
        [0.4539, 0.5461],
        [0.3453, 0.6547],
        [0.5615, 0.4385],
        [0.4179, 0.5821],
        [0.4985, 0.5015]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.2379, 0.7621],
        [0.3427, 0.6573],
        [0.5230, 0.4770],
        [0.4005, 0.5995],
        [0.5797, 0.4203],
        [0.3704, 0.6296],
        [0.5233, 0.4767],
        [0.3546, 0.6454],
        [0.7056, 0.2944],
        [0.4636, 0.5364],
        [0.4152, 0.5848],
        [0.1860, 0.8140]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5865, 0.4135],
        [0.6806, 0.3194],
        [0.4487, 0.5513],
        [0.2558, 0.7442],
        [0.3266, 0.6734],
        [0.5233, 0.4767],
        [0.4707, 0.5293],
        [0.4997, 0.5003],
        [0.5420, 0.4580],
        [0.7074, 0.2926],
        [0.4637, 0.5363],
        [0.6335, 0.3665]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3180, 0.6820],
        [0.6090, 0.3910],
        [0.5912, 0.4088],
        [0.4649, 0.5351],
        [0.5661, 0.4339],
        [0.4249, 0.5751],
        [0.3765, 0.6235],
        [0.5272, 0.4728],
        [0.4265, 0.5735],
        [0.7001, 0.2999],
        [0.5553, 0.4447],
        [0.3308, 0.6692]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.6413, 0.3587],
        [0.5993, 0.4007],
        [0.3417, 0.6583],
        [0.2178, 0.7822],
        [0.4903, 0.5097],
        [0.7413, 0.2587],
        [0.3386, 0.6614],
        [0.4432, 0.5568],
        [0.5476, 0.4524],
        [0.2896, 0.7104],
        [0.6345, 0.3655],
        [0.3948, 0.6052]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5423, 0.4577],
        [0.4067, 0.5933],
        [0.3850, 0.6150],
        [0.5751, 0.4249],
        [0.2366, 0.7634],
        [0.5229, 0.4771],
        [0.4223, 0.5777],
        [0.5422, 0.4578],
        [0.4278, 0.5722],
        [0.5627, 0.4373],
        [0.3842, 0.6158],
        [0.4126, 0.5874]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4685, 0.5315],
        [0.5089, 0.4911],
        [0.4616, 0.5384],
        [0.7390, 0.2610],
        [0.3486, 0.6514],
        [0.4518, 0.5482],
        [0.2465, 0.7535],
        [0.6239, 0.3761],
        [0.6982, 0.3018],
        [0.3544, 0.6456],
        [0.4589, 0.5411],
        [0.4797, 0.5203]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4289, 0.5711],
        [0.4965, 0.5035],
        [0.4734, 0.5266],
        [0.5709, 0.4291],
        [0.3700, 0.6300],
        [0.2599, 0.7401],
        [0.4441, 0.5559],
        [0.3289, 0.6711],
        [0.8407, 0.1593],
        [0.2576, 0.7424],
        [0.4362, 0.5638],
        [0.8167, 0.1833]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3190, 0.6810],
        [0.2771, 0.7229],
        [0.6188, 0.3812],
        [0.4558, 0.5442],
        [0.5138, 0.4862],
        [0.2294, 0.7706],
        [0.3296, 0.6704],
        [0.4226, 0.5774],
        [0.3904, 0.6096],
        [0.2450, 0.7550],
        [0.6532, 0.3468],
        [0.5243, 0.4757]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2362, 0.7638],
        [0.1148, 0.8852],
        [0.4062, 0.5938],
        [0.0943, 0.9057],
        [0.1315, 0.8685],
        [0.1740, 0.8260],
        [0.6848, 0.3152],
        [0.1015, 0.8985],
        [0.1379, 0.8621],
        [0.1451, 0.8549],
        [0.1412, 0.8588],
        [0.1622, 0.8378]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3882, 0.6118],
        [0.4730, 0.5270],
        [0.1389, 0.8611],
        [0.4510, 0.5490],
        [0.3480, 0.6520],
        [0.3926, 0.6074],
        [0.2476, 0.7524],
        [0.1721, 0.8279],
        [0.4579, 0.5421],
        [0.2840, 0.7160],
        [0.0906, 0.9094],
        [0.1877, 0.8123]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3239, 0.6761],
        [0.6896, 0.3104],
        [0.4343, 0.5657],
        [0.4093, 0.5907],
        [0.6166, 0.3834],
        [0.3744, 0.6256],
        [0.4174, 0.5826],
        [0.9337, 0.0663],
        [0.3910, 0.6090],
        [0.6234, 0.3766],
        [0.3861, 0.6139],
        [0.5544, 0.4456]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3253, 0.6747],
        [0.3830, 0.6170],
        [0.9301, 0.0699],
        [0.8506, 0.1494],
        [0.1572, 0.8428],
        [0.3491, 0.6509],
        [0.3400, 0.6600],
        [0.1918, 0.8082],
        [0.2652, 0.7348],
        [0.6566, 0.3434],
        [0.7971, 0.2029],
        [0.6187, 0.3813]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3511, 0.6489],
        [0.3655, 0.6345],
        [0.4242, 0.5758],
        [0.4363, 0.5637],
        [0.8986, 0.1014],
        [0.3551, 0.6449],
        [0.9098, 0.0902],
        [0.4252, 0.5748],
        [0.2612, 0.7388],
        [0.4612, 0.5388],
        [0.3580, 0.6420],
        [0.3456, 0.6544]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.2102, 0.7898],
        [0.7698, 0.2302],
        [0.7853, 0.2147],
        [0.4243, 0.5757],
        [0.6792, 0.3208],
        [0.4143, 0.5857],
        [0.2826, 0.7174],
        [0.8283, 0.1717],
        [0.3753, 0.6247],
        [0.6610, 0.3390],
        [0.1894, 0.8106],
        [0.2174, 0.7826]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.8723, 0.1277],
        [0.5570, 0.4430],
        [0.1976, 0.8024],
        [0.4237, 0.5763],
        [0.4011, 0.5989],
        [0.3640, 0.6360],
        [0.5364, 0.4636],
        [0.2385, 0.7615],
        [0.5118, 0.4882],
        [0.2287, 0.7713],
        [0.4318, 0.5682],
        [0.3875, 0.6125]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3229, 0.6771],
        [0.3216, 0.6784],
        [0.4005, 0.5995],
        [0.4480, 0.5520],
        [0.3537, 0.6463],
        [0.6908, 0.3092],
        [0.4335, 0.5665],
        [0.1480, 0.8520],
        [0.2255, 0.7745],
        [0.4212, 0.5788],
        [0.2588, 0.7412],
        [0.3014, 0.6986]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2218, 0.7782],
        [0.2644, 0.7356],
        [0.2696, 0.7304],
        [0.6164, 0.3836],
        [0.7089, 0.2911],
        [0.1279, 0.8721],
        [0.2544, 0.7456],
        [0.3925, 0.6075],
        [0.1649, 0.8351],
        [0.2626, 0.7374],
        [0.3620, 0.6380],
        [0.6981, 0.3019]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.2566, 0.7434],
        [0.7029, 0.2971],
        [0.3033, 0.6967],
        [0.2222, 0.7778],
        [0.1383, 0.8617],
        [0.2059, 0.7941],
        [0.1797, 0.8203],
        [0.1003, 0.8997],
        [0.8007, 0.1993],
        [0.2037, 0.7963],
        [0.8512, 0.1488],
        [0.1373, 0.8627]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1710, 0.8290],
        [0.0823, 0.9177],
        [0.2682, 0.7318],
        [0.5231, 0.4769],
        [0.7491, 0.2509],
        [0.0857, 0.9143],
        [0.1826, 0.8174],
        [0.4185, 0.5815],
        [0.2415, 0.7585],
        [0.6313, 0.3687],
        [0.2395, 0.7605],
        [0.8125, 0.1875]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1625, 0.8375],
        [0.2244, 0.7756],
        [0.4871, 0.5129],
        [0.4237, 0.5763],
        [0.3008, 0.6992],
        [0.2306, 0.7694],
        [0.1383, 0.8617],
        [0.1368, 0.8632],
        [0.7759, 0.2241],
        [0.1618, 0.8382],
        [0.3190, 0.6810],
        [0.1064, 0.8936]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3349, 0.6651],
        [0.6172, 0.3828],
        [0.1023, 0.8977],
        [0.2370, 0.7630],
        [0.1959, 0.8041],
        [0.3340, 0.6660],
        [0.2363, 0.7637],
        [0.0965, 0.9035],
        [0.3052, 0.6948],
        [0.2153, 0.7847],
        [0.0705, 0.9295],
        [0.5470, 0.4530]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0823, 0.9177],
        [0.8353, 0.1647],
        [0.1478, 0.8522],
        [0.3203, 0.6797],
        [0.5781, 0.4219],
        [0.2036, 0.7964],
        [0.4321, 0.5679],
        [0.6813, 0.3187],
        [0.2493, 0.7507],
        [0.0753, 0.9247],
        [0.3544, 0.6456],
        [0.3874, 0.6126]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4088, 0.5912],
        [0.2595, 0.7405],
        [0.8210, 0.1790],
        [0.4596, 0.5404],
        [0.2530, 0.7470],
        [0.2291, 0.7709],
        [0.1222, 0.8778],
        [0.0914, 0.9086],
        [0.0470, 0.9530],
        [0.2847, 0.7153],
        [0.0866, 0.9134],
        [0.4976, 0.5024]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3011, 0.6989],
        [0.7569, 0.2431],
        [0.2380, 0.7620],
        [0.3765, 0.6235],
        [0.7250, 0.2750],
        [0.1025, 0.8975],
        [0.1254, 0.8746],
        [0.4558, 0.5442],
        [0.2298, 0.7702],
        [0.3544, 0.6456],
        [0.4521, 0.5479],
        [0.4273, 0.5727]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5076, 0.4924],
        [0.4483, 0.5517],
        [0.4510, 0.5490],
        [0.6427, 0.3573],
        [0.6968, 0.3032],
        [0.6705, 0.3295],
        [0.6690, 0.3310],
        [0.4283, 0.5717],
        [0.2573, 0.7427],
        [0.6446, 0.3554],
        [0.5677, 0.4323],
        [0.2970, 0.7030]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.2849, 0.7151],
        [0.4356, 0.5644],
        [0.3161, 0.6839],
        [0.2125, 0.7875],
        [0.6141, 0.3859],
        [0.4457, 0.5543],
        [0.1723, 0.8277],
        [0.1610, 0.8390],
        [0.3431, 0.6569],
        [0.4367, 0.5633],
        [0.1617, 0.8383],
        [0.2025, 0.7975]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4100, 0.5900],
        [0.7446, 0.2554],
        [0.6592, 0.3408],
        [0.4744, 0.5256],
        [0.4686, 0.5314],
        [0.4464, 0.5536],
        [0.7178, 0.2822],
        [0.5345, 0.4655],
        [0.3511, 0.6489],
        [0.1696, 0.8304],
        [0.3399, 0.6601],
        [0.2878, 0.7122]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5673, 0.4327],
        [0.4683, 0.5317],
        [0.5179, 0.4821],
        [0.6182, 0.3818],
        [0.6712, 0.3288],
        [0.2717, 0.7283],
        [0.3645, 0.6355],
        [0.6536, 0.3464],
        [0.5269, 0.4731],
        [0.4757, 0.5243],
        [0.4522, 0.5478],
        [0.6683, 0.3317]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4490, 0.5510],
        [0.6060, 0.3940],
        [0.6888, 0.3112],
        [0.6264, 0.3736],
        [0.4293, 0.5707],
        [0.4613, 0.5387],
        [0.5961, 0.4039],
        [0.2449, 0.7551],
        [0.8529, 0.1471],
        [0.3428, 0.6572],
        [0.8710, 0.1290],
        [0.8622, 0.1378]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1778, 0.8222],
        [0.2480, 0.7520],
        [0.5153, 0.4847],
        [0.2934, 0.7066],
        [0.4225, 0.5775],
        [0.2108, 0.7892],
        [0.1348, 0.8652],
        [0.2174, 0.7826],
        [0.5286, 0.4714],
        [0.5853, 0.4147],
        [0.5470, 0.4530],
        [0.3600, 0.6400]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4042, 0.5958],
        [0.3714, 0.6286],
        [0.1927, 0.8073],
        [0.2259, 0.7741],
        [0.2212, 0.7788],
        [0.5295, 0.4705],
        [0.3880, 0.6120],
        [0.3365, 0.6635],
        [0.4178, 0.5822],
        [0.2322, 0.7678],
        [0.5071, 0.4929],
        [0.2390, 0.7610]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5561, 0.4439],
        [0.7207, 0.2793],
        [0.2784, 0.7216],
        [0.6184, 0.3816],
        [0.5220, 0.4780],
        [0.8135, 0.1865],
        [0.2203, 0.7797],
        [0.4114, 0.5886],
        [0.2718, 0.7282],
        [0.1623, 0.8377],
        [0.5037, 0.4963],
        [0.2053, 0.7947]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1846, 0.8154],
        [0.7294, 0.2706],
        [0.1225, 0.8775],
        [0.3541, 0.6459],
        [0.3464, 0.6536],
        [0.5207, 0.4793],
        [0.4082, 0.5918],
        [0.3528, 0.6472],
        [0.3044, 0.6956],
        [0.2680, 0.7320],
        [0.5706, 0.4294],
        [0.3419, 0.6581]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3284, 0.6716],
        [0.5776, 0.4224],
        [0.8344, 0.1656],
        [0.5146, 0.4854],
        [0.1977, 0.8023],
        [0.4645, 0.5355],
        [0.2474, 0.7526],
        [0.2010, 0.7990],
        [0.3307, 0.6693],
        [0.5339, 0.4661],
        [0.6056, 0.3944],
        [0.6716, 0.3284]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.2629, 0.7371],
        [0.6005, 0.3995],
        [0.6776, 0.3224],
        [0.3297, 0.6703],
        [0.7514, 0.2486],
        [0.3702, 0.6298],
        [0.6486, 0.3514],
        [0.9065, 0.0935],
        [0.5786, 0.4214],
        [0.7868, 0.2132],
        [0.2402, 0.7598],
        [0.4054, 0.5946]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.2294, 0.7706],
        [0.2377, 0.7623],
        [0.4343, 0.5657],
        [0.4544, 0.5456],
        [0.3827, 0.6173],
        [0.5257, 0.4743],
        [0.4097, 0.5903],
        [0.1345, 0.8655],
        [0.6550, 0.3450],
        [0.2570, 0.7430],
        [0.3186, 0.6814],
        [0.4284, 0.5716]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5907, 0.4093],
        [0.2888, 0.7112],
        [0.2843, 0.7157],
        [0.1644, 0.8356],
        [0.4166, 0.5834],
        [0.4551, 0.5449],
        [0.8598, 0.1402],
        [0.3506, 0.6494],
        [0.5613, 0.4387],
        [0.3688, 0.6312],
        [0.4640, 0.5360],
        [0.6166, 0.3834]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.6104, 0.3896],
        [0.3613, 0.6387],
        [0.1486, 0.8514],
        [0.1341, 0.8659],
        [0.4372, 0.5628],
        [0.4865, 0.5135],
        [0.5276, 0.4724],
        [0.3330, 0.6670],
        [0.1573, 0.8427],
        [0.2169, 0.7831],
        [0.3929, 0.6071],
        [0.0735, 0.9265]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3313, 0.6687],
        [0.6964, 0.3036],
        [0.5269, 0.4731],
        [0.4514, 0.5486],
        [0.8909, 0.1091],
        [0.5631, 0.4369],
        [0.2421, 0.7579],
        [0.5904, 0.4096],
        [0.4789, 0.5211],
        [0.1254, 0.8746],
        [0.4744, 0.5256],
        [0.5534, 0.4466]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4945, 0.5055],
        [0.7842, 0.2158],
        [0.2684, 0.7316],
        [0.2095, 0.7905],
        [0.4228, 0.5772],
        [0.8070, 0.1930],
        [0.2737, 0.7263],
        [0.4063, 0.5937],
        [0.6670, 0.3330],
        [0.4876, 0.5124],
        [0.7838, 0.2162],
        [0.5966, 0.4034]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1841, 0.8159],
        [0.4103, 0.5897],
        [0.2649, 0.7351],
        [0.6954, 0.3046],
        [0.4227, 0.5773],
        [0.3135, 0.6865],
        [0.1938, 0.8062],
        [0.5039, 0.4961],
        [0.3122, 0.6878],
        [0.5103, 0.4897],
        [0.6306, 0.3694],
        [0.7175, 0.2825]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1538, 0.8462],
        [0.5010, 0.4990],
        [0.4050, 0.5950],
        [0.5150, 0.4850],
        [0.3828, 0.6172],
        [0.5105, 0.4895],
        [0.4118, 0.5882],
        [0.7784, 0.2216],
        [0.6986, 0.3014],
        [0.1748, 0.8252],
        [0.6127, 0.3873],
        [0.1280, 0.8720]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5951, 0.4049],
        [0.7391, 0.2609],
        [0.5861, 0.4139],
        [0.4132, 0.5868],
        [0.4639, 0.5361],
        [0.4272, 0.5728],
        [0.6505, 0.3495],
        [0.5838, 0.4162],
        [0.3383, 0.6617],
        [0.4772, 0.5228],
        [0.4554, 0.5446],
        [0.4040, 0.5960]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.5192, 0.4808],
        [0.8473, 0.1527],
        [0.3745, 0.6255],
        [0.8004, 0.1996],
        [0.2273, 0.7727],
        [0.2552, 0.7448],
        [0.5378, 0.4622],
        [0.5552, 0.4448],
        [0.7863, 0.2137],
        [0.1806, 0.8194],
        [0.3987, 0.6013],
        [0.6594, 0.3406]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3732, 0.6268],
        [0.1689, 0.8311],
        [0.7631, 0.2369],
        [0.8314, 0.1686],
        [0.1267, 0.8733],
        [0.5046, 0.4954],
        [0.3189, 0.6811],
        [0.4835, 0.5165],
        [0.7139, 0.2861],
        [0.5245, 0.4755],
        [0.7595, 0.2405],
        [0.6679, 0.3321]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3192, 0.6808],
        [0.6533, 0.3467],
        [0.7245, 0.2755],
        [0.3558, 0.6442],
        [0.5590, 0.4410],
        [0.3975, 0.6025],
        [0.6328, 0.3672],
        [0.5220, 0.4780],
        [0.4157, 0.5843],
        [0.8461, 0.1539],
        [0.1313, 0.8687],
        [0.1814, 0.8186]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1836, 0.8164],
        [0.4406, 0.5594],
        [0.3521, 0.6479],
        [0.5574, 0.4426],
        [0.6129, 0.3871],
        [0.6661, 0.3339],
        [0.3478, 0.6522],
        [0.1172, 0.8828],
        [0.6487, 0.3513],
        [0.5123, 0.4877],
        [0.3113, 0.6887],
        [0.1851, 0.8149]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4848, 0.5152],
        [0.4049, 0.5951],
        [0.0889, 0.9111],
        [0.3535, 0.6465],
        [0.7522, 0.2478],
        [0.5931, 0.4069],
        [0.1815, 0.8185],
        [0.2589, 0.7411],
        [0.5771, 0.4229],
        [0.4228, 0.5772],
        [0.2542, 0.7458],
        [0.2651, 0.7349]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4002, 0.5998],
        [0.8706, 0.1294],
        [0.7781, 0.2219],
        [0.6168, 0.3832],
        [0.4426, 0.5574],
        [0.5898, 0.4102],
        [0.6472, 0.3528],
        [0.4342, 0.5658],
        [0.4555, 0.5445],
        [0.4366, 0.5634],
        [0.2086, 0.7914],
        [0.1267, 0.8733]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.1728, 0.8272],
        [0.4674, 0.5326],
        [0.2881, 0.7119],
        [0.8342, 0.1658],
        [0.6750, 0.3250],
        [0.1458, 0.8542],
        [0.2154, 0.7846],
        [0.1788, 0.8212],
        [0.4601, 0.5399],
        [0.9465, 0.0535],
        [0.4770, 0.5230],
        [0.6209, 0.3791]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4787, 0.5213],
        [0.6944, 0.3056],
        [0.4781, 0.5219],
        [0.2963, 0.7037],
        [0.2724, 0.7276],
        [0.3930, 0.6070],
        [0.1203, 0.8797],
        [0.5149, 0.4851],
        [0.4907, 0.5093],
        [0.8243, 0.1757],
        [0.8610, 0.1390],
        [0.8181, 0.1819]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4849, 0.5151],
        [0.4392, 0.5608],
        [0.4177, 0.5823],
        [0.5343, 0.4657],
        [0.3469, 0.6531],
        [0.1978, 0.8022],
        [0.7881, 0.2119],
        [0.2889, 0.7111],
        [0.8208, 0.1792],
        [0.7206, 0.2794],
        [0.6498, 0.3502],
        [0.5510, 0.4490]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.7750, 0.2250],
        [0.6428, 0.3572],
        [0.8052, 0.1948],
        [0.7142, 0.2858],
        [0.7813, 0.2187],
        [0.7535, 0.2465],
        [0.7716, 0.2284],
        [0.7377, 0.2623],
        [0.8143, 0.1857],
        [0.7232, 0.2768],
        [0.5749, 0.4251],
        [0.4103, 0.5897]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.2465, 0.7535],
        [0.4389, 0.5611],
        [0.0939, 0.9061],
        [0.1070, 0.8930],
        [0.1989, 0.8011],
        [0.4664, 0.5336],
        [0.1740, 0.8260],
        [0.0826, 0.9174],
        [0.2235, 0.7765],
        [0.1271, 0.8729],
        [0.2981, 0.7019],
        [0.0793, 0.9207]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.5162, 0.4838],
        [0.4656, 0.5344],
        [0.2812, 0.7188],
        [0.1373, 0.8627],
        [0.3066, 0.6934],
        [0.1455, 0.8545],
        [0.3121, 0.6879],
        [0.5614, 0.4386],
        [0.2402, 0.7598],
        [0.1867, 0.8133],
        [0.5565, 0.4435],
        [0.2309, 0.7691]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3560, 0.6440],
        [0.3593, 0.6407],
        [0.6059, 0.3941],
        [0.3788, 0.6212],
        [0.0842, 0.9158],
        [0.1910, 0.8090],
        [0.1581, 0.8419],
        [0.1016, 0.8984],
        [0.1211, 0.8789],
        [0.3186, 0.6814],
        [0.5444, 0.4556],
        [0.2658, 0.7342]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.2253, 0.7747],
        [0.1617, 0.8383],
        [0.2930, 0.7070],
        [0.2517, 0.7483],
        [0.5819, 0.4181],
        [0.5820, 0.4180],
        [0.4772, 0.5228],
        [0.1510, 0.8490],
        [0.1208, 0.8792],
        [0.4703, 0.5297],
        [0.1807, 0.8193],
        [0.4038, 0.5962]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1399, 0.8601],
        [0.3784, 0.6216],
        [0.4813, 0.5187],
        [0.5725, 0.4275],
        [0.7870, 0.2130],
        [0.3174, 0.6826],
        [0.1844, 0.8156],
        [0.1107, 0.8893],
        [0.3957, 0.6043],
        [0.4753, 0.5247],
        [0.3035, 0.6965],
        [0.2516, 0.7484]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7989, 0.2011],
        [0.5843, 0.4157],
        [0.0789, 0.9211],
        [0.2083, 0.7917],
        [0.5293, 0.4707],
        [0.6799, 0.3201],
        [0.3552, 0.6448],
        [0.4132, 0.5868],
        [0.7515, 0.2485],
        [0.2468, 0.7532],
        [0.2425, 0.7575],
        [0.5484, 0.4516]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4072, 0.5928],
        [0.5113, 0.4887],
        [0.3750, 0.6250],
        [0.2868, 0.7132],
        [0.5251, 0.4749],
        [0.1917, 0.8083],
        [0.2918, 0.7082],
        [0.5603, 0.4397],
        [0.4631, 0.5369],
        [0.1753, 0.8247],
        [0.2838, 0.7162],
        [0.3225, 0.6775]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2653, 0.7347],
        [0.5582, 0.4418],
        [0.3757, 0.6243],
        [0.5833, 0.4167],
        [0.3448, 0.6552],
        [0.3063, 0.6937],
        [0.1375, 0.8625],
        [0.2311, 0.7689],
        [0.2375, 0.7625],
        [0.1259, 0.8741],
        [0.4148, 0.5852],
        [0.7365, 0.2635]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.2942, 0.7058],
        [0.5655, 0.4345],
        [0.6623, 0.3377],
        [0.2421, 0.7579],
        [0.0994, 0.9006],
        [0.1768, 0.8232],
        [0.1227, 0.8773],
        [0.1885, 0.8115],
        [0.8499, 0.1501],
        [0.4819, 0.5181],
        [0.6725, 0.3275],
        [0.3724, 0.6276]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4296, 0.5704],
        [0.2948, 0.7052],
        [0.0602, 0.9398],
        [0.3072, 0.6928],
        [0.2135, 0.7865],
        [0.6011, 0.3989],
        [0.4634, 0.5366],
        [0.1740, 0.8260],
        [0.4413, 0.5587],
        [0.5322, 0.4678],
        [0.3976, 0.6024],
        [0.8661, 0.1339]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3831, 0.6169],
        [0.7674, 0.2326],
        [0.6442, 0.3558],
        [0.5594, 0.4406],
        [0.1737, 0.8263],
        [0.8357, 0.1643],
        [0.6124, 0.3876],
        [0.2710, 0.7290],
        [0.6117, 0.3883],
        [0.2979, 0.7021],
        [0.1070, 0.8930],
        [0.5361, 0.4639]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3709, 0.6291],
        [0.2989, 0.7011],
        [0.2707, 0.7293],
        [0.2170, 0.7830],
        [0.6373, 0.3627],
        [0.3678, 0.6322],
        [0.2654, 0.7346],
        [0.3177, 0.6823],
        [0.1665, 0.8335],
        [0.4659, 0.5341],
        [0.5402, 0.4598],
        [0.3543, 0.6457]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3687, 0.6313],
        [0.6771, 0.3229],
        [0.1049, 0.8951],
        [0.5184, 0.4816],
        [0.6043, 0.3957],
        [0.3209, 0.6791],
        [0.5758, 0.4242],
        [0.4747, 0.5253],
        [0.8503, 0.1497],
        [0.2777, 0.7223],
        [0.5050, 0.4950],
        [0.3457, 0.6543]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3965, 0.6035],
        [0.4140, 0.5860],
        [0.8446, 0.1554],
        [0.2668, 0.7332],
        [0.2319, 0.7681],
        [0.3139, 0.6861],
        [0.5776, 0.4224],
        [0.1847, 0.8153],
        [0.1936, 0.8064],
        [0.3648, 0.6352],
        [0.4443, 0.5557],
        [0.3102, 0.6898]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1357, 0.8643],
        [0.4080, 0.5920],
        [0.3911, 0.6089],
        [0.2035, 0.7965],
        [0.4151, 0.5849],
        [0.4909, 0.5091],
        [0.7112, 0.2888],
        [0.4463, 0.5537],
        [0.1706, 0.8294],
        [0.6122, 0.3878],
        [0.2386, 0.7614],
        [0.5644, 0.4356]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4170, 0.5830],
        [0.6906, 0.3094],
        [0.3010, 0.6990],
        [0.7458, 0.2542],
        [0.6568, 0.3432],
        [0.4304, 0.5696],
        [0.7892, 0.2108],
        [0.2997, 0.7003],
        [0.7013, 0.2987],
        [0.4837, 0.5163],
        [0.4305, 0.5695],
        [0.4496, 0.5504]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4499, 0.5501],
        [0.5215, 0.4785],
        [0.0610, 0.9390],
        [0.1247, 0.8753],
        [0.8162, 0.1838],
        [0.9247, 0.0753],
        [0.2319, 0.7681],
        [0.1783, 0.8217],
        [0.4567, 0.5433],
        [0.3631, 0.6369],
        [0.2341, 0.7659],
        [0.4139, 0.5861]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.8420, 0.1580],
        [0.7146, 0.2854],
        [0.7017, 0.2983],
        [0.4464, 0.5536],
        [0.4118, 0.5882],
        [0.3779, 0.6221],
        [0.3968, 0.6032],
        [0.4883, 0.5117],
        [0.1743, 0.8257],
        [0.3224, 0.6776],
        [0.3934, 0.6066],
        [0.6356, 0.3644]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2867, 0.7133],
        [0.2406, 0.7594],
        [0.5804, 0.4196],
        [0.5503, 0.4497],
        [0.4958, 0.5042],
        [0.5689, 0.4311],
        [0.1984, 0.8016],
        [0.7754, 0.2246],
        [0.5649, 0.4351],
        [0.5202, 0.4798],
        [0.1303, 0.8697],
        [0.8125, 0.1875]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3601, 0.6399],
        [0.3590, 0.6410],
        [0.8142, 0.1858],
        [0.1944, 0.8056],
        [0.2822, 0.7178],
        [0.3980, 0.6020],
        [0.0652, 0.9348],
        [0.3736, 0.6264],
        [0.5265, 0.4735],
        [0.5368, 0.4632],
        [0.3557, 0.6443],
        [0.7827, 0.2173]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4457, 0.5543],
        [0.2624, 0.7376],
        [0.2169, 0.7831],
        [0.5454, 0.4546],
        [0.6729, 0.3271],
        [0.2548, 0.7452],
        [0.2528, 0.7472],
        [0.3452, 0.6548],
        [0.2104, 0.7896],
        [0.6113, 0.3887],
        [0.4716, 0.5284],
        [0.4693, 0.5307]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4237, 0.5763],
        [0.1880, 0.8120],
        [0.4190, 0.5810],
        [0.3284, 0.6716],
        [0.3895, 0.6105],
        [0.3085, 0.6915],
        [0.2503, 0.7497],
        [0.2892, 0.7108],
        [0.3546, 0.6454],
        [0.1851, 0.8149],
        [0.5950, 0.4050],
        [0.1999, 0.8001]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.1207, 0.8793],
        [0.4356, 0.5644],
        [0.3823, 0.6177],
        [0.4985, 0.5015],
        [0.5352, 0.4648],
        [0.2513, 0.7487],
        [0.5525, 0.4475],
        [0.3218, 0.6782],
        [0.5582, 0.4418],
        [0.2830, 0.7170],
        [0.7853, 0.2147],
        [0.5365, 0.4635]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1930, 0.8070],
        [0.3994, 0.6006],
        [0.0530, 0.9470],
        [0.1886, 0.8114],
        [0.5030, 0.4970],
        [0.2319, 0.7681],
        [0.3995, 0.6005],
        [0.2996, 0.7004],
        [0.6618, 0.3382],
        [0.3288, 0.6712],
        [0.3597, 0.6403],
        [0.1511, 0.8489]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3140, 0.6860],
        [0.1494, 0.8506],
        [0.8069, 0.1931],
        [0.2210, 0.7790],
        [0.1715, 0.8285],
        [0.4367, 0.5633],
        [0.4090, 0.5910],
        [0.1406, 0.8594],
        [0.5496, 0.4504],
        [0.3878, 0.6122],
        [0.3899, 0.6101],
        [0.5520, 0.4480]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.2786, 0.7214],
        [0.3745, 0.6255],
        [0.3110, 0.6890],
        [0.2669, 0.7331],
        [0.4576, 0.5424],
        [0.6180, 0.3820],
        [0.9455, 0.0545],
        [0.5698, 0.4302],
        [0.3869, 0.6131],
        [0.2419, 0.7581],
        [0.3044, 0.6956],
        [0.2564, 0.7436]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5114, 0.4886],
        [0.7182, 0.2818],
        [0.6240, 0.3760],
        [0.4248, 0.5752],
        [0.5971, 0.4029],
        [0.2824, 0.7176],
        [0.4292, 0.5708],
        [0.1557, 0.8443],
        [0.3463, 0.6537],
        [0.2466, 0.7534],
        [0.4163, 0.5837],
        [0.5174, 0.4826]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7750, 0.2250],
        [0.4951, 0.5049],
        [0.1448, 0.8552],
        [0.2793, 0.7207],
        [0.6351, 0.3649],
        [0.2351, 0.7649],
        [0.4358, 0.5642],
        [0.3857, 0.6143],
        [0.8386, 0.1614],
        [0.2368, 0.7632],
        [0.3202, 0.6798],
        [0.3582, 0.6418]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.9022, 0.0978],
        [0.5205, 0.4795],
        [0.7463, 0.2537],
        [0.3102, 0.6898],
        [0.2425, 0.7575],
        [0.7727, 0.2273],
        [0.3011, 0.6989],
        [0.5859, 0.4141],
        [0.4124, 0.5876],
        [0.1090, 0.8910],
        [0.2766, 0.7234],
        [0.1210, 0.8790]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3872, 0.6128],
        [0.5023, 0.4977],
        [0.5713, 0.4287],
        [0.8185, 0.1815],
        [0.5746, 0.4254],
        [0.5404, 0.4596],
        [0.3540, 0.6460],
        [0.3701, 0.6299],
        [0.3066, 0.6934],
        [0.1356, 0.8644],
        [0.1715, 0.8285],
        [0.6494, 0.3506]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.6247, 0.3753],
        [0.5871, 0.4129],
        [0.6935, 0.3065],
        [0.2620, 0.7380],
        [0.2885, 0.7115],
        [0.1761, 0.8239],
        [0.2332, 0.7668],
        [0.5052, 0.4948],
        [0.8876, 0.1124],
        [0.0949, 0.9051],
        [0.6792, 0.3208],
        [0.9163, 0.0837]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.6946, 0.3054],
        [0.5187, 0.4813],
        [0.1891, 0.8109],
        [0.5253, 0.4747],
        [0.7946, 0.2054],
        [0.5850, 0.4150],
        [0.1586, 0.8414],
        [0.3974, 0.6026],
        [0.4594, 0.5406],
        [0.2768, 0.7232],
        [0.7136, 0.2864],
        [0.1874, 0.8126]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5052, 0.4948],
        [0.1706, 0.8294],
        [0.7317, 0.2683],
        [0.2797, 0.7203],
        [0.3284, 0.6716],
        [0.6482, 0.3518],
        [0.5106, 0.4894],
        [0.2088, 0.7912],
        [0.5734, 0.4266],
        [0.4939, 0.5061],
        [0.7767, 0.2233],
        [0.8623, 0.1377]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5335, 0.4665],
        [0.3974, 0.6026],
        [0.2331, 0.7669],
        [0.5703, 0.4297],
        [0.3416, 0.6584],
        [0.8341, 0.1659],
        [0.7490, 0.2510],
        [0.7583, 0.2417],
        [0.0789, 0.9211],
        [0.4333, 0.5667],
        [0.7462, 0.2538],
        [0.2817, 0.7183]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.5065, 0.4935],
        [0.3376, 0.6624],
        [0.5155, 0.4845],
        [0.2267, 0.7733],
        [0.7676, 0.2324],
        [0.2444, 0.7556],
        [0.5512, 0.4488],
        [0.4522, 0.5478],
        [0.5004, 0.4996],
        [0.6359, 0.3641],
        [0.6949, 0.3051],
        [0.3243, 0.6757]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.5315, 0.4685],
        [0.3004, 0.6996],
        [0.9504, 0.0496],
        [0.4766, 0.5234],
        [0.0520, 0.9480],
        [0.3681, 0.6319],
        [0.3468, 0.6532],
        [0.0274, 0.9726],
        [0.0371, 0.9629],
        [0.3458, 0.6542],
        [0.2720, 0.7280],
        [0.6894, 0.3106]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3236, 0.6764],
        [0.6020, 0.3980],
        [0.5349, 0.4651],
        [0.3317, 0.6683],
        [0.0799, 0.9201],
        [0.2956, 0.7044],
        [0.5311, 0.4689],
        [0.4657, 0.5343],
        [0.4064, 0.5936],
        [0.6938, 0.3062],
        [0.2153, 0.7847],
        [0.1503, 0.8497]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0742, 0.9258],
        [0.1878, 0.8122],
        [0.8503, 0.1497],
        [0.1547, 0.8453],
        [0.6829, 0.3171],
        [0.2575, 0.7425],
        [0.4506, 0.5494],
        [0.2290, 0.7710],
        [0.2487, 0.7513],
        [0.1255, 0.8745],
        [0.5024, 0.4976],
        [0.0782, 0.9218]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3355, 0.6645],
        [0.4514, 0.5486],
        [0.1826, 0.8174],
        [0.0282, 0.9718],
        [0.6649, 0.3351],
        [0.4301, 0.5699],
        [0.1824, 0.8176],
        [0.1389, 0.8611],
        [0.5776, 0.4224],
        [0.0938, 0.9062],
        [0.1870, 0.8130],
        [0.4025, 0.5975]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.6066, 0.3934],
        [0.4754, 0.5246],
        [0.5991, 0.4009],
        [0.4523, 0.5477],
        [0.3348, 0.6652],
        [0.4328, 0.5672],
        [0.1461, 0.8539],
        [0.1308, 0.8692],
        [0.1886, 0.8114],
        [0.3461, 0.6539],
        [0.2023, 0.7977],
        [0.1112, 0.8888]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3774, 0.6226],
        [0.3805, 0.6195],
        [0.5112, 0.4888],
        [0.3714, 0.6286],
        [0.0951, 0.9049],
        [0.2090, 0.7910],
        [0.2797, 0.7203],
        [0.2987, 0.7013],
        [0.4802, 0.5198],
        [0.5025, 0.4975],
        [0.3961, 0.6039],
        [0.3639, 0.6361]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3985, 0.6015],
        [0.3815, 0.6185],
        [0.5180, 0.4820],
        [0.1913, 0.8087],
        [0.3013, 0.6987],
        [0.0979, 0.9021],
        [0.1263, 0.8737],
        [0.2170, 0.7830],
        [0.9395, 0.0605],
        [0.1567, 0.8433],
        [0.5272, 0.4728],
        [0.9543, 0.0457]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.2117, 0.7883],
        [0.5285, 0.4715],
        [0.0906, 0.9094],
        [0.3388, 0.6612],
        [0.5561, 0.4439],
        [0.5919, 0.4081],
        [0.1846, 0.8154],
        [0.4554, 0.5446],
        [0.5101, 0.4899],
        [0.3818, 0.6182],
        [0.1256, 0.8744],
        [0.9125, 0.0875]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1851, 0.8149],
        [0.5344, 0.4656],
        [0.7103, 0.2897],
        [0.7525, 0.2475],
        [0.3882, 0.6118],
        [0.7217, 0.2783],
        [0.8161, 0.1839],
        [0.7664, 0.2336],
        [0.8358, 0.1642],
        [0.1456, 0.8544],
        [0.5532, 0.4468],
        [0.7397, 0.2603]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.8218, 0.1782],
        [0.7179, 0.2821],
        [0.1638, 0.8362],
        [0.4913, 0.5087],
        [0.6606, 0.3394],
        [0.5500, 0.4500],
        [0.6349, 0.3651],
        [0.2679, 0.7322],
        [0.3169, 0.6831],
        [0.2051, 0.7949],
        [0.3787, 0.6213],
        [0.7976, 0.2024]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3746, 0.6254],
        [0.7983, 0.2017],
        [0.4919, 0.5081],
        [0.7268, 0.2732],
        [0.2913, 0.7087],
        [0.3102, 0.6898],
        [0.7676, 0.2324],
        [0.2845, 0.7155],
        [0.5317, 0.4683],
        [0.1357, 0.8643],
        [0.5636, 0.4364],
        [0.1286, 0.8714]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.2850, 0.7150],
        [0.5895, 0.4105],
        [0.9472, 0.0528],
        [0.7941, 0.2059],
        [0.2781, 0.7219],
        [0.3335, 0.6665],
        [0.3848, 0.6152],
        [0.1251, 0.8749],
        [0.1952, 0.8048],
        [0.6315, 0.3685],
        [0.8459, 0.1541],
        [0.1847, 0.8153]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5250, 0.4750],
        [0.5946, 0.4054],
        [0.6464, 0.3536],
        [0.3142, 0.6858],
        [0.9267, 0.0733],
        [0.7381, 0.2619],
        [0.9344, 0.0656],
        [0.8053, 0.1947],
        [0.5198, 0.4802],
        [0.5758, 0.4242],
        [0.1540, 0.8460],
        [0.5389, 0.4611]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4725, 0.5275],
        [0.5136, 0.4864],
        [0.5120, 0.4880],
        [0.8249, 0.1751],
        [0.4617, 0.5383],
        [0.9289, 0.0711],
        [0.1602, 0.8398],
        [0.5818, 0.4182],
        [0.6016, 0.3984],
        [0.4938, 0.5062],
        [0.4318, 0.5682],
        [0.3380, 0.6620]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.6394, 0.3606],
        [0.3343, 0.6657],
        [0.4992, 0.5008],
        [0.4723, 0.5277],
        [0.5137, 0.4863],
        [0.4657, 0.5343],
        [0.3960, 0.6040],
        [0.1691, 0.8309],
        [0.3656, 0.6344],
        [0.3721, 0.6279],
        [0.4553, 0.5447],
        [0.4373, 0.5627]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.9234, 0.0766],
        [0.1029, 0.8971],
        [0.6285, 0.3715],
        [0.6157, 0.3843],
        [0.9553, 0.0447],
        [0.8529, 0.1471],
        [0.5057, 0.4943],
        [0.4642, 0.5358],
        [0.5767, 0.4233],
        [0.5509, 0.4491],
        [0.5537, 0.4463],
        [0.5373, 0.4627]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2560, 0.7440],
        [0.4423, 0.5577],
        [0.5480, 0.4520],
        [0.5924, 0.4076],
        [0.4373, 0.5627],
        [0.4693, 0.5307],
        [0.3107, 0.6893],
        [0.1881, 0.8119],
        [0.5788, 0.4212],
        [0.6151, 0.3849],
        [0.2733, 0.7267],
        [0.7376, 0.2624]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.2020, 0.7980],
        [0.6426, 0.3574],
        [0.4650, 0.5350],
        [0.4062, 0.5938],
        [0.0731, 0.9269],
        [0.3832, 0.6168],
        [0.1423, 0.8577],
        [0.1236, 0.8764],
        [0.9371, 0.0629],
        [0.3799, 0.6201],
        [0.4289, 0.5711],
        [0.8359, 0.1641]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1413, 0.8587],
        [0.2265, 0.7735],
        [0.1001, 0.8999],
        [0.6930, 0.3070],
        [0.3989, 0.6011],
        [0.4214, 0.5786],
        [0.7541, 0.2459],
        [0.5142, 0.4858],
        [0.4482, 0.5518],
        [0.4348, 0.5652],
        [0.5759, 0.4241],
        [0.8142, 0.1858]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5919, 0.4081],
        [0.5422, 0.4578],
        [0.5058, 0.4942],
        [0.8636, 0.1364],
        [0.2423, 0.7577],
        [0.3646, 0.6354],
        [0.3304, 0.6696],
        [0.1317, 0.8683],
        [0.7595, 0.2405],
        [0.0574, 0.9426],
        [0.2120, 0.7880],
        [0.1156, 0.8844]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3282, 0.6718],
        [0.6052, 0.3948],
        [0.1682, 0.8318],
        [0.2326, 0.7674],
        [0.4606, 0.5394],
        [0.3841, 0.6159],
        [0.3642, 0.6358],
        [0.1211, 0.8789],
        [0.4687, 0.5313],
        [0.1575, 0.8425],
        [0.2764, 0.7236],
        [0.2372, 0.7628]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3176, 0.6824],
        [0.8492, 0.1508],
        [0.5583, 0.4417],
        [0.8711, 0.1289],
        [0.8683, 0.1317],
        [0.5412, 0.4588],
        [0.8430, 0.1570],
        [0.6128, 0.3872],
        [0.5447, 0.4553],
        [0.1829, 0.8171],
        [0.9549, 0.0451],
        [0.6599, 0.3401]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1236, 0.8764],
        [0.1503, 0.8497],
        [0.8596, 0.1404],
        [0.6457, 0.3543],
        [0.3382, 0.6618],
        [0.5774, 0.4226],
        [0.4263, 0.5737],
        [0.0202, 0.9798],
        [0.1200, 0.8800],
        [0.2999, 0.7001],
        [0.6370, 0.3630],
        [0.3636, 0.6364]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1476, 0.8524],
        [0.7911, 0.2089],
        [0.1012, 0.8988],
        [0.0655, 0.9345],
        [0.3738, 0.6262],
        [0.0721, 0.9279],
        [0.7226, 0.2774],
        [0.7496, 0.2504],
        [0.5260, 0.4740],
        [0.6114, 0.3886],
        [0.0698, 0.9302],
        [0.4513, 0.5487]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.6374, 0.3626],
        [0.5008, 0.4992],
        [0.1911, 0.8089],
        [0.1383, 0.8617],
        [0.9161, 0.0839],
        [0.1746, 0.8254],
        [0.1928, 0.8072],
        [0.2219, 0.7781],
        [0.2061, 0.7939],
        [0.0877, 0.9123],
        [0.2414, 0.7586],
        [0.0562, 0.9438]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.8986, 0.1014],
        [0.7556, 0.2444],
        [0.1570, 0.8430],
        [0.0534, 0.9466],
        [0.5325, 0.4675],
        [0.2959, 0.7041],
        [0.1667, 0.8333],
        [0.3956, 0.6044],
        [0.3409, 0.6591],
        [0.3782, 0.6218],
        [0.4378, 0.5622],
        [0.6086, 0.3914]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.7344, 0.2656],
        [0.3071, 0.6929],
        [0.5978, 0.4022],
        [0.6189, 0.3811],
        [0.4550, 0.5450],
        [0.5718, 0.4282],
        [0.4441, 0.5559],
        [0.1029, 0.8971],
        [0.5117, 0.4883],
        [0.0544, 0.9456],
        [0.1419, 0.8581],
        [0.0469, 0.9531]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5401, 0.4599],
        [0.5826, 0.4174],
        [0.5833, 0.4167],
        [0.6596, 0.3404],
        [0.8214, 0.1786],
        [0.1873, 0.8127],
        [0.3284, 0.6716],
        [0.8329, 0.1671],
        [0.6904, 0.3096],
        [0.2736, 0.7264],
        [0.1876, 0.8124],
        [0.5160, 0.4840]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3324, 0.6676],
        [0.5404, 0.4596],
        [0.1684, 0.8316],
        [0.3355, 0.6645],
        [0.0915, 0.9085],
        [0.1665, 0.8335],
        [0.0743, 0.9257],
        [0.0688, 0.9312],
        [0.8658, 0.1342],
        [0.1086, 0.8914],
        [0.8788, 0.1212],
        [0.5925, 0.4075]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.2700, 0.7300],
        [0.4612, 0.5388],
        [0.1597, 0.8403],
        [0.6311, 0.3689],
        [0.4035, 0.5965],
        [0.4124, 0.5876],
        [0.2402, 0.7598],
        [0.1790, 0.8210],
        [0.5891, 0.4109],
        [0.6497, 0.3503],
        [0.8763, 0.1237],
        [0.5669, 0.4331]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.7151, 0.2849],
        [0.2617, 0.7383],
        [0.4918, 0.5082],
        [0.3584, 0.6416],
        [0.4356, 0.5644],
        [0.5447, 0.4553],
        [0.5900, 0.4100],
        [0.3055, 0.6945],
        [0.7044, 0.2956],
        [0.0794, 0.9206],
        [0.3220, 0.6780],
        [0.6163, 0.3837]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.2806, 0.7194],
        [0.8379, 0.1621],
        [0.3478, 0.6522],
        [0.6729, 0.3271],
        [0.3854, 0.6146],
        [0.7616, 0.2384],
        [0.2012, 0.7988],
        [0.6297, 0.3703],
        [0.5863, 0.4137],
        [0.6522, 0.3478],
        [0.2884, 0.7116],
        [0.2881, 0.7119]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.6107, 0.3893],
        [0.1922, 0.8078],
        [0.7809, 0.2191],
        [0.4284, 0.5716],
        [0.9211, 0.0789],
        [0.4961, 0.5039],
        [0.2797, 0.7203],
        [0.3117, 0.6883],
        [0.4192, 0.5808],
        [0.1962, 0.8038],
        [0.3117, 0.6883],
        [0.6294, 0.3706]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1304, 0.8696],
        [0.7067, 0.2933],
        [0.8100, 0.1900],
        [0.4985, 0.5015],
        [0.0708, 0.9292],
        [0.4421, 0.5579],
        [0.0320, 0.9680],
        [0.0246, 0.9754],
        [0.1361, 0.8639],
        [0.0872, 0.9128],
        [0.1579, 0.8421],
        [0.6724, 0.3276]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.2364, 0.7636],
        [0.6963, 0.3037],
        [0.4312, 0.5688],
        [0.8695, 0.1305],
        [0.0970, 0.9030],
        [0.2897, 0.7103],
        [0.5857, 0.4143],
        [0.6220, 0.3780],
        [0.3173, 0.6827],
        [0.5645, 0.4355],
        [0.0620, 0.9380],
        [0.2283, 0.7717]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0358, 0.9642],
        [0.5808, 0.4192],
        [0.1635, 0.8365],
        [0.3766, 0.6234],
        [0.2897, 0.7103],
        [0.0655, 0.9345],
        [0.2275, 0.7725],
        [0.4762, 0.5238],
        [0.2647, 0.7353],
        [0.4889, 0.5111],
        [0.6131, 0.3869],
        [0.4633, 0.5367]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5176, 0.4824],
        [0.7330, 0.2670],
        [0.5073, 0.4927],
        [0.0635, 0.9365],
        [0.8497, 0.1503],
        [0.1136, 0.8864],
        [0.2586, 0.7414],
        [0.3038, 0.6962],
        [0.4529, 0.5471],
        [0.8107, 0.1893],
        [0.5433, 0.4567],
        [0.2707, 0.7293]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.2060, 0.7940],
        [0.2760, 0.7240],
        [0.5029, 0.4971],
        [0.8729, 0.1271],
        [0.0562, 0.9438],
        [0.7210, 0.2790],
        [0.1825, 0.8175],
        [0.0617, 0.9383],
        [0.1335, 0.8665],
        [0.4502, 0.5498],
        [0.1649, 0.8351],
        [0.0166, 0.9834]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.7272, 0.2728],
        [0.6011, 0.3989],
        [0.4244, 0.5756],
        [0.1145, 0.8855],
        [0.5346, 0.4654],
        [0.3650, 0.6350],
        [0.2539, 0.7461],
        [0.0838, 0.9162],
        [0.8919, 0.1081],
        [0.2351, 0.7649],
        [0.1038, 0.8962],
        [0.1454, 0.8546]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.2222, 0.7778],
        [0.4745, 0.5255],
        [0.6039, 0.3961],
        [0.2029, 0.7971],
        [0.1361, 0.8639],
        [0.5031, 0.4969],
        [0.3494, 0.6506],
        [0.2049, 0.7951],
        [0.9390, 0.0610],
        [0.3119, 0.6881],
        [0.6868, 0.3132],
        [0.4132, 0.5868]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0349, 0.9651],
        [0.0702, 0.9298],
        [0.0878, 0.9122],
        [0.8336, 0.1664],
        [0.9736, 0.0264],
        [0.2821, 0.7179],
        [0.2941, 0.7059],
        [0.2289, 0.7711],
        [0.7903, 0.2097],
        [0.2281, 0.7719],
        [0.5921, 0.4079],
        [0.6561, 0.3439]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2809, 0.7191],
        [0.2890, 0.7110],
        [0.7017, 0.2983],
        [0.4798, 0.5202],
        [0.3606, 0.6394],
        [0.3644, 0.6356],
        [0.7410, 0.2590],
        [0.1426, 0.8574],
        [0.8695, 0.1305],
        [0.1106, 0.8894],
        [0.4613, 0.5387],
        [0.0980, 0.9020]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.8890, 0.1110],
        [0.8531, 0.1469],
        [0.2236, 0.7764],
        [0.2707, 0.7293],
        [0.5950, 0.4050],
        [0.4459, 0.5541],
        [0.8208, 0.1792],
        [0.2810, 0.7190],
        [0.2027, 0.7973],
        [0.6023, 0.3977],
        [0.0950, 0.9050],
        [0.6235, 0.3765]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.5847, 0.4153],
        [0.8379, 0.1621],
        [0.3727, 0.6273],
        [0.4486, 0.5514],
        [0.7670, 0.2330],
        [0.6379, 0.3621],
        [0.5291, 0.4709],
        [0.9601, 0.0399],
        [0.4491, 0.5509],
        [0.1106, 0.8894],
        [0.8002, 0.1998],
        [0.0624, 0.9376]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1087, 0.8913],
        [0.2387, 0.7613],
        [0.8948, 0.1052],
        [0.6339, 0.3661],
        [0.3089, 0.6911],
        [0.4652, 0.5348],
        [0.2205, 0.7795],
        [0.0601, 0.9399],
        [0.3050, 0.6950],
        [0.0918, 0.9082],
        [0.7373, 0.2627],
        [0.8328, 0.1672]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3155, 0.6845],
        [0.3668, 0.6332],
        [0.2509, 0.7491],
        [0.1750, 0.8250],
        [0.1105, 0.8895],
        [0.1097, 0.8903],
        [0.2301, 0.7699],
        [0.7384, 0.2616],
        [0.0957, 0.9043],
        [0.3236, 0.6764],
        [0.1748, 0.8252],
        [0.0966, 0.9034]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.2152, 0.7848],
        [0.1676, 0.8324],
        [0.6976, 0.3024],
        [0.7392, 0.2608],
        [0.9070, 0.0930],
        [0.7623, 0.2377],
        [0.0454, 0.9546],
        [0.7988, 0.2012],
        [0.0794, 0.9206],
        [0.7690, 0.2310],
        [0.8208, 0.1792],
        [0.5187, 0.4813]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.8960, 0.1040],
        [0.9059, 0.0941],
        [0.0775, 0.9225],
        [0.0648, 0.9352],
        [0.4960, 0.5040],
        [0.2001, 0.7999],
        [0.2508, 0.7492],
        [0.1041, 0.8959],
        [0.3334, 0.6666],
        [0.3681, 0.6319],
        [0.3482, 0.6518],
        [0.5883, 0.4117]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5803, 0.4197],
        [0.0444, 0.9556],
        [0.3558, 0.6442],
        [0.2418, 0.7582],
        [0.2549, 0.7451],
        [0.3377, 0.6623],
        [0.3149, 0.6851],
        [0.2761, 0.7239],
        [0.1118, 0.8882],
        [0.6227, 0.3773],
        [0.0518, 0.9482],
        [0.0283, 0.9717]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.8647, 0.1353],
        [0.4189, 0.5811],
        [0.1211, 0.8789],
        [0.8792, 0.1208],
        [0.4842, 0.5158],
        [0.7776, 0.2224],
        [0.1118, 0.8882],
        [0.0755, 0.9245],
        [0.4911, 0.5089],
        [0.4840, 0.5160],
        [0.1260, 0.8740],
        [0.3854, 0.6146]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.6619, 0.3381],
        [0.5935, 0.4065],
        [0.2006, 0.7994],
        [0.0620, 0.9380],
        [0.2036, 0.7964],
        [0.6428, 0.3572],
        [0.1594, 0.8406],
        [0.0835, 0.9165],
        [0.9275, 0.0725],
        [0.5143, 0.4857],
        [0.4200, 0.5800],
        [0.9688, 0.0312]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0618, 0.9382],
        [0.0106, 0.9894],
        [0.0397, 0.9603],
        [0.3008, 0.6992],
        [0.7803, 0.2197],
        [0.0211, 0.9789],
        [0.0371, 0.9629],
        [0.0653, 0.9347],
        [0.2759, 0.7241],
        [0.2542, 0.7458],
        [0.3557, 0.6443],
        [0.1194, 0.8806]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0538, 0.9462],
        [0.1662, 0.8338],
        [0.0801, 0.9199],
        [0.0662, 0.9338],
        [0.0740, 0.9260],
        [0.1055, 0.8945],
        [0.1697, 0.8303],
        [0.0235, 0.9765],
        [0.2285, 0.7715],
        [0.0399, 0.9601],
        [0.2051, 0.7949],
        [0.1571, 0.8429]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.1937, 0.8063],
        [0.3207, 0.6793],
        [0.0284, 0.9716],
        [0.3998, 0.6002],
        [0.7161, 0.2839],
        [0.3068, 0.6932],
        [0.1898, 0.8102],
        [0.0745, 0.9255],
        [0.0513, 0.9487],
        [0.1894, 0.8106],
        [0.0603, 0.9397],
        [0.3294, 0.6706]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3100, 0.6900],
        [0.4695, 0.5305],
        [0.4131, 0.5869],
        [0.5368, 0.4632],
        [0.5090, 0.4910],
        [0.3476, 0.6524],
        [0.8192, 0.1808],
        [0.8469, 0.1531],
        [0.3764, 0.6236],
        [0.2650, 0.7350],
        [0.3841, 0.6159],
        [0.5990, 0.4010]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3250, 0.6750],
        [0.2799, 0.7201],
        [0.6183, 0.3817],
        [0.2158, 0.7842],
        [0.2296, 0.7704],
        [0.5644, 0.4356],
        [0.1666, 0.8334],
        [0.0210, 0.9790],
        [0.5271, 0.4729],
        [0.0785, 0.9215],
        [0.5110, 0.4890],
        [0.6618, 0.3382]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1332, 0.8668],
        [0.2048, 0.7952],
        [0.3366, 0.6634],
        [0.0569, 0.9431],
        [0.5583, 0.4417],
        [0.0753, 0.9247],
        [0.7882, 0.2118],
        [0.8272, 0.1728],
        [0.1873, 0.8127],
        [0.2463, 0.7537],
        [0.1280, 0.8720],
        [0.1186, 0.8814]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1463, 0.8537],
        [0.4608, 0.5392],
        [0.1680, 0.8320],
        [0.1775, 0.8225],
        [0.4249, 0.5751],
        [0.1243, 0.8757],
        [0.2585, 0.7415],
        [0.1067, 0.8933],
        [0.3716, 0.6284],
        [0.1078, 0.8922],
        [0.4974, 0.5026],
        [0.7567, 0.2433]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4876, 0.5124],
        [0.2295, 0.7705],
        [0.0611, 0.9389],
        [0.0224, 0.9776],
        [0.4612, 0.5388],
        [0.2436, 0.7564],
        [0.2417, 0.7583],
        [0.3506, 0.6494],
        [0.4384, 0.5616],
        [0.3921, 0.6079],
        [0.2087, 0.7913],
        [0.5788, 0.4212]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.1302, 0.8698],
        [0.5803, 0.4197],
        [0.4789, 0.5211],
        [0.6681, 0.3319],
        [0.1280, 0.8720],
        [0.3707, 0.6293],
        [0.4820, 0.5180],
        [0.2253, 0.7747],
        [0.5600, 0.4400],
        [0.2090, 0.7910],
        [0.0173, 0.9827],
        [0.0973, 0.9027]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.7890, 0.2110],
        [0.3019, 0.6981],
        [0.0257, 0.9743],
        [0.2736, 0.7264],
        [0.7627, 0.2373],
        [0.6947, 0.3053],
        [0.0403, 0.9597],
        [0.1005, 0.8995],
        [0.7504, 0.2496],
        [0.5151, 0.4849],
        [0.0374, 0.9626],
        [0.7493, 0.2507]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.9278, 0.0722],
        [0.7408, 0.2592],
        [0.8814, 0.1186],
        [0.4552, 0.5448],
        [0.1018, 0.8982],
        [0.1820, 0.8180],
        [0.0852, 0.9148],
        [0.0165, 0.9835],
        [0.9765, 0.0235],
        [0.3654, 0.6346],
        [0.5365, 0.4635],
        [0.6524, 0.3476]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.2975, 0.7025],
        [0.1172, 0.8828],
        [0.0588, 0.9412],
        [0.8055, 0.1945],
        [0.7554, 0.2446],
        [0.1591, 0.8409],
        [0.1468, 0.8532],
        [0.2548, 0.7452],
        [0.9726, 0.0274],
        [0.8365, 0.1635],
        [0.8514, 0.1486],
        [0.8572, 0.1428]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.6155, 0.3845],
        [0.4886, 0.5114],
        [0.1619, 0.8381],
        [0.4143, 0.5857],
        [0.4091, 0.5909],
        [0.3639, 0.6361],
        [0.2508, 0.7492],
        [0.5803, 0.4197],
        [0.8602, 0.1398],
        [0.0945, 0.9055],
        [0.3416, 0.6584],
        [0.1288, 0.8712]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.2277, 0.7723],
        [0.9725, 0.0275],
        [0.0915, 0.9085],
        [0.3184, 0.6816],
        [0.7222, 0.2778],
        [0.9223, 0.0777],
        [0.3845, 0.6155],
        [0.4975, 0.5025],
        [0.1326, 0.8674],
        [0.1263, 0.8737],
        [0.0389, 0.9611],
        [0.3026, 0.6974]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.6897, 0.3103],
        [0.3029, 0.6971],
        [0.1725, 0.8275],
        [0.1816, 0.8184],
        [0.6891, 0.3109],
        [0.2826, 0.7174],
        [0.7400, 0.2600],
        [0.0883, 0.9117],
        [0.2981, 0.7019],
        [0.2631, 0.7369],
        [0.8806, 0.1194],
        [0.2223, 0.7777]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0430, 0.9570],
        [0.8026, 0.1974],
        [0.8662, 0.1338],
        [0.1240, 0.8760],
        [0.7847, 0.2153],
        [0.6731, 0.3269],
        [0.1031, 0.8969],
        [0.0109, 0.9891],
        [0.7110, 0.2890],
        [0.0466, 0.9534],
        [0.7670, 0.2330],
        [0.4507, 0.5493]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1025, 0.8975],
        [0.4419, 0.5581],
        [0.0980, 0.9020],
        [0.3683, 0.6317],
        [0.4195, 0.5805],
        [0.3039, 0.6961],
        [0.5685, 0.4315],
        [0.8363, 0.1637],
        [0.5443, 0.4557],
        [0.5868, 0.4132],
        [0.7113, 0.2887],
        [0.2425, 0.7575]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0440, 0.9560],
        [0.0551, 0.9449],
        [0.2597, 0.7403],
        [0.3180, 0.6820],
        [0.3603, 0.6397],
        [0.3909, 0.6091],
        [0.2916, 0.7084],
        [0.7733, 0.2267],
        [0.0822, 0.9178],
        [0.3447, 0.6553],
        [0.9553, 0.0447],
        [0.5515, 0.4485]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.9119, 0.0881],
        [0.2560, 0.7440],
        [0.1233, 0.8767],
        [0.0460, 0.9540],
        [0.0461, 0.9539],
        [0.0458, 0.9542],
        [0.6535, 0.3465],
        [0.1383, 0.8617],
        [0.9212, 0.0788],
        [0.0558, 0.9442],
        [0.0539, 0.9461],
        [0.5917, 0.4083]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.8065, 0.1935],
        [0.4209, 0.5791],
        [0.2878, 0.7122],
        [0.2887, 0.7113],
        [0.0860, 0.9140],
        [0.5040, 0.4960],
        [0.6234, 0.3766],
        [0.1798, 0.8202],
        [0.2083, 0.7917],
        [0.7295, 0.2705],
        [0.1524, 0.8476],
        [0.0120, 0.9880]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2983, 0.7017],
        [0.2728, 0.7272],
        [0.2622, 0.7378],
        [0.2774, 0.7226],
        [0.3438, 0.6562],
        [0.9226, 0.0774],
        [0.1165, 0.8835],
        [0.0474, 0.9526],
        [0.9545, 0.0455],
        [0.2225, 0.7775],
        [0.0070, 0.9930],
        [0.3341, 0.6659]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3977, 0.6023],
        [0.0711, 0.9289],
        [0.4245, 0.5755],
        [0.4244, 0.5756],
        [0.0856, 0.9144],
        [0.2490, 0.7510],
        [0.0897, 0.9103],
        [0.0194, 0.9806],
        [0.6657, 0.3343],
        [0.3203, 0.6797],
        [0.1457, 0.8543],
        [0.7445, 0.2555]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1378, 0.8622],
        [0.1030, 0.8970],
        [0.0563, 0.9437],
        [0.0342, 0.9658],
        [0.8646, 0.1354],
        [0.0575, 0.9425],
        [0.2692, 0.7308],
        [0.1479, 0.8521],
        [0.9336, 0.0664],
        [0.2935, 0.7065],
        [0.7638, 0.2362],
        [0.5037, 0.4963]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0923, 0.9077],
        [0.4234, 0.5766],
        [0.2809, 0.7191],
        [0.3407, 0.6593],
        [0.3378, 0.6622],
        [0.8957, 0.1043],
        [0.1836, 0.8164],
        [0.0232, 0.9768],
        [0.9163, 0.0837],
        [0.0161, 0.9839],
        [0.0774, 0.9226],
        [0.2168, 0.7832]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.1122, 0.8878],
        [0.8425, 0.1575],
        [0.6342, 0.3658],
        [0.3857, 0.6143],
        [0.8177, 0.1823],
        [0.5002, 0.4998],
        [0.6668, 0.3332],
        [0.5172, 0.4828],
        [0.1757, 0.8243],
        [0.6100, 0.3900],
        [0.4409, 0.5591],
        [0.8606, 0.1394]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.6851, 0.3149],
        [0.7283, 0.2717],
        [0.9238, 0.0762],
        [0.2208, 0.7792],
        [0.0783, 0.9217],
        [0.9082, 0.0918],
        [0.7640, 0.2360],
        [0.9429, 0.0571],
        [0.6768, 0.3232],
        [0.4756, 0.5244],
        [0.8992, 0.1008],
        [0.0488, 0.9512]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0528, 0.9472],
        [0.7543, 0.2457],
        [0.2236, 0.7764],
        [0.9167, 0.0833],
        [0.7130, 0.2870],
        [0.9679, 0.0321],
        [0.2118, 0.7882],
        [0.0061, 0.9939],
        [0.8201, 0.1799],
        [0.0207, 0.9793],
        [0.2628, 0.7372],
        [0.3077, 0.6923]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.2212, 0.7788],
        [0.3443, 0.6557],
        [0.1698, 0.8302],
        [0.2576, 0.7424],
        [0.1759, 0.8241],
        [0.5663, 0.4337],
        [0.8361, 0.1639],
        [0.0910, 0.9090],
        [0.2037, 0.7963],
        [0.6256, 0.3744],
        [0.0264, 0.9736],
        [0.0454, 0.9546]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3571, 0.6429],
        [0.0491, 0.9509],
        [0.1510, 0.8490],
        [0.1489, 0.8511],
        [0.8883, 0.1117],
        [0.0589, 0.9411],
        [0.0554, 0.9446],
        [0.8021, 0.1979],
        [0.2958, 0.7042],
        [0.4521, 0.5479],
        [0.8860, 0.1140],
        [0.0591, 0.9409]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5161, 0.4839],
        [0.1418, 0.8582],
        [0.1553, 0.8447],
        [0.0157, 0.9843],
        [0.0407, 0.9593],
        [0.0448, 0.9552],
        [0.3365, 0.6635],
        [0.0404, 0.9596],
        [0.6313, 0.3687],
        [0.0220, 0.9780],
        [0.0701, 0.9299],
        [0.0962, 0.9038]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3495, 0.6505],
        [0.1842, 0.8158],
        [0.3124, 0.6876],
        [0.2263, 0.7737],
        [0.2632, 0.7368],
        [0.2584, 0.7416],
        [0.3480, 0.6520],
        [0.2013, 0.7987],
        [0.1471, 0.8529],
        [0.5383, 0.4617],
        [0.0877, 0.9123],
        [0.0129, 0.9871]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5862, 0.4138],
        [0.1764, 0.8236],
        [0.3013, 0.6987],
        [0.1686, 0.8314],
        [0.4142, 0.5858],
        [0.1059, 0.8941],
        [0.0572, 0.9428],
        [0.0974, 0.9026],
        [0.2938, 0.7062],
        [0.2204, 0.7796],
        [0.0805, 0.9195],
        [0.0178, 0.9822]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3016, 0.6984],
        [0.1464, 0.8536],
        [0.4208, 0.5792],
        [0.1106, 0.8894],
        [0.0840, 0.9160],
        [0.0607, 0.9393],
        [0.0285, 0.9715],
        [0.0896, 0.9104],
        [0.6110, 0.3890],
        [0.0231, 0.9769],
        [0.9418, 0.0582],
        [0.2269, 0.7731]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1750, 0.8250],
        [0.0234, 0.9766],
        [0.0425, 0.9575],
        [0.2150, 0.7850],
        [0.8882, 0.1118],
        [0.0264, 0.9736],
        [0.0715, 0.9285],
        [0.1082, 0.8918],
        [0.9393, 0.0607],
        [0.3345, 0.6655],
        [0.4118, 0.5882],
        [0.3595, 0.6405]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0907, 0.9093],
        [0.5846, 0.4154],
        [0.0588, 0.9412],
        [0.0641, 0.9359],
        [0.1597, 0.8403],
        [0.5939, 0.4061],
        [0.8492, 0.1508],
        [0.0201, 0.9799],
        [0.3528, 0.6472],
        [0.1959, 0.8041],
        [0.0878, 0.9122],
        [0.0387, 0.9613]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.1444, 0.8556],
        [0.2542, 0.7458],
        [0.2350, 0.7650],
        [0.0901, 0.9099],
        [0.9623, 0.0377],
        [0.2325, 0.7675],
        [0.3127, 0.6873],
        [0.1579, 0.8421],
        [0.5780, 0.4220],
        [0.1310, 0.8690],
        [0.3823, 0.6177],
        [0.4935, 0.5065]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.8247, 0.1753],
        [0.5894, 0.4106],
        [0.5806, 0.4194],
        [0.5915, 0.4085],
        [0.7012, 0.2988],
        [0.9398, 0.0602],
        [0.1715, 0.8285],
        [0.7353, 0.2647],
        [0.6347, 0.3653],
        [0.2524, 0.7476],
        [0.6909, 0.3091],
        [0.0296, 0.9704]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.6313, 0.3687],
        [0.6430, 0.3570],
        [0.8643, 0.1357],
        [0.9852, 0.0148],
        [0.9458, 0.0542],
        [0.8657, 0.1343],
        [0.4314, 0.5686],
        [0.0145, 0.9855],
        [0.5078, 0.4922],
        [0.2438, 0.7562],
        [0.6256, 0.3744],
        [0.0506, 0.9494]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5045, 0.4955],
        [0.0254, 0.9746],
        [0.6437, 0.3563],
        [0.1154, 0.8846],
        [0.5284, 0.4716],
        [0.0571, 0.9429],
        [0.9755, 0.0245],
        [0.4013, 0.5987],
        [0.4276, 0.5724],
        [0.2872, 0.7128],
        [0.1192, 0.8808],
        [0.2361, 0.7639]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.2731, 0.7269],
        [0.1907, 0.8093],
        [0.2267, 0.7733],
        [0.7329, 0.2671],
        [0.6545, 0.3455],
        [0.0180, 0.9820],
        [0.9616, 0.0384],
        [0.4891, 0.5109],
        [0.6912, 0.3088],
        [0.1334, 0.8666],
        [0.5152, 0.4848],
        [0.7907, 0.2093]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.8626, 0.1374],
        [0.8453, 0.1547],
        [0.0522, 0.9478],
        [0.0083, 0.9917],
        [0.1935, 0.8065],
        [0.0268, 0.9732],
        [0.1157, 0.8843],
        [0.6268, 0.3732],
        [0.6511, 0.3489],
        [0.5777, 0.4223],
        [0.1868, 0.8132],
        [0.5200, 0.4800]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5533, 0.4467],
        [0.4032, 0.5968],
        [0.9188, 0.0812],
        [0.0487, 0.9513],
        [0.4086, 0.5914],
        [0.4989, 0.5011],
        [0.1672, 0.8328],
        [0.0825, 0.9175],
        [0.0518, 0.9482],
        [0.2578, 0.7422],
        [0.0312, 0.9688],
        [0.0103, 0.9897]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.7072, 0.2928],
        [0.2383, 0.7617],
        [0.1676, 0.8324],
        [0.3053, 0.6947],
        [0.1180, 0.8820],
        [0.3871, 0.6129],
        [0.0303, 0.9697],
        [0.0487, 0.9513],
        [0.5752, 0.4248],
        [0.7791, 0.2209],
        [0.0141, 0.9859],
        [0.1322, 0.8678]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.7976, 0.2024],
        [0.2101, 0.7899],
        [0.1354, 0.8646],
        [0.2570, 0.7430],
        [0.0675, 0.9325],
        [0.3824, 0.6176],
        [0.1403, 0.8597],
        [0.0086, 0.9914],
        [0.9122, 0.0878],
        [0.0816, 0.9184],
        [0.9349, 0.0651],
        [0.7074, 0.2926]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0835, 0.9165],
        [0.1141, 0.8859],
        [0.3408, 0.6592],
        [0.1533, 0.8467],
        [0.1579, 0.8421],
        [0.0728, 0.9272],
        [0.0157, 0.9843],
        [0.0369, 0.9631],
        [0.9619, 0.0381],
        [0.2048, 0.7952],
        [0.3452, 0.6548],
        [0.9234, 0.0766]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2840, 0.7160],
        [0.9053, 0.0947],
        [0.1270, 0.8730],
        [0.0189, 0.9811],
        [0.0358, 0.9642],
        [0.7854, 0.2146],
        [0.8958, 0.1042],
        [0.0147, 0.9853],
        [0.2546, 0.7454],
        [0.0684, 0.9316],
        [0.3700, 0.6300],
        [0.2119, 0.7881]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3431, 0.6569],
        [0.4984, 0.5016],
        [0.0569, 0.9431],
        [0.5543, 0.4457],
        [0.2817, 0.7183],
        [0.8874, 0.1126],
        [0.1354, 0.8646],
        [0.3897, 0.6103],
        [0.5499, 0.4501],
        [0.4140, 0.5860],
        [0.0669, 0.9331],
        [0.3097, 0.6903]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0717, 0.9283],
        [0.1194, 0.8806],
        [0.5073, 0.4927],
        [0.7989, 0.2011],
        [0.3947, 0.6053],
        [0.9058, 0.0942],
        [0.8964, 0.1036],
        [0.3555, 0.6445],
        [0.7291, 0.2709],
        [0.1265, 0.8735],
        [0.1530, 0.8470],
        [0.1240, 0.8760]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1755, 0.8245],
        [0.3550, 0.6450],
        [0.9555, 0.0445],
        [0.2069, 0.7931],
        [0.9792, 0.0208],
        [0.9505, 0.0495],
        [0.3167, 0.6833],
        [0.0391, 0.9609],
        [0.9910, 0.0090],
        [0.2942, 0.7058],
        [0.8351, 0.1649],
        [0.2444, 0.7556]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4041, 0.5959],
        [0.2699, 0.7301],
        [0.4150, 0.5850],
        [0.6374, 0.3626],
        [0.3739, 0.6261],
        [0.2860, 0.7140],
        [0.6806, 0.3194],
        [0.7620, 0.2380],
        [0.2375, 0.7625],
        [0.6337, 0.3663],
        [0.2625, 0.7375],
        [0.2205, 0.7795]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0835, 0.9165],
        [0.8410, 0.1590],
        [0.3611, 0.6389],
        [0.7161, 0.2839],
        [0.4199, 0.5801],
        [0.2868, 0.7132],
        [0.1713, 0.8287],
        [0.3073, 0.6927],
        [0.7490, 0.2510],
        [0.3661, 0.6339],
        [0.8882, 0.1118],
        [0.7360, 0.2640]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4897, 0.5103],
        [0.8814, 0.1186],
        [0.0202, 0.9798],
        [0.0233, 0.9767],
        [0.8727, 0.1273],
        [0.5759, 0.4241],
        [0.3151, 0.6849],
        [0.0854, 0.9146],
        [0.3045, 0.6955],
        [0.3896, 0.6104],
        [0.3022, 0.6978],
        [0.1108, 0.8892]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.9458, 0.0542],
        [0.5374, 0.4626],
        [0.2951, 0.7049],
        [0.2718, 0.7282],
        [0.1478, 0.8522],
        [0.6101, 0.3899],
        [0.9282, 0.0718],
        [0.2292, 0.7708],
        [0.0664, 0.9336],
        [0.7445, 0.2555],
        [0.0421, 0.9579],
        [0.0163, 0.9837]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.6858, 0.3142],
        [0.0559, 0.9441],
        [0.0634, 0.9366],
        [0.0562, 0.9438],
        [0.1483, 0.8517],
        [0.2502, 0.7498],
        [0.0759, 0.9241],
        [0.5714, 0.4286],
        [0.9083, 0.0917],
        [0.4263, 0.5737],
        [0.3831, 0.6169],
        [0.0457, 0.9543]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4932, 0.5068],
        [0.4802, 0.5198],
        [0.0740, 0.9260],
        [0.0636, 0.9364],
        [0.4893, 0.5107],
        [0.2126, 0.7874],
        [0.0199, 0.9801],
        [0.0238, 0.9762],
        [0.9865, 0.0135],
        [0.0271, 0.9729],
        [0.9099, 0.0901],
        [0.9616, 0.0384]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.6832, 0.3168],
        [0.2745, 0.7255],
        [0.2125, 0.7875],
        [0.7083, 0.2917],
        [0.7844, 0.2156],
        [0.1611, 0.8389],
        [0.0750, 0.9250],
        [0.2902, 0.7098],
        [0.8868, 0.1132],
        [0.9060, 0.0940],
        [0.3638, 0.6362],
        [0.9660, 0.0340]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3413, 0.6587],
        [0.5003, 0.4997],
        [0.2959, 0.7041],
        [0.3124, 0.6876],
        [0.0328, 0.9672],
        [0.9536, 0.0464],
        [0.4195, 0.5805],
        [0.0165, 0.9835],
        [0.8312, 0.1688],
        [0.3591, 0.6409],
        [0.8946, 0.1054],
        [0.0850, 0.9150]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0895, 0.9105],
        [0.8598, 0.1402],
        [0.0878, 0.9122],
        [0.1226, 0.8774],
        [0.7834, 0.2166],
        [0.3602, 0.6398],
        [0.2466, 0.7534],
        [0.1502, 0.8498],
        [0.2105, 0.7895],
        [0.0364, 0.9636],
        [0.7778, 0.2222],
        [0.6991, 0.3009]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3848, 0.6152],
        [0.1796, 0.8204],
        [0.3533, 0.6467],
        [0.0241, 0.9759],
        [0.0861, 0.9139],
        [0.3851, 0.6149],
        [0.1099, 0.8901],
        [0.7700, 0.2300],
        [0.2287, 0.7713],
        [0.1629, 0.8371],
        [0.0899, 0.9101],
        [0.0064, 0.9936]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0725, 0.9275],
        [0.9217, 0.0783],
        [0.8694, 0.1306],
        [0.0565, 0.9435],
        [0.9192, 0.0808],
        [0.4789, 0.5211],
        [0.5437, 0.4563],
        [0.0166, 0.9834],
        [0.6721, 0.3279],
        [0.0075, 0.9925],
        [0.1919, 0.8081],
        [0.1249, 0.8751]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.0496, 0.9504],
        [0.0085, 0.9915],
        [0.0156, 0.9844],
        [0.0358, 0.9642],
        [0.0866, 0.9134],
        [0.0368, 0.9632],
        [0.4281, 0.5719],
        [0.5672, 0.4328],
        [0.2440, 0.7560],
        [0.3260, 0.6740],
        [0.1794, 0.8206],
        [0.0338, 0.9662]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1232, 0.8768],
        [0.0244, 0.9756],
        [0.3627, 0.6373],
        [0.0479, 0.9521],
        [0.0418, 0.9582],
        [0.0205, 0.9795],
        [0.1266, 0.8734],
        [0.3312, 0.6688],
        [0.7858, 0.2142],
        [0.3387, 0.6613],
        [0.7863, 0.2137],
        [0.0953, 0.9047]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7872, 0.2128],
        [0.7390, 0.2610],
        [0.0401, 0.9599],
        [0.0553, 0.9447],
        [0.1910, 0.8090],
        [0.1081, 0.8919],
        [0.3975, 0.6025],
        [0.1562, 0.8438],
        [0.5375, 0.4625],
        [0.3974, 0.6026],
        [0.1133, 0.8867],
        [0.7346, 0.2654]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.2729, 0.7271],
        [0.2033, 0.7967],
        [0.0389, 0.9611],
        [0.2188, 0.7812],
        [0.0993, 0.9007],
        [0.0276, 0.9724],
        [0.3858, 0.6142],
        [0.0966, 0.9034],
        [0.1078, 0.8922],
        [0.2766, 0.7234],
        [0.0349, 0.9651],
        [0.0067, 0.9933]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.1368, 0.8632],
        [0.0371, 0.9629],
        [0.4030, 0.5970],
        [0.0109, 0.9891],
        [0.3685, 0.6315],
        [0.2810, 0.7190],
        [0.0208, 0.9792],
        [0.0270, 0.9730],
        [0.8896, 0.1104],
        [0.2272, 0.7728],
        [0.0117, 0.9883],
        [0.0112, 0.9888]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0253, 0.9747],
        [0.1286, 0.8714],
        [0.0110, 0.9890],
        [0.0983, 0.9017],
        [0.0579, 0.9421],
        [0.6450, 0.3550],
        [0.1576, 0.8424],
        [0.0082, 0.9918],
        [0.9576, 0.0424],
        [0.2944, 0.7056],
        [0.0804, 0.9196],
        [0.1678, 0.8322]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.2404, 0.7596],
        [0.5440, 0.4560],
        [0.3862, 0.6138],
        [0.8471, 0.1529],
        [0.5733, 0.4267],
        [0.0185, 0.9815],
        [0.1678, 0.8322],
        [0.1652, 0.8348],
        [0.4810, 0.5190],
        [0.3239, 0.6761],
        [0.0311, 0.9689],
        [0.4604, 0.5396]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1056, 0.8944],
        [0.1190, 0.8810],
        [0.3169, 0.6831],
        [0.0274, 0.9726],
        [0.0825, 0.9175],
        [0.4785, 0.5215],
        [0.1084, 0.8916],
        [0.0119, 0.9881],
        [0.8049, 0.1951],
        [0.0412, 0.9588],
        [0.3719, 0.6281],
        [0.0128, 0.9872]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0970, 0.9030],
        [0.4836, 0.5164],
        [0.0293, 0.9707],
        [0.1790, 0.8210],
        [0.4645, 0.5355],
        [0.9643, 0.0357],
        [0.0419, 0.9581],
        [0.2132, 0.7868],
        [0.0781, 0.9219],
        [0.1477, 0.8523],
        [0.7009, 0.2991],
        [0.3072, 0.6928]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1349, 0.8651],
        [0.0720, 0.9280],
        [0.9637, 0.0363],
        [0.7139, 0.2861],
        [0.1655, 0.8345],
        [0.1105, 0.8895],
        [0.9555, 0.0445],
        [0.0579, 0.9421],
        [0.2876, 0.7124],
        [0.0485, 0.9515],
        [0.8219, 0.1781],
        [0.0703, 0.9297]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.2038, 0.7962],
        [0.1903, 0.8097],
        [0.5889, 0.4111],
        [0.7330, 0.2670],
        [0.7918, 0.2082],
        [0.7317, 0.2683],
        [0.2337, 0.7663],
        [0.0575, 0.9425],
        [0.1189, 0.8811],
        [0.1502, 0.8498],
        [0.0797, 0.9203],
        [0.5457, 0.4543]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1419, 0.8581],
        [0.0073, 0.9927],
        [0.0575, 0.9425],
        [0.6155, 0.3845],
        [0.6090, 0.3910],
        [0.0269, 0.9731],
        [0.4156, 0.5844],
        [0.4904, 0.5096],
        [0.3667, 0.6333],
        [0.8375, 0.1625],
        [0.6593, 0.3407],
        [0.2384, 0.7616]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0581, 0.9419],
        [0.5749, 0.4251],
        [0.0228, 0.9772],
        [0.2064, 0.7936],
        [0.4751, 0.5249],
        [0.0317, 0.9683],
        [0.8704, 0.1296],
        [0.8798, 0.1202],
        [0.5799, 0.4201],
        [0.2413, 0.7587],
        [0.8147, 0.1853],
        [0.7031, 0.2969]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7100, 0.2900],
        [0.6256, 0.3744],
        [0.1712, 0.8288],
        [0.0345, 0.9655],
        [0.0960, 0.9040],
        [0.0535, 0.9465],
        [0.9350, 0.0650],
        [0.1130, 0.8870],
        [0.5588, 0.4412],
        [0.2657, 0.7343],
        [0.0993, 0.9007],
        [0.4681, 0.5319]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.9659, 0.0341],
        [0.0975, 0.9025],
        [0.3593, 0.6407],
        [0.1194, 0.8806],
        [0.1056, 0.8944],
        [0.5391, 0.4610],
        [0.2107, 0.7893],
        [0.2746, 0.7254],
        [0.0052, 0.9948],
        [0.6250, 0.3750],
        [0.0654, 0.9346],
        [0.0082, 0.9918]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2211, 0.7789],
        [0.2416, 0.7584],
        [0.0398, 0.9602],
        [0.0052, 0.9948],
        [0.3925, 0.6075],
        [0.5845, 0.4155],
        [0.0153, 0.9847],
        [0.0893, 0.9107],
        [0.9842, 0.0158],
        [0.4874, 0.5126],
        [0.0177, 0.9823],
        [0.0372, 0.9628]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.7025, 0.2975],
        [0.0604, 0.9396],
        [0.0648, 0.9352],
        [0.8561, 0.1439],
        [0.2987, 0.7013],
        [0.7133, 0.2867],
        [0.0695, 0.9305],
        [0.0371, 0.9629],
        [0.9872, 0.0128],
        [0.0273, 0.9727],
        [0.9666, 0.0334],
        [0.9829, 0.0171]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3477, 0.6523],
        [0.1570, 0.8430],
        [0.5819, 0.4181],
        [0.2795, 0.7205],
        [0.2546, 0.7454],
        [0.0869, 0.9131],
        [0.7914, 0.2086],
        [0.0477, 0.9523],
        [0.6339, 0.3661],
        [0.6199, 0.3801],
        [0.2368, 0.7632],
        [0.9455, 0.0545]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1619, 0.8381],
        [0.1788, 0.8212],
        [0.5867, 0.4133],
        [0.0348, 0.9652],
        [0.0101, 0.9899],
        [0.9728, 0.0272],
        [0.4737, 0.5263],
        [0.1562, 0.8438],
        [0.9128, 0.0872],
        [0.0508, 0.9492],
        [0.1697, 0.8303],
        [0.0167, 0.9833]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0508, 0.9492],
        [0.8271, 0.1729],
        [0.0175, 0.9825],
        [0.3373, 0.6627],
        [0.6985, 0.3015],
        [0.9015, 0.0985],
        [0.1041, 0.8959],
        [0.6766, 0.3234],
        [0.8949, 0.1051],
        [0.3487, 0.6513],
        [0.8592, 0.1408],
        [0.0161, 0.9839]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.8969, 0.1031],
        [0.6183, 0.3817],
        [0.9699, 0.0301],
        [0.2155, 0.7845],
        [0.2474, 0.7526],
        [0.2832, 0.7168],
        [0.8758, 0.1242],
        [0.9265, 0.0735],
        [0.0647, 0.9353],
        [0.5945, 0.4055],
        [0.0773, 0.9227],
        [0.0509, 0.9491]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0516, 0.9484],
        [0.9568, 0.0432],
        [0.9852, 0.0148],
        [0.6865, 0.3135],
        [0.9533, 0.0467],
        [0.9713, 0.0287],
        [0.9672, 0.0328],
        [0.1516, 0.8484],
        [0.8846, 0.1154],
        [0.0201, 0.9799],
        [0.1985, 0.8015],
        [0.9094, 0.0906]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.2996, 0.7004],
        [0.0124, 0.9876],
        [0.1100, 0.8900],
        [0.4156, 0.5844],
        [0.5292, 0.4708],
        [0.0523, 0.9477],
        [0.4097, 0.5903],
        [0.8390, 0.1610],
        [0.9766, 0.0234],
        [0.7707, 0.2293],
        [0.4141, 0.5859],
        [0.9048, 0.0952]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.1254, 0.8746],
        [0.6127, 0.3873],
        [0.7236, 0.2764],
        [0.9749, 0.0251],
        [0.7072, 0.2928],
        [0.0917, 0.9083],
        [0.3753, 0.6247],
        [0.3216, 0.6784],
        [0.9569, 0.0431],
        [0.2733, 0.7267],
        [0.4050, 0.5950],
        [0.8601, 0.1399]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.0307, 0.9693],
        [0.9520, 0.0480],
        [0.6599, 0.3401],
        [0.0630, 0.9370],
        [0.8391, 0.1609],
        [0.0417, 0.9583],
        [0.0806, 0.9194],
        [0.0215, 0.9785],
        [0.0214, 0.9786],
        [0.1436, 0.8564],
        [0.0068, 0.9932],
        [0.4555, 0.5445]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3789, 0.6211],
        [0.7796, 0.2204],
        [0.5626, 0.4374],
        [0.7413, 0.2587],
        [0.7282, 0.2718],
        [0.0894, 0.9106],
        [0.7816, 0.2184],
        [0.3793, 0.6207],
        [0.0245, 0.9755],
        [0.9434, 0.0566],
        [0.0198, 0.9802],
        [0.0118, 0.9882]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.9456, 0.0544],
        [0.2465, 0.7535],
        [0.7142, 0.2858],
        [0.0103, 0.9897],
        [0.9148, 0.0852],
        [0.4326, 0.5674],
        [0.1708, 0.8292],
        [0.1131, 0.8869],
        [0.7302, 0.2698],
        [0.0630, 0.9370],
        [0.4123, 0.5877],
        [0.3736, 0.6264]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0289, 0.9711],
        [0.0036, 0.9964],
        [0.0032, 0.9968],
        [0.0162, 0.9838],
        [0.0077, 0.9923],
        [0.0035, 0.9965],
        [0.0082, 0.9918],
        [0.0051, 0.9949],
        [0.1169, 0.8831],
        [0.0325, 0.9675],
        [0.0262, 0.9738],
        [0.1901, 0.8099]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0046, 0.9954],
        [0.0025, 0.9975],
        [0.0602, 0.9398],
        [0.0020, 0.9980],
        [0.0576, 0.9424],
        [0.0031, 0.9969],
        [0.0053, 0.9947],
        [0.0201, 0.9799],
        [0.0017, 0.9983],
        [0.0054, 0.9946],
        [0.1630, 0.8370],
        [0.0074, 0.9926]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0088, 0.9912],
        [0.0088, 0.9912],
        [0.0116, 0.9884],
        [0.0065, 0.9935],
        [0.0038, 0.9962],
        [0.0031, 0.9969],
        [0.0258, 0.9742],
        [0.0468, 0.9532],
        [0.1120, 0.8880],
        [0.0099, 0.9901],
        [0.0046, 0.9954],
        [0.0059, 0.9941]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0317, 0.9683],
        [0.0430, 0.9570],
        [0.0057, 0.9943],
        [0.0039, 0.9961],
        [0.0139, 0.9861],
        [0.1151, 0.8849],
        [0.0471, 0.9529],
        [0.0095, 0.9905],
        [0.1092, 0.8908],
        [0.0523, 0.9477],
        [0.0211, 0.9789],
        [0.0177, 0.9823]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0205, 0.9795],
        [0.0242, 0.9758],
        [0.0562, 0.9438],
        [0.0681, 0.9319],
        [0.0090, 0.9910],
        [0.0052, 0.9948],
        [0.0655, 0.9345],
        [0.0479, 0.9521],
        [0.3576, 0.6424],
        [0.0317, 0.9683],
        [0.0080, 0.9920],
        [0.0050, 0.9950]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0085, 0.9915],
        [0.0163, 0.9837],
        [0.0536, 0.9464],
        [0.0583, 0.9417],
        [0.3009, 0.6991],
        [0.0538, 0.9462],
        [0.0694, 0.9306],
        [0.0160, 0.9840],
        [0.1209, 0.8791],
        [0.0080, 0.9920],
        [0.0977, 0.9023],
        [0.1532, 0.8468]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1770, 0.8230],
        [0.0091, 0.9909],
        [0.0135, 0.9865],
        [0.2330, 0.7670],
        [0.0530, 0.9470],
        [0.0068, 0.9932],
        [0.2139, 0.7861],
        [0.3040, 0.6960],
        [0.0187, 0.9813],
        [0.0354, 0.9646],
        [0.0183, 0.9817],
        [0.1574, 0.8426]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0223, 0.9778],
        [0.0405, 0.9595],
        [0.0283, 0.9717],
        [0.1538, 0.8462],
        [0.0142, 0.9858],
        [0.0288, 0.9712],
        [0.0075, 0.9925],
        [0.0349, 0.9651],
        [0.0853, 0.9147],
        [0.0431, 0.9569],
        [0.0552, 0.9448],
        [0.0527, 0.9473]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3335, 0.6665],
        [0.1410, 0.8590],
        [0.0552, 0.9448],
        [0.0640, 0.9360],
        [0.0324, 0.9676],
        [0.0269, 0.9731],
        [0.0357, 0.9643],
        [0.0283, 0.9717],
        [0.0556, 0.9444],
        [0.0273, 0.9727],
        [0.0121, 0.9879],
        [0.0755, 0.9245]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.2494, 0.7506],
        [0.0330, 0.9670],
        [0.0283, 0.9717],
        [0.0096, 0.9904],
        [0.0087, 0.9913],
        [0.0070, 0.9930],
        [0.0250, 0.9750],
        [0.0449, 0.9551],
        [0.0124, 0.9876],
        [0.1312, 0.8688],
        [0.0863, 0.9137],
        [0.0084, 0.9916]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0149, 0.9851],
        [0.0189, 0.9811],
        [0.0079, 0.9921],
        [0.0092, 0.9908],
        [0.0543, 0.9457],
        [0.0433, 0.9567],
        [0.0549, 0.9451],
        [0.0990, 0.9010],
        [0.0878, 0.9122],
        [0.0340, 0.9660],
        [0.0383, 0.9617],
        [0.0130, 0.9870]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0130, 0.9870],
        [0.0173, 0.9827],
        [0.0144, 0.9856],
        [0.0061, 0.9939],
        [0.0195, 0.9805],
        [0.0191, 0.9809],
        [0.0124, 0.9876],
        [0.0407, 0.9593],
        [0.6476, 0.3524],
        [0.0328, 0.9672],
        [0.1730, 0.8270],
        [0.5055, 0.4945]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0277, 0.9723],
        [0.1100, 0.8900],
        [0.6457, 0.3543],
        [0.0206, 0.9794],
        [0.0835, 0.9165],
        [0.0378, 0.9622],
        [0.0434, 0.9566],
        [0.0637, 0.9363],
        [0.0167, 0.9833],
        [0.3413, 0.6587],
        [0.4863, 0.5137],
        [0.0246, 0.9754]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0719, 0.9281],
        [0.0833, 0.9167],
        [0.0198, 0.9802],
        [0.0055, 0.9945],
        [0.0688, 0.9312],
        [0.0669, 0.9331],
        [0.0477, 0.9523],
        [0.0879, 0.9121],
        [0.1592, 0.8408],
        [0.0068, 0.9932],
        [0.0684, 0.9316],
        [0.0165, 0.9835]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0210, 0.9790],
        [0.0392, 0.9608],
        [0.0081, 0.9919],
        [0.0592, 0.9408],
        [0.0669, 0.9331],
        [0.2701, 0.7299],
        [0.0153, 0.9847],
        [0.0544, 0.9456],
        [0.1009, 0.8991],
        [0.0528, 0.9472],
        [0.0117, 0.9883],
        [0.0748, 0.9252]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1226, 0.8774],
        [0.0302, 0.9698],
        [0.0520, 0.9480],
        [0.0564, 0.9436],
        [0.0651, 0.9349],
        [0.0465, 0.9535],
        [0.0311, 0.9689],
        [0.0814, 0.9186],
        [0.0158, 0.9842],
        [0.0181, 0.9819],
        [0.0057, 0.9943],
        [0.0081, 0.9919]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0160, 0.9840],
        [0.0655, 0.9345],
        [0.1232, 0.8768],
        [0.0633, 0.9367],
        [0.3435, 0.6565],
        [0.0677, 0.9323],
        [0.1605, 0.8395],
        [0.0364, 0.9636],
        [0.1928, 0.8072],
        [0.0198, 0.9802],
        [0.0289, 0.9711],
        [0.0651, 0.9349]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.0587, 0.9413],
        [0.0120, 0.9880],
        [0.0105, 0.9895],
        [0.0405, 0.9595],
        [0.0320, 0.9680],
        [0.0091, 0.9909],
        [0.2130, 0.7870],
        [0.4121, 0.5879],
        [0.0736, 0.9264],
        [0.0334, 0.9666],
        [0.0243, 0.9757],
        [0.0501, 0.9499]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0941, 0.9059],
        [0.0196, 0.9804],
        [0.0180, 0.9820],
        [0.5089, 0.4911],
        [0.0073, 0.9927],
        [0.0723, 0.9277],
        [0.0086, 0.9914],
        [0.0218, 0.9782],
        [0.4559, 0.5441],
        [0.2968, 0.7032],
        [0.0358, 0.9642],
        [0.1271, 0.8729]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.6021, 0.3979],
        [0.1006, 0.8994],
        [0.0559, 0.9441],
        [0.0898, 0.9102],
        [0.1204, 0.8796],
        [0.0338, 0.9662],
        [0.0105, 0.9895],
        [0.0676, 0.9324],
        [0.0193, 0.9807],
        [0.1357, 0.8643],
        [0.0584, 0.9416],
        [0.0335, 0.9665]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.1812, 0.8188],
        [0.0364, 0.9636],
        [0.0309, 0.9691],
        [0.0111, 0.9889],
        [0.1021, 0.8979],
        [0.0297, 0.9703],
        [0.0751, 0.9249],
        [0.0341, 0.9659],
        [0.0152, 0.9848],
        [0.0768, 0.9232],
        [0.0416, 0.9584],
        [0.0125, 0.9875]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0703, 0.9297],
        [0.3965, 0.6035],
        [0.1187, 0.8813],
        [0.0063, 0.9937],
        [0.0327, 0.9673],
        [0.0719, 0.9281],
        [0.0109, 0.9891],
        [0.0872, 0.9128],
        [0.0794, 0.9206],
        [0.0584, 0.9416],
        [0.0204, 0.9796],
        [0.0410, 0.9590]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.1212, 0.8788],
        [0.0440, 0.9560],
        [0.0218, 0.9782],
        [0.0112, 0.9888],
        [0.0537, 0.9463],
        [0.1256, 0.8744],
        [0.0464, 0.9536],
        [0.0810, 0.9190],
        [0.3673, 0.6327],
        [0.4346, 0.5654],
        [0.0659, 0.9341],
        [0.7664, 0.2336]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1924, 0.8076],
        [0.0196, 0.9804],
        [0.2610, 0.7390],
        [0.1487, 0.8513],
        [0.0563, 0.9437],
        [0.0543, 0.9457],
        [0.0965, 0.9035],
        [0.0215, 0.9785],
        [0.0145, 0.9855],
        [0.0822, 0.9178],
        [0.1101, 0.8899],
        [0.3310, 0.6690]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.1241, 0.8759],
        [0.0555, 0.9445],
        [0.0644, 0.9356],
        [0.0136, 0.9864],
        [0.0256, 0.9744],
        [0.1664, 0.8336],
        [0.0131, 0.9869],
        [0.0147, 0.9853],
        [0.1799, 0.8201],
        [0.0056, 0.9944],
        [0.0618, 0.9382],
        [0.1211, 0.8789]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0527, 0.9473],
        [0.0693, 0.9307],
        [0.0111, 0.9889],
        [0.0208, 0.9792],
        [0.0213, 0.9787],
        [0.2220, 0.7780],
        [0.0442, 0.9558],
        [0.1726, 0.8274],
        [0.2169, 0.7831],
        [0.2607, 0.7393],
        [0.1400, 0.8600],
        [0.3159, 0.6841]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0739, 0.9261],
        [0.0053, 0.9947],
        [0.0674, 0.9326],
        [0.2858, 0.7142],
        [0.1960, 0.8040],
        [0.0218, 0.9782],
        [0.3942, 0.6058],
        [0.1768, 0.8232],
        [0.0772, 0.9228],
        [0.0383, 0.9617],
        [0.1378, 0.8622],
        [0.0355, 0.9645]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0167, 0.9833],
        [0.0967, 0.9033],
        [0.4979, 0.5021],
        [0.3538, 0.6462],
        [0.4074, 0.5926],
        [0.1941, 0.8059],
        [0.0375, 0.9625],
        [0.0714, 0.9286],
        [0.6798, 0.3202],
        [0.0273, 0.9727],
        [0.0765, 0.9235],
        [0.7073, 0.2927]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.0775, 0.9225],
        [0.0157, 0.9843],
        [0.0699, 0.9301],
        [0.6006, 0.3994],
        [0.0349, 0.9651],
        [0.0599, 0.9401],
        [0.6862, 0.3138],
        [0.6495, 0.3505],
        [0.2993, 0.7007],
        [0.0719, 0.9281],
        [0.0244, 0.9756],
        [0.0495, 0.9505]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3176, 0.6824],
        [0.0642, 0.9358],
        [0.0278, 0.9722],
        [0.4022, 0.5978],
        [0.0335, 0.9665],
        [0.2077, 0.7923],
        [0.0523, 0.9477],
        [0.4335, 0.5665],
        [0.1196, 0.8804],
        [0.5581, 0.4419],
        [0.3977, 0.6023],
        [0.4407, 0.5593]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7305, 0.2695],
        [0.0819, 0.9181],
        [0.0374, 0.9626],
        [0.0164, 0.9836],
        [0.0230, 0.9770],
        [0.1547, 0.8453],
        [0.0130, 0.9870],
        [0.0733, 0.9267],
        [0.1455, 0.8545],
        [0.0588, 0.9412],
        [0.1720, 0.8280],
        [0.0905, 0.9095]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5216, 0.4784],
        [0.0411, 0.9589],
        [0.0345, 0.9655],
        [0.1655, 0.8345],
        [0.0091, 0.9909],
        [0.2119, 0.7881],
        [0.3671, 0.6329],
        [0.3382, 0.6618],
        [0.0134, 0.9866],
        [0.3488, 0.6512],
        [0.1041, 0.8959],
        [0.0135, 0.9865]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.1513, 0.8487],
        [0.3820, 0.6180],
        [0.1316, 0.8684],
        [0.0447, 0.9553],
        [0.2184, 0.7816],
        [0.0609, 0.9391],
        [0.0558, 0.9442],
        [0.0172, 0.9828],
        [0.2747, 0.7253],
        [0.1776, 0.8224],
        [0.0809, 0.9191],
        [0.0448, 0.9552]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.1077, 0.8923],
        [0.0455, 0.9545],
        [0.0124, 0.9876],
        [0.0081, 0.9919],
        [0.0165, 0.9835],
        [0.2046, 0.7954],
        [0.0770, 0.9230],
        [0.0762, 0.9238],
        [0.9208, 0.0792],
        [0.8634, 0.1366],
        [0.8863, 0.1137],
        [0.9631, 0.0369]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1560, 0.8440],
        [0.0596, 0.9404],
        [0.8723, 0.1277],
        [0.0234, 0.9766],
        [0.2971, 0.7029],
        [0.0455, 0.9545],
        [0.0089, 0.9911],
        [0.1504, 0.8496],
        [0.0167, 0.9833],
        [0.1057, 0.8943],
        [0.3073, 0.6927],
        [0.0275, 0.9725]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0285, 0.9715],
        [0.0598, 0.9402],
        [0.3521, 0.6479],
        [0.0044, 0.9956],
        [0.0173, 0.9827],
        [0.0133, 0.9867],
        [0.1208, 0.8792],
        [0.0930, 0.9070],
        [0.6222, 0.3778],
        [0.1004, 0.8996],
        [0.1179, 0.8821],
        [0.1127, 0.8873]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0556, 0.9444],
        [0.0535, 0.9465],
        [0.0405, 0.9595],
        [0.0063, 0.9937],
        [0.0238, 0.9762],
        [0.3202, 0.6798],
        [0.0597, 0.9403],
        [0.2943, 0.7057],
        [0.3197, 0.6803],
        [0.5694, 0.4306],
        [0.0990, 0.9010],
        [0.3782, 0.6218]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0651, 0.9349],
        [0.0861, 0.9139],
        [0.0830, 0.9170],
        [0.4421, 0.5579],
        [0.1842, 0.8158],
        [0.0532, 0.9468],
        [0.2942, 0.7058],
        [0.5341, 0.4659],
        [0.0870, 0.9130],
        [0.0164, 0.9836],
        [0.0196, 0.9804],
        [0.0082, 0.9918]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.1466, 0.8534],
        [0.1030, 0.8970],
        [0.5949, 0.4051],
        [0.3847, 0.6153],
        [0.3118, 0.6882],
        [0.1915, 0.8085],
        [0.2050, 0.7950],
        [0.2968, 0.7032],
        [0.3557, 0.6443],
        [0.0219, 0.9781],
        [0.0526, 0.9474],
        [0.0894, 0.9106]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.6194, 0.3806],
        [0.0233, 0.9767],
        [0.1395, 0.8605],
        [0.7501, 0.2499],
        [0.0199, 0.9801],
        [0.0638, 0.9362],
        [0.0196, 0.9804],
        [0.8037, 0.1963],
        [0.3386, 0.6614],
        [0.4675, 0.5325],
        [0.0046, 0.9954],
        [0.0840, 0.9160]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4163, 0.5837],
        [0.0186, 0.9814],
        [0.0494, 0.9506],
        [0.1982, 0.8018],
        [0.2496, 0.7504],
        [0.0530, 0.9470],
        [0.0279, 0.9721],
        [0.3894, 0.6106],
        [0.1200, 0.8800],
        [0.6622, 0.3378],
        [0.0099, 0.9901],
        [0.2558, 0.7442]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5517, 0.4483],
        [0.5928, 0.4072],
        [0.2787, 0.7213],
        [0.0043, 0.9957],
        [0.2267, 0.7733],
        [0.0222, 0.9778],
        [0.0579, 0.9421],
        [0.0090, 0.9910],
        [0.0660, 0.9340],
        [0.1305, 0.8695],
        [0.1346, 0.8654],
        [0.0737, 0.9263]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4612, 0.5388],
        [0.0633, 0.9367],
        [0.0343, 0.9657],
        [0.0189, 0.9811],
        [0.0063, 0.9937],
        [0.4244, 0.5756],
        [0.0222, 0.9778],
        [0.3065, 0.6935],
        [0.0137, 0.9863],
        [0.4520, 0.5480],
        [0.0198, 0.9802],
        [0.0228, 0.9772]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.2034, 0.7966],
        [0.0311, 0.9689],
        [0.0110, 0.9890],
        [0.1001, 0.8999],
        [0.4274, 0.5726],
        [0.0988, 0.9012],
        [0.0180, 0.9820],
        [0.1184, 0.8816],
        [0.0604, 0.9396],
        [0.0242, 0.9758],
        [0.1317, 0.8683],
        [0.0307, 0.9693]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0332, 0.9668],
        [0.0499, 0.9501],
        [0.0090, 0.9910],
        [0.0229, 0.9771],
        [0.0724, 0.9276],
        [0.0610, 0.9390],
        [0.0498, 0.9502],
        [0.0187, 0.9813],
        [0.6345, 0.3655],
        [0.2131, 0.7869],
        [0.3445, 0.6555],
        [0.7344, 0.2656]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0984, 0.9016],
        [0.0582, 0.9418],
        [0.6206, 0.3794],
        [0.1709, 0.8291],
        [0.2479, 0.7521],
        [0.2530, 0.7470],
        [0.0766, 0.9234],
        [0.0102, 0.9898],
        [0.0283, 0.9717],
        [0.4943, 0.5057],
        [0.1990, 0.8010],
        [0.7149, 0.2851]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2242, 0.7758],
        [0.1241, 0.8759],
        [0.0589, 0.9411],
        [0.0148, 0.9852],
        [0.0937, 0.9063],
        [0.0206, 0.9794],
        [0.0811, 0.9189],
        [0.0592, 0.9408],
        [0.2653, 0.7347],
        [0.0197, 0.9803],
        [0.2098, 0.7902],
        [0.0111, 0.9889]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0406, 0.9594],
        [0.0594, 0.9406],
        [0.0029, 0.9971],
        [0.0449, 0.9551],
        [0.0536, 0.9464],
        [0.1170, 0.8830],
        [0.0517, 0.9483],
        [0.0876, 0.9124],
        [0.3970, 0.6030],
        [0.1332, 0.8668],
        [0.0875, 0.9125],
        [0.2562, 0.7438]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.1379, 0.8621],
        [0.0202, 0.9798],
        [0.0596, 0.9404],
        [0.0694, 0.9306],
        [0.1285, 0.8715],
        [0.0452, 0.9548],
        [0.0491, 0.9509],
        [0.7112, 0.2888],
        [0.1592, 0.8408],
        [0.0184, 0.9816],
        [0.0101, 0.9899],
        [0.0058, 0.9942]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0898, 0.9102],
        [0.0735, 0.9265],
        [0.2571, 0.7429],
        [0.1175, 0.8825],
        [0.3950, 0.6050],
        [0.0329, 0.9671],
        [0.5960, 0.4040],
        [0.0737, 0.9263],
        [0.8570, 0.1430],
        [0.0045, 0.9955],
        [0.8021, 0.1979],
        [0.1464, 0.8536]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.1787, 0.8213],
        [0.3198, 0.6802],
        [0.1465, 0.8535],
        [0.1885, 0.8115],
        [0.4460, 0.5540],
        [0.0120, 0.9880],
        [0.9518, 0.0482],
        [0.5821, 0.4179],
        [0.4449, 0.5551],
        [0.2956, 0.7044],
        [0.0292, 0.9708],
        [0.0321, 0.9679]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5967, 0.4033],
        [0.1234, 0.8766],
        [0.2373, 0.7627],
        [0.4058, 0.5942],
        [0.0038, 0.9962],
        [0.1594, 0.8406],
        [0.0085, 0.9915],
        [0.8723, 0.1277],
        [0.5741, 0.4259],
        [0.9149, 0.0851],
        [0.0170, 0.9830],
        [0.1144, 0.8856]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7781, 0.2219],
        [0.2993, 0.7007],
        [0.2106, 0.7894],
        [0.0225, 0.9775],
        [0.0666, 0.9334],
        [0.6680, 0.3320],
        [0.1551, 0.8449],
        [0.0075, 0.9925],
        [0.2696, 0.7304],
        [0.3724, 0.6276],
        [0.0401, 0.9599],
        [0.2161, 0.7839]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3010, 0.6990],
        [0.5947, 0.4053],
        [0.4391, 0.5609],
        [0.0250, 0.9750],
        [0.0180, 0.9820],
        [0.0219, 0.9781],
        [0.8187, 0.1813],
        [0.9829, 0.0171],
        [0.0232, 0.9768],
        [0.1128, 0.8872],
        [0.0491, 0.9509],
        [0.0936, 0.9064]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0689, 0.9311],
        [0.0312, 0.9688],
        [0.3650, 0.6350],
        [0.0173, 0.9827],
        [0.4781, 0.5219],
        [0.0861, 0.9139],
        [0.0192, 0.9808],
        [0.0587, 0.9413],
        [0.4715, 0.5285],
        [0.1272, 0.8728],
        [0.0276, 0.9724],
        [0.0564, 0.9436]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0394, 0.9606],
        [0.0079, 0.9921],
        [0.0178, 0.9822],
        [0.0132, 0.9868],
        [0.0360, 0.9640],
        [0.0433, 0.9567],
        [0.0288, 0.9712],
        [0.0230, 0.9770],
        [0.5648, 0.4352],
        [0.1448, 0.8552],
        [0.3137, 0.6863],
        [0.8763, 0.1237]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0154, 0.9846],
        [0.5155, 0.4845],
        [0.7048, 0.2952],
        [0.2531, 0.7469],
        [0.0132, 0.9868],
        [0.4089, 0.5911],
        [0.0055, 0.9945],
        [0.0092, 0.9908],
        [0.0169, 0.9831],
        [0.3814, 0.6186],
        [0.7043, 0.2957],
        [0.4476, 0.5524]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2395, 0.7605],
        [0.0144, 0.9856],
        [0.1742, 0.8258],
        [0.0355, 0.9645],
        [0.0708, 0.9292],
        [0.1236, 0.8764],
        [0.1171, 0.8829],
        [0.1013, 0.8987],
        [0.0581, 0.9419],
        [0.0632, 0.9368],
        [0.0220, 0.9780],
        [0.0916, 0.9084]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0181, 0.9819],
        [0.0154, 0.9846],
        [0.0117, 0.9883],
        [0.0222, 0.9778],
        [0.0493, 0.9507],
        [0.4947, 0.5053],
        [0.0478, 0.9522],
        [0.0148, 0.9852],
        [0.4622, 0.5378],
        [0.1273, 0.8727],
        [0.3184, 0.6816],
        [0.1356, 0.8644]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0415, 0.9585],
        [0.2859, 0.7141],
        [0.2453, 0.7547],
        [0.1354, 0.8646],
        [0.0510, 0.9490],
        [0.0189, 0.9811],
        [0.7415, 0.2585],
        [0.1597, 0.8403],
        [0.0145, 0.9855],
        [0.0322, 0.9678],
        [0.1357, 0.8643],
        [0.0081, 0.9919]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0952, 0.9048],
        [0.0754, 0.9246],
        [0.4131, 0.5869],
        [0.3876, 0.6124],
        [0.5270, 0.4730],
        [0.1590, 0.8410],
        [0.7697, 0.2303],
        [0.0525, 0.9475],
        [0.3158, 0.6842],
        [0.0288, 0.9712],
        [0.0865, 0.9135],
        [0.6889, 0.3111]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4479, 0.5521],
        [0.0319, 0.9681],
        [0.0855, 0.9145],
        [0.5018, 0.4982],
        [0.0380, 0.9620],
        [0.0058, 0.9942],
        [0.4309, 0.5691],
        [0.7314, 0.2686],
        [0.4379, 0.5621],
        [0.0604, 0.9396],
        [0.0193, 0.9807],
        [0.0084, 0.9916]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4671, 0.5329],
        [0.0785, 0.9215],
        [0.0390, 0.9610],
        [0.4080, 0.5920],
        [0.2910, 0.7090],
        [0.0735, 0.9265],
        [0.0609, 0.9391],
        [0.3742, 0.6258],
        [0.7875, 0.2125],
        [0.1442, 0.8558],
        [0.0145, 0.9855],
        [0.6888, 0.3112]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.7362, 0.2638],
        [0.8887, 0.1113],
        [0.6016, 0.3984],
        [0.0046, 0.9954],
        [0.0135, 0.9865],
        [0.7225, 0.2775],
        [0.0034, 0.9966],
        [0.0088, 0.9912],
        [0.0218, 0.9782],
        [0.0108, 0.9892],
        [0.0697, 0.9303],
        [0.5235, 0.4765]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.9300, 0.0700],
        [0.0385, 0.9615],
        [0.2088, 0.7912],
        [0.0451, 0.9549],
        [0.0136, 0.9864],
        [0.4950, 0.5050],
        [0.1053, 0.8947],
        [0.8307, 0.1693],
        [0.0261, 0.9739],
        [0.7894, 0.2106],
        [0.0949, 0.9051],
        [0.0270, 0.9730]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0652, 0.9348],
        [0.0860, 0.9140],
        [0.0460, 0.9540],
        [0.0066, 0.9934],
        [0.6365, 0.3635],
        [0.0301, 0.9699],
        [0.0078, 0.9922],
        [0.0112, 0.9888],
        [0.6807, 0.3193],
        [0.0089, 0.9911],
        [0.0278, 0.9722],
        [0.0113, 0.9887]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0783, 0.9217],
        [0.2577, 0.7423],
        [0.0133, 0.9867],
        [0.5473, 0.4527],
        [0.0395, 0.9605],
        [0.0232, 0.9768],
        [0.0117, 0.9883],
        [0.0191, 0.9809],
        [0.4230, 0.5770],
        [0.8531, 0.1469],
        [0.0859, 0.9141],
        [0.0852, 0.9148]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0540, 0.9460],
        [0.0584, 0.9416],
        [0.8695, 0.1305],
        [0.1851, 0.8149],
        [0.2387, 0.7613],
        [0.3308, 0.6692],
        [0.0093, 0.9907],
        [0.0203, 0.9797],
        [0.0167, 0.9833],
        [0.6428, 0.3572],
        [0.9312, 0.0688],
        [0.7588, 0.2412]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2300, 0.7700],
        [0.0093, 0.9907],
        [0.2993, 0.7007],
        [0.0134, 0.9866],
        [0.2123, 0.7877],
        [0.2063, 0.7937],
        [0.4749, 0.5251],
        [0.1233, 0.8767],
        [0.3414, 0.6586],
        [0.0056, 0.9944],
        [0.0782, 0.9218],
        [0.0119, 0.9881]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.7989, 0.2011],
        [0.0359, 0.9641],
        [0.0141, 0.9859],
        [0.2265, 0.7735],
        [0.0100, 0.9900],
        [0.6735, 0.3265],
        [0.0133, 0.9867],
        [0.1529, 0.8471],
        [0.7855, 0.2145],
        [0.1047, 0.8953],
        [0.3446, 0.6554],
        [0.1661, 0.8339]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0304, 0.9696],
        [0.0202, 0.9798],
        [0.0170, 0.9830],
        [0.0167, 0.9833],
        [0.0128, 0.9872],
        [0.0237, 0.9763],
        [0.1266, 0.8734],
        [0.2982, 0.7018],
        [0.4179, 0.5821],
        [0.0961, 0.9039],
        [0.0020, 0.9980],
        [0.0024, 0.9976]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.6023, 0.3977],
        [0.0082, 0.9918],
        [0.8700, 0.1300],
        [0.2216, 0.7784],
        [0.4407, 0.5593],
        [0.7434, 0.2566],
        [0.3476, 0.6524],
        [0.0272, 0.9728],
        [0.1891, 0.8109],
        [0.0097, 0.9903],
        [0.4288, 0.5712],
        [0.4871, 0.5129]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4733, 0.5267],
        [0.0276, 0.9724],
        [0.0278, 0.9722],
        [0.6755, 0.3245],
        [0.1232, 0.8768],
        [0.1429, 0.8571],
        [0.6689, 0.3311],
        [0.3753, 0.6247],
        [0.1282, 0.8718],
        [0.6290, 0.3710],
        [0.0558, 0.9442],
        [0.0397, 0.9603]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5003, 0.4997],
        [0.1307, 0.8693],
        [0.0708, 0.9292],
        [0.8998, 0.1002],
        [0.0678, 0.9322],
        [0.1642, 0.8358],
        [0.0299, 0.9701],
        [0.6536, 0.3464],
        [0.8224, 0.1776],
        [0.9463, 0.0537],
        [0.3073, 0.6927],
        [0.1513, 0.8487]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3451, 0.6549],
        [0.6744, 0.3256],
        [0.1589, 0.8411],
        [0.0751, 0.9249],
        [0.0150, 0.9850],
        [0.0207, 0.9793],
        [0.0642, 0.9358],
        [0.1617, 0.8383],
        [0.2070, 0.7930],
        [0.0344, 0.9656],
        [0.1816, 0.8184],
        [0.1597, 0.8403]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.6598, 0.3402],
        [0.2588, 0.7412],
        [0.0311, 0.9689],
        [0.1052, 0.8948],
        [0.0076, 0.9924],
        [0.1776, 0.8224],
        [0.4087, 0.5913],
        [0.8907, 0.1093],
        [0.3234, 0.6766],
        [0.8364, 0.1636],
        [0.0494, 0.9506],
        [0.1012, 0.8988]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0044, 0.9956],
        [0.8583, 0.1417],
        [0.0162, 0.9838],
        [0.0110, 0.9890],
        [0.6271, 0.3729],
        [0.0241, 0.9759],
        [0.0751, 0.9249],
        [0.1934, 0.8066],
        [0.8245, 0.1755],
        [0.0505, 0.9495],
        [0.1349, 0.8651],
        [0.4906, 0.5094]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.7876, 0.2124],
        [0.3287, 0.6713],
        [0.0463, 0.9537],
        [0.0897, 0.9103],
        [0.4544, 0.5456],
        [0.0480, 0.9520],
        [0.0342, 0.9658],
        [0.0678, 0.9322],
        [0.5256, 0.4744],
        [0.8952, 0.1048],
        [0.5219, 0.4781],
        [0.6242, 0.3758]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.1443, 0.8557],
        [0.3284, 0.6716],
        [0.9707, 0.0293],
        [0.1882, 0.8118],
        [0.1659, 0.8341],
        [0.0140, 0.9860],
        [0.0118, 0.9882],
        [0.0596, 0.9404],
        [0.0453, 0.9547],
        [0.0341, 0.9659],
        [0.6311, 0.3689],
        [0.3835, 0.6165]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0929, 0.9071],
        [0.0489, 0.9511],
        [0.1099, 0.8901],
        [0.0030, 0.9970],
        [0.0485, 0.9515],
        [0.2170, 0.7830],
        [0.0548, 0.9452],
        [0.0311, 0.9689],
        [0.7456, 0.2544],
        [0.2030, 0.7970],
        [0.1349, 0.8651],
        [0.0895, 0.9105]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0472, 0.9528],
        [0.1253, 0.8747],
        [0.0029, 0.9971],
        [0.0461, 0.9539],
        [0.0082, 0.9918],
        [0.1221, 0.8779],
        [0.0364, 0.9636],
        [0.3134, 0.6866],
        [0.7041, 0.2959],
        [0.0099, 0.9901],
        [0.2172, 0.7828],
        [0.0272, 0.9728]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0476, 0.9524],
        [0.1127, 0.8873],
        [0.0324, 0.9676],
        [0.7118, 0.2882],
        [0.0111, 0.9889],
        [0.0834, 0.9166],
        [0.5866, 0.4134],
        [0.0617, 0.9383],
        [0.1098, 0.8902],
        [0.0529, 0.9471],
        [0.0465, 0.9535],
        [0.0352, 0.9648]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.8352, 0.1648],
        [0.1547, 0.8453],
        [0.0709, 0.9291],
        [0.4721, 0.5279],
        [0.6769, 0.3231],
        [0.8404, 0.1596],
        [0.8299, 0.1701],
        [0.1698, 0.8302],
        [0.3548, 0.6452],
        [0.1371, 0.8629],
        [0.5612, 0.4388],
        [0.9567, 0.0433]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5392, 0.4608],
        [0.0211, 0.9789],
        [0.2296, 0.7704],
        [0.9616, 0.0384],
        [0.0187, 0.9813],
        [0.0760, 0.9240],
        [0.8514, 0.1486],
        [0.2664, 0.7336],
        [0.0341, 0.9659],
        [0.0490, 0.9510],
        [0.0303, 0.9697],
        [0.0036, 0.9964]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3452, 0.6548],
        [0.2937, 0.7063],
        [0.0546, 0.9454],
        [0.0484, 0.9516],
        [0.0907, 0.9093],
        [0.0179, 0.9821],
        [0.0594, 0.9406],
        [0.3953, 0.6047],
        [0.8360, 0.1640],
        [0.8197, 0.1803],
        [0.3580, 0.6420],
        [0.1451, 0.8549]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.6566, 0.3434],
        [0.2032, 0.7968],
        [0.1901, 0.8099],
        [0.0045, 0.9955],
        [0.0049, 0.9951],
        [0.3775, 0.6225],
        [0.0259, 0.9741],
        [0.2948, 0.7052],
        [0.1590, 0.8410],
        [0.0330, 0.9670],
        [0.0532, 0.9468],
        [0.0250, 0.9750]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.7044, 0.2956],
        [0.2369, 0.7631],
        [0.0098, 0.9902],
        [0.0073, 0.9927],
        [0.0029, 0.9971],
        [0.0285, 0.9715],
        [0.1974, 0.8026],
        [0.9187, 0.0813],
        [0.0453, 0.9547],
        [0.1008, 0.8992],
        [0.0056, 0.9944],
        [0.0462, 0.9538]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4661, 0.5339],
        [0.5376, 0.4624],
        [0.5474, 0.4526],
        [0.0036, 0.9964],
        [0.9007, 0.0993],
        [0.2741, 0.7259],
        [0.5869, 0.4131],
        [0.0527, 0.9473],
        [0.9349, 0.0651],
        [0.0048, 0.9952],
        [0.1163, 0.8837],
        [0.1256, 0.8744]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5376, 0.4624],
        [0.3273, 0.6727],
        [0.2391, 0.7609],
        [0.2512, 0.7488],
        [0.0501, 0.9499],
        [0.1677, 0.8323],
        [0.2441, 0.7559],
        [0.2702, 0.7298],
        [0.7998, 0.2002],
        [0.9142, 0.0858],
        [0.6123, 0.3877],
        [0.8342, 0.1658]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4867, 0.5133],
        [0.8399, 0.1601],
        [0.9300, 0.0700],
        [0.0463, 0.9537],
        [0.5257, 0.4743],
        [0.7423, 0.2577],
        [0.7010, 0.2990],
        [0.0180, 0.9820],
        [0.0460, 0.9540],
        [0.9097, 0.0903],
        [0.9583, 0.0417],
        [0.8626, 0.1374]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3499, 0.6501],
        [0.1743, 0.8257],
        [0.7111, 0.2889],
        [0.0820, 0.9180],
        [0.0093, 0.9907],
        [0.7825, 0.2175],
        [0.3282, 0.6718],
        [0.3446, 0.6554],
        [0.2656, 0.7344],
        [0.0785, 0.9215],
        [0.0041, 0.9959],
        [0.0236, 0.9764]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.9296, 0.0704],
        [0.8977, 0.1023],
        [0.0014, 0.9986],
        [0.0133, 0.9867],
        [0.3112, 0.6888],
        [0.3008, 0.6992],
        [0.0308, 0.9692],
        [0.8408, 0.1592],
        [0.3903, 0.6097],
        [0.0060, 0.9940],
        [0.6640, 0.3360],
        [0.2319, 0.7681]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.9276, 0.0724],
        [0.0253, 0.9747],
        [0.0204, 0.9796],
        [0.0458, 0.9542],
        [0.0932, 0.9068],
        [0.0879, 0.9121],
        [0.3602, 0.6398],
        [0.6053, 0.3947],
        [0.2002, 0.7998],
        [0.0521, 0.9479],
        [0.0054, 0.9946],
        [0.0783, 0.9217]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3209, 0.6791],
        [0.2080, 0.7920],
        [0.4010, 0.5990],
        [0.2062, 0.7938],
        [0.3114, 0.6886],
        [0.0767, 0.9233],
        [0.6683, 0.3317],
        [0.0110, 0.9890],
        [0.2832, 0.7168],
        [0.0113, 0.9887],
        [0.7319, 0.2681],
        [0.1117, 0.8883]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.7001, 0.2999],
        [0.0061, 0.9939],
        [0.0486, 0.9514],
        [0.8873, 0.1127],
        [0.0521, 0.9479],
        [0.0074, 0.9926],
        [0.8909, 0.1091],
        [0.0283, 0.9717],
        [0.0129, 0.9871],
        [0.0089, 0.9911],
        [0.0226, 0.9774],
        [0.0015, 0.9985]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.8912, 0.1088],
        [0.0789, 0.9211],
        [0.0139, 0.9861],
        [0.6275, 0.3725],
        [0.0262, 0.9738],
        [0.0043, 0.9957],
        [0.0420, 0.9580],
        [0.8287, 0.1713],
        [0.9356, 0.0644],
        [0.0243, 0.9757],
        [0.1959, 0.8041],
        [0.0073, 0.9927]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.2905, 0.7095],
        [0.8754, 0.1246],
        [0.2168, 0.7832],
        [0.0176, 0.9824],
        [0.1112, 0.8888],
        [0.4209, 0.5791],
        [0.0361, 0.9639],
        [0.0023, 0.9977],
        [0.4085, 0.5915],
        [0.7077, 0.2923],
        [0.8024, 0.1976],
        [0.2445, 0.7555]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.8779, 0.1221],
        [0.0809, 0.9191],
        [0.3350, 0.6650],
        [0.0648, 0.9352],
        [0.0126, 0.9874],
        [0.3616, 0.6384],
        [0.0256, 0.9744],
        [0.9570, 0.0430],
        [0.0099, 0.9901],
        [0.4164, 0.5836],
        [0.0507, 0.9493],
        [0.1592, 0.8408]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0119, 0.9881],
        [0.0642, 0.9358],
        [0.1359, 0.8641],
        [0.0144, 0.9856],
        [0.2766, 0.7234],
        [0.1735, 0.8265],
        [0.0046, 0.9954],
        [0.0334, 0.9666],
        [0.8970, 0.1030],
        [0.1190, 0.8810],
        [0.0019, 0.9981],
        [0.0089, 0.9911]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0025, 0.9975],
        [0.0445, 0.9555],
        [0.0710, 0.9290],
        [0.0132, 0.9868],
        [0.0181, 0.9819],
        [0.0057, 0.9943],
        [0.0763, 0.9237],
        [0.0031, 0.9969],
        [0.7952, 0.2048],
        [0.4737, 0.5263],
        [0.1089, 0.8911],
        [0.4902, 0.5098]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0094, 0.9906],
        [0.0029, 0.9971],
        [0.5337, 0.4663],
        [0.0084, 0.9916],
        [0.0036, 0.9964],
        [0.0042, 0.9958],
        [0.0065, 0.9935],
        [0.0483, 0.9517],
        [0.1701, 0.8299],
        [0.0126, 0.9874],
        [0.4459, 0.5541],
        [0.0116, 0.9884]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0321, 0.9679],
        [0.0522, 0.9478],
        [0.3247, 0.6753],
        [0.0371, 0.9629],
        [0.0226, 0.9774],
        [0.1817, 0.8183],
        [0.3783, 0.6217],
        [0.1963, 0.8037],
        [0.5247, 0.4753],
        [0.0746, 0.9254],
        [0.1222, 0.8778],
        [0.0021, 0.9979]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0129, 0.9871],
        [0.6524, 0.3476],
        [0.0047, 0.9953],
        [0.4843, 0.5157],
        [0.1746, 0.8254],
        [0.5574, 0.4426],
        [0.4314, 0.5686],
        [0.8471, 0.1529],
        [0.1458, 0.8542],
        [0.0048, 0.9952],
        [0.1054, 0.8946],
        [0.8733, 0.1267]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0125, 0.9875],
        [0.0614, 0.9386],
        [0.0082, 0.9918],
        [0.7997, 0.2003],
        [0.1704, 0.8296],
        [0.0024, 0.9976],
        [0.3857, 0.6143],
        [0.4554, 0.5446],
        [0.0534, 0.9466],
        [0.0046, 0.9954],
        [0.0022, 0.9978],
        [0.0075, 0.9925]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[1.0019e-03, 9.9900e-01],
        [9.5905e-04, 9.9904e-01],
        [9.0364e-02, 9.0964e-01],
        [4.2292e-01, 5.7708e-01],
        [3.1224e-03, 9.9688e-01],
        [1.5075e-02, 9.8493e-01],
        [4.8005e-03, 9.9520e-01],
        [5.6921e-03, 9.9431e-01],
        [1.7295e-01, 8.2705e-01],
        [7.3677e-04, 9.9926e-01],
        [8.1419e-03, 9.9186e-01],
        [3.9836e-01, 6.0164e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[1.4812e-02, 9.8519e-01],
        [9.6809e-04, 9.9903e-01],
        [7.4211e-04, 9.9926e-01],
        [8.4253e-03, 9.9157e-01],
        [3.3389e-02, 9.6661e-01],
        [1.8993e-03, 9.9810e-01],
        [4.2492e-02, 9.5751e-01],
        [4.2531e-02, 9.5747e-01],
        [3.9497e-03, 9.9605e-01],
        [1.5660e-02, 9.8434e-01],
        [1.6254e-02, 9.8375e-01],
        [6.2802e-04, 9.9937e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[1.6041e-02, 9.8396e-01],
        [7.1855e-02, 9.2815e-01],
        [9.4939e-03, 9.9051e-01],
        [1.7259e-02, 9.8274e-01],
        [4.3432e-03, 9.9566e-01],
        [6.0426e-02, 9.3957e-01],
        [1.4342e-03, 9.9857e-01],
        [3.4211e-01, 6.5789e-01],
        [1.1480e-02, 9.8852e-01],
        [1.5745e-02, 9.8425e-01],
        [1.9192e-02, 9.8081e-01],
        [7.4605e-04, 9.9925e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[1.3718e-03, 9.9863e-01],
        [2.0040e-01, 7.9960e-01],
        [2.9557e-02, 9.7044e-01],
        [1.4803e-02, 9.8520e-01],
        [1.2289e-02, 9.8771e-01],
        [2.4916e-03, 9.9751e-01],
        [6.6666e-04, 9.9933e-01],
        [1.0757e-02, 9.8924e-01],
        [1.3821e-01, 8.6179e-01],
        [1.3020e-02, 9.8698e-01],
        [1.4021e-01, 8.5979e-01],
        [2.8536e-03, 9.9715e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[2.1650e-01, 7.8350e-01],
        [4.8300e-01, 5.1700e-01],
        [1.3321e-02, 9.8668e-01],
        [7.2950e-03, 9.9270e-01],
        [7.3356e-04, 9.9927e-01],
        [1.0154e-02, 9.8985e-01],
        [1.5580e-01, 8.4420e-01],
        [7.5422e-01, 2.4578e-01],
        [1.2437e-03, 9.9876e-01],
        [5.1597e-03, 9.9484e-01],
        [1.0859e-03, 9.9891e-01],
        [8.5835e-01, 1.4165e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0227, 0.9773],
        [0.0152, 0.9848],
        [0.2870, 0.7130],
        [0.0013, 0.9987],
        [0.0269, 0.9731],
        [0.0273, 0.9727],
        [0.1771, 0.8229],
        [0.0053, 0.9947],
        [0.2334, 0.7666],
        [0.3493, 0.6507],
        [0.0026, 0.9974],
        [0.0775, 0.9225]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0018, 0.9982],
        [0.4887, 0.5113],
        [0.0020, 0.9980],
        [0.0371, 0.9629],
        [0.3726, 0.6274],
        [0.0303, 0.9697],
        [0.0069, 0.9931],
        [0.0035, 0.9965],
        [0.2725, 0.7275],
        [0.1225, 0.8775],
        [0.1605, 0.8395],
        [0.7167, 0.2833]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[2.2420e-03, 9.9776e-01],
        [5.5169e-02, 9.4483e-01],
        [9.5123e-01, 4.8773e-02],
        [2.0569e-02, 9.7943e-01],
        [7.2220e-03, 9.9278e-01],
        [2.5294e-03, 9.9747e-01],
        [7.8507e-04, 9.9921e-01],
        [3.6846e-03, 9.9632e-01],
        [2.9024e-03, 9.9710e-01],
        [1.7560e-02, 9.8244e-01],
        [1.5638e-01, 8.4362e-01],
        [5.5763e-03, 9.9442e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0461, 0.9539],
        [0.0089, 0.9911],
        [0.0105, 0.9895],
        [0.0163, 0.9837],
        [0.0183, 0.9817],
        [0.0174, 0.9826],
        [0.0097, 0.9903],
        [0.0033, 0.9967],
        [0.0330, 0.9670],
        [0.1023, 0.8977],
        [0.0312, 0.9688],
        [0.0035, 0.9965]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.0019, 0.9981],
        [0.0290, 0.9710],
        [0.0068, 0.9932],
        [0.0118, 0.9882],
        [0.0115, 0.9885],
        [0.0791, 0.9209],
        [0.0124, 0.9876],
        [0.3772, 0.6228],
        [0.8988, 0.1012],
        [0.0144, 0.9856],
        [0.3847, 0.6153],
        [0.0089, 0.9911]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.0076, 0.9924],
        [0.0086, 0.9914],
        [0.0029, 0.9971],
        [0.9564, 0.0436],
        [0.0086, 0.9914],
        [0.0101, 0.9899],
        [0.9436, 0.0564],
        [0.0476, 0.9524],
        [0.1439, 0.8561],
        [0.0179, 0.9821],
        [0.0096, 0.9904],
        [0.0104, 0.9896]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.0022, 0.9978],
        [0.0019, 0.9981],
        [0.0983, 0.9017],
        [0.5498, 0.4502],
        [0.0080, 0.9920],
        [0.0028, 0.9972],
        [0.0050, 0.9950],
        [0.2113, 0.7887],
        [0.0259, 0.9741],
        [0.0042, 0.9958],
        [0.0059, 0.9941],
        [0.0273, 0.9727]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.0916, 0.9084],
        [0.0035, 0.9965],
        [0.0057, 0.9943],
        [0.0063, 0.9937],
        [0.0078, 0.9922],
        [0.0055, 0.9945],
        [0.0085, 0.9915],
        [0.0328, 0.9672],
        [0.0213, 0.9787],
        [0.0712, 0.9288],
        [0.0044, 0.9956],
        [0.0026, 0.9974]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.0600, 0.9400],
        [0.1002, 0.8998],
        [0.0058, 0.9942],
        [0.0057, 0.9943],
        [0.0238, 0.9762],
        [0.0145, 0.9855],
        [0.0044, 0.9956],
        [0.0070, 0.9930],
        [0.0054, 0.9946],
        [0.0100, 0.9900],
        [0.0032, 0.9968],
        [0.0070, 0.9930]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.0057, 0.9943],
        [0.1042, 0.8958],
        [0.0121, 0.9879],
        [0.0065, 0.9935],
        [0.0039, 0.9961],
        [0.0140, 0.9860],
        [0.0035, 0.9965],
        [0.0013, 0.9987],
        [0.0072, 0.9928],
        [0.0066, 0.9934],
        [0.1778, 0.8222],
        [0.0715, 0.9285]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[9.1265e-03, 9.9087e-01],
        [3.1396e-03, 9.9686e-01],
        [1.2921e-02, 9.8708e-01],
        [3.7751e-03, 9.9622e-01],
        [9.9130e-04, 9.9901e-01],
        [2.0541e-03, 9.9795e-01],
        [1.3353e-02, 9.8665e-01],
        [9.4299e-03, 9.9057e-01],
        [3.0462e-03, 9.9695e-01],
        [7.1670e-03, 9.9283e-01],
        [1.4194e-03, 9.9858e-01],
        [3.3225e-03, 9.9668e-01]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.0257, 0.9743],
        [0.0166, 0.9834],
        [0.0017, 0.9983],
        [0.0032, 0.9968],
        [0.0035, 0.9965],
        [0.0025, 0.9975],
        [0.0036, 0.9964],
        [0.0017, 0.9983],
        [0.0103, 0.9897],
        [0.0022, 0.9978],
        [0.0012, 0.9988],
        [0.0022, 0.9978]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.0011, 0.9989],
        [0.0119, 0.9881],
        [0.0020, 0.9980],
        [0.1861, 0.8139],
        [0.0468, 0.9532],
        [0.0066, 0.9934],
        [0.0593, 0.9407],
        [0.0523, 0.9477],
        [0.6378, 0.3622],
        [0.7221, 0.2779],
        [0.0199, 0.9801],
        [0.3207, 0.6793]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.0060, 0.9940],
        [0.0680, 0.9320],
        [0.6291, 0.3709],
        [0.0033, 0.9967],
        [0.0052, 0.9948],
        [0.0055, 0.9945],
        [0.0105, 0.9895],
        [0.0085, 0.9915],
        [0.0134, 0.9866],
        [0.0245, 0.9755],
        [0.0153, 0.9847],
        [0.0351, 0.9649]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.0086, 0.9914],
        [0.0101, 0.9899],
        [0.1365, 0.8635],
        [0.0025, 0.9975],
        [0.0026, 0.9974],
        [0.0861, 0.9139],
        [0.0101, 0.9899],
        [0.0033, 0.9967],
        [0.0070, 0.9930],
        [0.0056, 0.9944],
        [0.0020, 0.9980],
        [0.0037, 0.9963]], device='cuda:0', grad_fn=<SoftmaxBackward>)
