Iter #50: tensor([[0.5313, 0.4687],
        [0.5045, 0.4955],
        [0.4424, 0.5576],
        [0.4496, 0.5504],
        [0.4352, 0.5648],
        [0.5286, 0.4714],
        [0.4752, 0.5248],
        [0.4862, 0.5138],
        [0.5178, 0.4822],
        [0.5223, 0.4777],
        [0.4843, 0.5157],
        [0.4530, 0.5470]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4939, 0.5061],
        [0.4924, 0.5076],
        [0.4938, 0.5062],
        [0.4991, 0.5009],
        [0.4965, 0.5035],
        [0.5128, 0.4872],
        [0.5362, 0.4638],
        [0.3584, 0.6416],
        [0.4505, 0.5495],
        [0.4831, 0.5169],
        [0.4644, 0.5357],
        [0.4486, 0.5514]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.5553, 0.4447],
        [0.5402, 0.4598],
        [0.5105, 0.4895],
        [0.5030, 0.4970],
        [0.4972, 0.5028],
        [0.4899, 0.5101],
        [0.4906, 0.5094],
        [0.4871, 0.5129],
        [0.4561, 0.5439],
        [0.5291, 0.4709],
        [0.4545, 0.5455],
        [0.4776, 0.5224]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4240, 0.5760],
        [0.4399, 0.5601],
        [0.4597, 0.5403],
        [0.4808, 0.5192],
        [0.4492, 0.5508],
        [0.4853, 0.5147],
        [0.4788, 0.5212],
        [0.4974, 0.5026],
        [0.4967, 0.5033],
        [0.4368, 0.5632],
        [0.4935, 0.5065],
        [0.4864, 0.5136]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4663, 0.5337],
        [0.4541, 0.5459],
        [0.4998, 0.5002],
        [0.4351, 0.5649],
        [0.4839, 0.5161],
        [0.4345, 0.5655],
        [0.4613, 0.5387],
        [0.4832, 0.5168],
        [0.4503, 0.5497],
        [0.4656, 0.5344],
        [0.4298, 0.5702],
        [0.4991, 0.5009]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4407, 0.5593],
        [0.5232, 0.4768],
        [0.4356, 0.5644],
        [0.4803, 0.5197],
        [0.5525, 0.4475],
        [0.5590, 0.4410],
        [0.5329, 0.4671],
        [0.4790, 0.5210],
        [0.4532, 0.5468],
        [0.4293, 0.5707],
        [0.5797, 0.4203],
        [0.4926, 0.5074]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4593, 0.5407],
        [0.4452, 0.5548],
        [0.5000, 0.5000],
        [0.3965, 0.6035],
        [0.4781, 0.5219],
        [0.5137, 0.4863],
        [0.4800, 0.5200],
        [0.4750, 0.5250],
        [0.4700, 0.5300],
        [0.5171, 0.4829],
        [0.4711, 0.5289],
        [0.4752, 0.5248]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4567, 0.5433],
        [0.4056, 0.5944],
        [0.4920, 0.5080],
        [0.4547, 0.5453],
        [0.4933, 0.5067],
        [0.4451, 0.5549],
        [0.4481, 0.5519],
        [0.4261, 0.5739],
        [0.4404, 0.5596],
        [0.4654, 0.5346],
        [0.4402, 0.5598],
        [0.4215, 0.5785]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4611, 0.5389],
        [0.4630, 0.5370],
        [0.4848, 0.5152],
        [0.4747, 0.5253],
        [0.4818, 0.5182],
        [0.4894, 0.5106],
        [0.4464, 0.5536],
        [0.4923, 0.5077],
        [0.4460, 0.5540],
        [0.4328, 0.5672],
        [0.4837, 0.5163],
        [0.5021, 0.4979]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4146, 0.5854],
        [0.4468, 0.5532],
        [0.5036, 0.4964],
        [0.4040, 0.5960],
        [0.4886, 0.5114],
        [0.4539, 0.5461],
        [0.4841, 0.5159],
        [0.4795, 0.5205],
        [0.4153, 0.5847],
        [0.4639, 0.5361],
        [0.4653, 0.5347],
        [0.3270, 0.6730]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4330, 0.5670],
        [0.4892, 0.5108],
        [0.4847, 0.5153],
        [0.4322, 0.5678],
        [0.4688, 0.5312],
        [0.4985, 0.5015],
        [0.4155, 0.5845],
        [0.4789, 0.5211],
        [0.4243, 0.5757],
        [0.5344, 0.4656],
        [0.5011, 0.4989],
        [0.4659, 0.5341]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4360, 0.5640],
        [0.3650, 0.6350],
        [0.4664, 0.5336],
        [0.4536, 0.5464],
        [0.4649, 0.5351],
        [0.4874, 0.5126],
        [0.4773, 0.5227],
        [0.4782, 0.5218],
        [0.3871, 0.6129],
        [0.4565, 0.5435],
        [0.4909, 0.5091],
        [0.4593, 0.5407]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3898, 0.6102],
        [0.4656, 0.5344],
        [0.4320, 0.5680],
        [0.4517, 0.5483],
        [0.5078, 0.4922],
        [0.4922, 0.5078],
        [0.4667, 0.5333],
        [0.4383, 0.5617],
        [0.4556, 0.5444],
        [0.4217, 0.5783],
        [0.3756, 0.6244],
        [0.4821, 0.5179]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4344, 0.5656],
        [0.5016, 0.4984],
        [0.5021, 0.4979],
        [0.4274, 0.5726],
        [0.5138, 0.4862],
        [0.4722, 0.5278],
        [0.5002, 0.4998],
        [0.4725, 0.5275],
        [0.4826, 0.5174],
        [0.5513, 0.4487],
        [0.4503, 0.5497],
        [0.4256, 0.5744]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4799, 0.5201],
        [0.4585, 0.5415],
        [0.4462, 0.5538],
        [0.5053, 0.4947],
        [0.3838, 0.6162],
        [0.5167, 0.4833],
        [0.4434, 0.5566],
        [0.5237, 0.4763],
        [0.5003, 0.4997],
        [0.4738, 0.5262],
        [0.4872, 0.5128],
        [0.4384, 0.5616]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4816, 0.5184],
        [0.4931, 0.5069],
        [0.4564, 0.5436],
        [0.4704, 0.5296],
        [0.5195, 0.4805],
        [0.5221, 0.4779],
        [0.5157, 0.4843],
        [0.3993, 0.6007],
        [0.4101, 0.5899],
        [0.4320, 0.5680],
        [0.4856, 0.5144],
        [0.4134, 0.5866]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4594, 0.5406],
        [0.5120, 0.4880],
        [0.4581, 0.5419],
        [0.4789, 0.5211],
        [0.5270, 0.4730],
        [0.4838, 0.5162],
        [0.4060, 0.5940],
        [0.4837, 0.5163],
        [0.4557, 0.5443],
        [0.4750, 0.5250],
        [0.4895, 0.5105],
        [0.4239, 0.5761]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4902, 0.5098],
        [0.4940, 0.5060],
        [0.4449, 0.5551],
        [0.3591, 0.6409],
        [0.4789, 0.5211],
        [0.5243, 0.4757],
        [0.4933, 0.5067],
        [0.3961, 0.6039],
        [0.4717, 0.5283],
        [0.4819, 0.5181],
        [0.5144, 0.4856],
        [0.4915, 0.5085]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4634, 0.5366],
        [0.4853, 0.5147],
        [0.4870, 0.5130],
        [0.4232, 0.5768],
        [0.4375, 0.5625],
        [0.4113, 0.5887],
        [0.4618, 0.5382],
        [0.4828, 0.5172],
        [0.4821, 0.5179],
        [0.3879, 0.6121],
        [0.4883, 0.5117],
        [0.3535, 0.6465]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4163, 0.5837],
        [0.3651, 0.6349],
        [0.4029, 0.5971],
        [0.4318, 0.5682],
        [0.4330, 0.5670],
        [0.4378, 0.5622],
        [0.3787, 0.6213],
        [0.4893, 0.5107],
        [0.4517, 0.5483],
        [0.4291, 0.5709],
        [0.5011, 0.4989],
        [0.4271, 0.5729]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4486, 0.5514],
        [0.4836, 0.5164],
        [0.5044, 0.4956],
        [0.4087, 0.5913],
        [0.4879, 0.5121],
        [0.4449, 0.5551],
        [0.4431, 0.5569],
        [0.5177, 0.4823],
        [0.4605, 0.5395],
        [0.4737, 0.5263],
        [0.4427, 0.5573],
        [0.4017, 0.5983]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4315, 0.5685],
        [0.4127, 0.5873],
        [0.4112, 0.5888],
        [0.4292, 0.5708],
        [0.4571, 0.5429],
        [0.4771, 0.5229],
        [0.4076, 0.5924],
        [0.4418, 0.5582],
        [0.3932, 0.6068],
        [0.4681, 0.5319],
        [0.4975, 0.5025],
        [0.4565, 0.5435]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3772, 0.6228],
        [0.4458, 0.5542],
        [0.4092, 0.5908],
        [0.4361, 0.5639],
        [0.3970, 0.6030],
        [0.4546, 0.5454],
        [0.4217, 0.5783],
        [0.4316, 0.5684],
        [0.3878, 0.6122],
        [0.3353, 0.6647],
        [0.5027, 0.4973],
        [0.4333, 0.5667]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3342, 0.6658],
        [0.3672, 0.6328],
        [0.4539, 0.5461],
        [0.4328, 0.5672],
        [0.4852, 0.5148],
        [0.4539, 0.5461],
        [0.3751, 0.6249],
        [0.4499, 0.5501],
        [0.4122, 0.5878],
        [0.3991, 0.6009],
        [0.3697, 0.6303],
        [0.3839, 0.6161]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.5014, 0.4986],
        [0.4683, 0.5317],
        [0.4067, 0.5933],
        [0.4035, 0.5965],
        [0.3923, 0.6077],
        [0.4216, 0.5784],
        [0.4515, 0.5485],
        [0.4493, 0.5507],
        [0.4007, 0.5993],
        [0.4553, 0.5447],
        [0.3817, 0.6183],
        [0.4610, 0.5390]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3704, 0.6296],
        [0.4372, 0.5628],
        [0.3536, 0.6464],
        [0.4300, 0.5700],
        [0.4162, 0.5838],
        [0.4535, 0.5465],
        [0.3692, 0.6308],
        [0.4350, 0.5650],
        [0.4695, 0.5305],
        [0.4221, 0.5779],
        [0.4572, 0.5428],
        [0.4306, 0.5694]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4239, 0.5761],
        [0.4410, 0.5590],
        [0.4668, 0.5332],
        [0.4412, 0.5588],
        [0.4890, 0.5110],
        [0.4810, 0.5190],
        [0.3961, 0.6039],
        [0.4492, 0.5508],
        [0.4625, 0.5375],
        [0.4344, 0.5656],
        [0.4243, 0.5757],
        [0.4293, 0.5707]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4342, 0.5658],
        [0.4600, 0.5400],
        [0.3973, 0.6027],
        [0.4134, 0.5866],
        [0.4429, 0.5571],
        [0.5235, 0.4765],
        [0.4808, 0.5192],
        [0.4118, 0.5882],
        [0.3992, 0.6008],
        [0.4568, 0.5432],
        [0.4640, 0.5360],
        [0.4119, 0.5881]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4339, 0.5661],
        [0.4724, 0.5276],
        [0.4238, 0.5762],
        [0.4019, 0.5981],
        [0.4016, 0.5984],
        [0.4813, 0.5187],
        [0.4489, 0.5511],
        [0.4063, 0.5937],
        [0.3814, 0.6186],
        [0.4068, 0.5932],
        [0.4365, 0.5635],
        [0.4247, 0.5753]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4241, 0.5759],
        [0.4045, 0.5955],
        [0.3924, 0.6076],
        [0.4040, 0.5960],
        [0.4216, 0.5784],
        [0.4015, 0.5985],
        [0.4504, 0.5496],
        [0.4569, 0.5431],
        [0.4127, 0.5873],
        [0.3854, 0.6146],
        [0.4457, 0.5543],
        [0.4133, 0.5867]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4091, 0.5909],
        [0.4398, 0.5602],
        [0.4292, 0.5708],
        [0.4561, 0.5439],
        [0.4549, 0.5451],
        [0.4472, 0.5528],
        [0.4335, 0.5665],
        [0.4784, 0.5216],
        [0.4378, 0.5622],
        [0.4002, 0.5998],
        [0.4842, 0.5158],
        [0.3991, 0.6009]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4620, 0.5380],
        [0.4762, 0.5238],
        [0.5187, 0.4813],
        [0.4551, 0.5449],
        [0.3829, 0.6171],
        [0.4381, 0.5619],
        [0.4109, 0.5891],
        [0.4046, 0.5954],
        [0.4208, 0.5792],
        [0.4508, 0.5492],
        [0.4865, 0.5135],
        [0.4458, 0.5542]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4043, 0.5957],
        [0.4720, 0.5280],
        [0.4779, 0.5221],
        [0.4450, 0.5550],
        [0.4152, 0.5848],
        [0.4511, 0.5489],
        [0.4257, 0.5743],
        [0.4305, 0.5695],
        [0.4440, 0.5560],
        [0.4405, 0.5595],
        [0.4292, 0.5708],
        [0.4826, 0.5174]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4197, 0.5803],
        [0.3960, 0.6040],
        [0.4160, 0.5840],
        [0.4202, 0.5798],
        [0.4224, 0.5776],
        [0.4533, 0.5467],
        [0.3936, 0.6064],
        [0.4143, 0.5857],
        [0.4414, 0.5586],
        [0.4482, 0.5518],
        [0.4341, 0.5659],
        [0.4039, 0.5961]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3639, 0.6361],
        [0.4118, 0.5882],
        [0.4436, 0.5564],
        [0.4355, 0.5645],
        [0.4341, 0.5659],
        [0.4218, 0.5782],
        [0.4463, 0.5537],
        [0.3997, 0.6003],
        [0.4292, 0.5708],
        [0.4625, 0.5375],
        [0.4061, 0.5939],
        [0.4225, 0.5775]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4838, 0.5162],
        [0.4497, 0.5503],
        [0.4426, 0.5574],
        [0.3705, 0.6295],
        [0.4511, 0.5489],
        [0.4460, 0.5540],
        [0.3979, 0.6021],
        [0.4485, 0.5515],
        [0.4407, 0.5593],
        [0.4962, 0.5038],
        [0.4024, 0.5976],
        [0.4882, 0.5118]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4262, 0.5738],
        [0.4503, 0.5497],
        [0.4261, 0.5739],
        [0.4547, 0.5453],
        [0.4472, 0.5528],
        [0.4078, 0.5922],
        [0.4225, 0.5775],
        [0.4843, 0.5157],
        [0.5026, 0.4974],
        [0.4309, 0.5691],
        [0.4579, 0.5421],
        [0.4268, 0.5732]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4858, 0.5142],
        [0.4141, 0.5859],
        [0.5078, 0.4922],
        [0.4309, 0.5691],
        [0.4439, 0.5561],
        [0.4682, 0.5318],
        [0.4329, 0.5671],
        [0.3972, 0.6028],
        [0.3720, 0.6280],
        [0.4822, 0.5178],
        [0.4422, 0.5578],
        [0.4457, 0.5543]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3806, 0.6194],
        [0.4364, 0.5636],
        [0.4310, 0.5690],
        [0.4542, 0.5458],
        [0.5295, 0.4705],
        [0.5484, 0.4516],
        [0.4245, 0.5755],
        [0.3947, 0.6053],
        [0.4219, 0.5781],
        [0.4178, 0.5822],
        [0.4494, 0.5506],
        [0.4028, 0.5972]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4996, 0.5004],
        [0.4490, 0.5510],
        [0.4714, 0.5286],
        [0.4780, 0.5220],
        [0.4635, 0.5365],
        [0.5002, 0.4998],
        [0.4228, 0.5772],
        [0.4561, 0.5439],
        [0.4190, 0.5810],
        [0.3940, 0.6060],
        [0.4684, 0.5316],
        [0.4235, 0.5765]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4522, 0.5478],
        [0.4186, 0.5814],
        [0.4527, 0.5473],
        [0.4172, 0.5828],
        [0.4555, 0.5445],
        [0.4166, 0.5834],
        [0.4343, 0.5657],
        [0.4536, 0.5464],
        [0.4513, 0.5487],
        [0.4734, 0.5266],
        [0.4490, 0.5510],
        [0.4513, 0.5487]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3667, 0.6333],
        [0.4126, 0.5874],
        [0.4148, 0.5852],
        [0.4556, 0.5444],
        [0.4518, 0.5482],
        [0.4494, 0.5506],
        [0.3523, 0.6477],
        [0.4602, 0.5398],
        [0.4675, 0.5325],
        [0.4790, 0.5210],
        [0.4765, 0.5235],
        [0.4307, 0.5693]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4375, 0.5625],
        [0.4432, 0.5568],
        [0.4619, 0.5381],
        [0.4982, 0.5018],
        [0.4290, 0.5710],
        [0.3824, 0.6176],
        [0.4441, 0.5559],
        [0.4614, 0.5386],
        [0.4713, 0.5287],
        [0.4615, 0.5385],
        [0.4435, 0.5565],
        [0.4407, 0.5593]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3509, 0.6491],
        [0.4226, 0.5774],
        [0.3623, 0.6377],
        [0.4246, 0.5754],
        [0.4259, 0.5741],
        [0.4390, 0.5610],
        [0.3700, 0.6300],
        [0.4166, 0.5834],
        [0.4117, 0.5883],
        [0.5027, 0.4973],
        [0.4903, 0.5097],
        [0.4471, 0.5529]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4731, 0.5269],
        [0.3932, 0.6068],
        [0.4683, 0.5317],
        [0.4634, 0.5366],
        [0.5145, 0.4855],
        [0.4430, 0.5570],
        [0.4216, 0.5784],
        [0.4715, 0.5285],
        [0.4383, 0.5617],
        [0.4330, 0.5670],
        [0.4311, 0.5689],
        [0.4289, 0.5711]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4438, 0.5562],
        [0.4411, 0.5589],
        [0.4763, 0.5237],
        [0.4549, 0.5451],
        [0.4438, 0.5562],
        [0.4612, 0.5388],
        [0.4514, 0.5486],
        [0.4825, 0.5175],
        [0.4925, 0.5075],
        [0.4737, 0.5263],
        [0.4502, 0.5498],
        [0.4748, 0.5252]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4304, 0.5696],
        [0.4880, 0.5120],
        [0.3975, 0.6025],
        [0.4077, 0.5923],
        [0.4650, 0.5350],
        [0.5070, 0.4930],
        [0.4754, 0.5246],
        [0.4868, 0.5132],
        [0.4807, 0.5193],
        [0.5173, 0.4827],
        [0.4342, 0.5658],
        [0.4846, 0.5154]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4924, 0.5076],
        [0.4908, 0.5092],
        [0.5017, 0.4983],
        [0.4489, 0.5511],
        [0.4500, 0.5500],
        [0.4971, 0.5029],
        [0.3991, 0.6009],
        [0.4778, 0.5222],
        [0.4526, 0.5474],
        [0.4393, 0.5607],
        [0.4602, 0.5398],
        [0.4576, 0.5424]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5453, 0.4547],
        [0.4666, 0.5334],
        [0.4424, 0.5576],
        [0.4679, 0.5321],
        [0.4557, 0.5443],
        [0.4950, 0.5050],
        [0.5118, 0.4882],
        [0.4101, 0.5899],
        [0.4614, 0.5386],
        [0.4822, 0.5178],
        [0.5096, 0.4904],
        [0.5074, 0.4926]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4519, 0.5481],
        [0.4876, 0.5124],
        [0.4663, 0.5337],
        [0.4753, 0.5247],
        [0.5134, 0.4866],
        [0.5400, 0.4600],
        [0.4587, 0.5413],
        [0.4856, 0.5144],
        [0.4902, 0.5098],
        [0.4807, 0.5193],
        [0.4606, 0.5394],
        [0.3912, 0.6088]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5128, 0.4872],
        [0.4225, 0.5775],
        [0.3866, 0.6134],
        [0.3970, 0.6030],
        [0.4208, 0.5792],
        [0.5196, 0.4804],
        [0.4102, 0.5898],
        [0.4518, 0.5482],
        [0.4109, 0.5891],
        [0.4410, 0.5590],
        [0.4354, 0.5646],
        [0.4865, 0.5135]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4329, 0.5671],
        [0.4488, 0.5512],
        [0.4110, 0.5890],
        [0.4350, 0.5650],
        [0.4238, 0.5762],
        [0.4458, 0.5542],
        [0.5045, 0.4955],
        [0.4571, 0.5429],
        [0.4502, 0.5498],
        [0.4540, 0.5460],
        [0.4117, 0.5883],
        [0.4368, 0.5632]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4281, 0.5719],
        [0.4783, 0.5217],
        [0.4252, 0.5748],
        [0.4633, 0.5367],
        [0.4790, 0.5210],
        [0.4726, 0.5274],
        [0.4316, 0.5684],
        [0.4948, 0.5052],
        [0.4695, 0.5305],
        [0.4593, 0.5407],
        [0.4927, 0.5073],
        [0.4006, 0.5994]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4496, 0.5504],
        [0.4905, 0.5095],
        [0.5189, 0.4811],
        [0.4515, 0.5486],
        [0.4207, 0.5793],
        [0.5028, 0.4972],
        [0.3949, 0.6051],
        [0.4689, 0.5311],
        [0.3849, 0.6151],
        [0.3871, 0.6129],
        [0.4585, 0.5415],
        [0.4303, 0.5697]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4585, 0.5415],
        [0.4745, 0.5255],
        [0.4054, 0.5946],
        [0.4761, 0.5239],
        [0.4671, 0.5329],
        [0.4276, 0.5724],
        [0.4468, 0.5532],
        [0.4313, 0.5687],
        [0.5021, 0.4979],
        [0.5110, 0.4890],
        [0.4516, 0.5484],
        [0.5454, 0.4546]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4703, 0.5297],
        [0.3571, 0.6429],
        [0.3985, 0.6015],
        [0.4710, 0.5290],
        [0.4872, 0.5128],
        [0.4876, 0.5124],
        [0.4909, 0.5091],
        [0.4959, 0.5041],
        [0.4558, 0.5442],
        [0.4266, 0.5734],
        [0.4608, 0.5392],
        [0.4428, 0.5572]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3717, 0.6283],
        [0.4171, 0.5829],
        [0.5168, 0.4832],
        [0.4464, 0.5536],
        [0.4661, 0.5339],
        [0.4760, 0.5240],
        [0.4361, 0.5639],
        [0.4786, 0.5214],
        [0.4612, 0.5388],
        [0.4855, 0.5145],
        [0.4423, 0.5577],
        [0.4455, 0.5545]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4942, 0.5058],
        [0.4908, 0.5092],
        [0.4178, 0.5822],
        [0.4252, 0.5748],
        [0.4404, 0.5596],
        [0.5777, 0.4223],
        [0.4298, 0.5702],
        [0.4720, 0.5280],
        [0.4686, 0.5314],
        [0.5000, 0.5000],
        [0.3860, 0.6140],
        [0.4688, 0.5312]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4415, 0.5585],
        [0.4546, 0.5454],
        [0.4920, 0.5080],
        [0.4493, 0.5507],
        [0.4944, 0.5056],
        [0.4702, 0.5298],
        [0.4162, 0.5838],
        [0.4667, 0.5333],
        [0.4772, 0.5228],
        [0.4271, 0.5729],
        [0.4319, 0.5681],
        [0.4907, 0.5093]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4741, 0.5259],
        [0.4541, 0.5459],
        [0.4806, 0.5194],
        [0.4106, 0.5894],
        [0.4769, 0.5231],
        [0.4562, 0.5438],
        [0.4290, 0.5710],
        [0.3919, 0.6081],
        [0.4168, 0.5832],
        [0.4566, 0.5434],
        [0.4260, 0.5740],
        [0.4809, 0.5191]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4521, 0.5479],
        [0.4308, 0.5692],
        [0.4534, 0.5466],
        [0.4479, 0.5521],
        [0.4662, 0.5338],
        [0.4921, 0.5079],
        [0.4111, 0.5889],
        [0.4543, 0.5457],
        [0.4287, 0.5713],
        [0.4226, 0.5774],
        [0.4366, 0.5634],
        [0.3981, 0.6019]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4177, 0.5823],
        [0.4667, 0.5333],
        [0.4722, 0.5278],
        [0.4585, 0.5415],
        [0.4417, 0.5583],
        [0.4962, 0.5038],
        [0.4167, 0.5833],
        [0.5123, 0.4877],
        [0.4324, 0.5676],
        [0.4084, 0.5916],
        [0.4451, 0.5549],
        [0.4831, 0.5169]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4172, 0.5828],
        [0.5011, 0.4989],
        [0.5339, 0.4661],
        [0.4652, 0.5348],
        [0.4905, 0.5095],
        [0.5458, 0.4542],
        [0.4203, 0.5797],
        [0.4228, 0.5772],
        [0.4513, 0.5487],
        [0.4306, 0.5694],
        [0.4271, 0.5729],
        [0.4772, 0.5228]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4390, 0.5610],
        [0.4187, 0.5813],
        [0.4097, 0.5903],
        [0.4336, 0.5664],
        [0.4638, 0.5362],
        [0.4283, 0.5717],
        [0.4601, 0.5399],
        [0.4620, 0.5380],
        [0.4126, 0.5874],
        [0.4669, 0.5331],
        [0.4557, 0.5443],
        [0.4287, 0.5713]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3936, 0.6064],
        [0.4474, 0.5526],
        [0.5262, 0.4738],
        [0.4321, 0.5679],
        [0.3831, 0.6169],
        [0.5222, 0.4778],
        [0.4321, 0.5679],
        [0.4731, 0.5269],
        [0.4203, 0.5797],
        [0.4416, 0.5584],
        [0.4136, 0.5864],
        [0.4117, 0.5883]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4399, 0.5601],
        [0.4286, 0.5714],
        [0.4845, 0.5155],
        [0.4942, 0.5058],
        [0.4193, 0.5807],
        [0.4301, 0.5699],
        [0.4635, 0.5365],
        [0.4037, 0.5963],
        [0.4788, 0.5212],
        [0.4455, 0.5545],
        [0.4470, 0.5530],
        [0.4933, 0.5067]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4560, 0.5440],
        [0.4334, 0.5666],
        [0.4261, 0.5739],
        [0.4318, 0.5682],
        [0.5445, 0.4555],
        [0.4324, 0.5676],
        [0.3969, 0.6031],
        [0.4599, 0.5401],
        [0.4514, 0.5486],
        [0.4941, 0.5059],
        [0.4688, 0.5312],
        [0.4409, 0.5591]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3721, 0.6279],
        [0.4726, 0.5274],
        [0.4500, 0.5500],
        [0.4747, 0.5253],
        [0.3684, 0.6316],
        [0.4378, 0.5622],
        [0.3743, 0.6257],
        [0.4154, 0.5846],
        [0.4846, 0.5154],
        [0.4992, 0.5008],
        [0.4374, 0.5626],
        [0.4518, 0.5482]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4644, 0.5356],
        [0.4648, 0.5352],
        [0.4274, 0.5726],
        [0.3975, 0.6025],
        [0.4928, 0.5072],
        [0.5116, 0.4884],
        [0.4208, 0.5792],
        [0.4695, 0.5305],
        [0.4933, 0.5067],
        [0.5097, 0.4903],
        [0.3745, 0.6255],
        [0.4009, 0.5991]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4638, 0.5362],
        [0.5101, 0.4899],
        [0.4293, 0.5707],
        [0.4640, 0.5360],
        [0.5098, 0.4902],
        [0.4471, 0.5529],
        [0.4049, 0.5951],
        [0.4809, 0.5191],
        [0.4646, 0.5354],
        [0.4361, 0.5639],
        [0.4985, 0.5015],
        [0.4737, 0.5263]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4561, 0.5439],
        [0.4389, 0.5611],
        [0.4313, 0.5687],
        [0.3987, 0.6013],
        [0.4204, 0.5796],
        [0.4265, 0.5735],
        [0.4453, 0.5547],
        [0.4302, 0.5698],
        [0.4587, 0.5413],
        [0.4755, 0.5245],
        [0.4658, 0.5342],
        [0.5005, 0.4995]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4393, 0.5607],
        [0.4647, 0.5353],
        [0.4015, 0.5985],
        [0.4978, 0.5022],
        [0.4725, 0.5275],
        [0.5088, 0.4912],
        [0.4495, 0.5505],
        [0.4464, 0.5536],
        [0.4610, 0.5390],
        [0.4747, 0.5253],
        [0.4402, 0.5598],
        [0.4008, 0.5992]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4511, 0.5489],
        [0.5004, 0.4996],
        [0.4288, 0.5712],
        [0.4832, 0.5168],
        [0.4338, 0.5662],
        [0.4808, 0.5192],
        [0.4240, 0.5760],
        [0.4929, 0.5071],
        [0.4096, 0.5904],
        [0.4478, 0.5522],
        [0.4464, 0.5536],
        [0.3956, 0.6044]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4573, 0.5427],
        [0.4031, 0.5969],
        [0.4645, 0.5355],
        [0.4307, 0.5693],
        [0.4722, 0.5278],
        [0.5189, 0.4811],
        [0.4696, 0.5304],
        [0.3748, 0.6252],
        [0.4455, 0.5545],
        [0.4489, 0.5511],
        [0.3810, 0.6190],
        [0.4592, 0.5408]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4745, 0.5255],
        [0.3870, 0.6130],
        [0.4107, 0.5893],
        [0.4136, 0.5864],
        [0.4292, 0.5708],
        [0.4725, 0.5275],
        [0.4664, 0.5336],
        [0.4872, 0.5128],
        [0.4922, 0.5078],
        [0.4088, 0.5912],
        [0.4256, 0.5744],
        [0.3721, 0.6279]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4466, 0.5534],
        [0.4731, 0.5269],
        [0.5124, 0.4876],
        [0.4450, 0.5550],
        [0.3790, 0.6210],
        [0.4720, 0.5280],
        [0.4416, 0.5584],
        [0.4257, 0.5743],
        [0.4726, 0.5274],
        [0.4623, 0.5377],
        [0.4341, 0.5659],
        [0.4523, 0.5477]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4465, 0.5535],
        [0.4640, 0.5360],
        [0.4360, 0.5640],
        [0.4548, 0.5452],
        [0.4208, 0.5792],
        [0.4650, 0.5350],
        [0.3741, 0.6259],
        [0.4471, 0.5529],
        [0.4528, 0.5472],
        [0.5105, 0.4895],
        [0.5100, 0.4900],
        [0.5065, 0.4935]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4415, 0.5585],
        [0.4136, 0.5864],
        [0.3968, 0.6032],
        [0.4426, 0.5574],
        [0.4869, 0.5131],
        [0.4555, 0.5445],
        [0.3950, 0.6050],
        [0.4528, 0.5472],
        [0.4524, 0.5476],
        [0.4718, 0.5282],
        [0.4414, 0.5586],
        [0.4530, 0.5470]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4474, 0.5526],
        [0.4901, 0.5099],
        [0.4910, 0.5090],
        [0.4696, 0.5304],
        [0.4735, 0.5265],
        [0.4703, 0.5297],
        [0.4271, 0.5729],
        [0.5031, 0.4969],
        [0.4962, 0.5038],
        [0.4707, 0.5293],
        [0.4281, 0.5719],
        [0.4766, 0.5234]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4944, 0.5056],
        [0.4762, 0.5238],
        [0.4276, 0.5724],
        [0.3306, 0.6694],
        [0.4563, 0.5437],
        [0.4893, 0.5107],
        [0.4808, 0.5192],
        [0.5322, 0.4678],
        [0.4923, 0.5077],
        [0.4691, 0.5309],
        [0.3961, 0.6039],
        [0.4674, 0.5326]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4218, 0.5782],
        [0.4259, 0.5741],
        [0.4499, 0.5501],
        [0.4770, 0.5230],
        [0.5605, 0.4395],
        [0.4167, 0.5833],
        [0.4236, 0.5764],
        [0.5232, 0.4768],
        [0.5018, 0.4982],
        [0.4844, 0.5156],
        [0.4605, 0.5395],
        [0.4754, 0.5246]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5076, 0.4924],
        [0.4472, 0.5528],
        [0.4251, 0.5749],
        [0.4622, 0.5378],
        [0.4337, 0.5663],
        [0.4582, 0.5418],
        [0.4055, 0.5945],
        [0.4239, 0.5761],
        [0.4552, 0.5448],
        [0.4477, 0.5523],
        [0.4380, 0.5620],
        [0.5373, 0.4627]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5016, 0.4984],
        [0.4891, 0.5109],
        [0.3902, 0.6098],
        [0.5293, 0.4707],
        [0.4828, 0.5172],
        [0.5254, 0.4746],
        [0.3907, 0.6093],
        [0.4792, 0.5208],
        [0.4801, 0.5199],
        [0.4702, 0.5298],
        [0.4664, 0.5336],
        [0.5131, 0.4869]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4406, 0.5594],
        [0.5045, 0.4955],
        [0.4806, 0.5194],
        [0.4200, 0.5800],
        [0.4098, 0.5902],
        [0.5420, 0.4580],
        [0.3726, 0.6274],
        [0.5224, 0.4776],
        [0.4349, 0.5651],
        [0.3725, 0.6275],
        [0.3757, 0.6243],
        [0.4963, 0.5037]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4646, 0.5354],
        [0.4153, 0.5847],
        [0.4733, 0.5267],
        [0.5222, 0.4778],
        [0.4664, 0.5336],
        [0.5092, 0.4908],
        [0.4918, 0.5082],
        [0.4023, 0.5977],
        [0.4781, 0.5219],
        [0.4842, 0.5158],
        [0.4447, 0.5553],
        [0.5229, 0.4771]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4194, 0.5806],
        [0.4786, 0.5214],
        [0.4304, 0.5696],
        [0.4569, 0.5431],
        [0.4362, 0.5638],
        [0.4521, 0.5479],
        [0.5143, 0.4857],
        [0.5264, 0.4736],
        [0.4532, 0.5468],
        [0.4725, 0.5275],
        [0.5025, 0.4975],
        [0.4701, 0.5299]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4742, 0.5258],
        [0.4700, 0.5300],
        [0.5285, 0.4715],
        [0.4488, 0.5512],
        [0.4377, 0.5623],
        [0.4591, 0.5409],
        [0.3985, 0.6015],
        [0.4638, 0.5362],
        [0.4763, 0.5237],
        [0.4483, 0.5517],
        [0.4766, 0.5234],
        [0.4640, 0.5360]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4282, 0.5718],
        [0.4978, 0.5022],
        [0.4682, 0.5318],
        [0.4347, 0.5653],
        [0.4918, 0.5082],
        [0.5057, 0.4943],
        [0.4351, 0.5649],
        [0.4584, 0.5416],
        [0.4695, 0.5305],
        [0.4523, 0.5477],
        [0.5135, 0.4865],
        [0.5866, 0.4134]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4977, 0.5023],
        [0.4257, 0.5743],
        [0.4873, 0.5127],
        [0.4387, 0.5613],
        [0.4677, 0.5323],
        [0.4961, 0.5039],
        [0.4655, 0.5345],
        [0.4597, 0.5403],
        [0.4776, 0.5224],
        [0.4881, 0.5119],
        [0.4448, 0.5552],
        [0.3759, 0.6241]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4402, 0.5598],
        [0.5111, 0.4889],
        [0.5062, 0.4938],
        [0.5008, 0.4992],
        [0.5109, 0.4891],
        [0.4904, 0.5096],
        [0.4301, 0.5699],
        [0.4960, 0.5040],
        [0.5305, 0.4695],
        [0.5338, 0.4662],
        [0.4774, 0.5226],
        [0.5033, 0.4967]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4687, 0.5313],
        [0.5436, 0.4564],
        [0.4633, 0.5367],
        [0.3879, 0.6121],
        [0.4706, 0.5294],
        [0.5483, 0.4517],
        [0.4374, 0.5626],
        [0.4939, 0.5061],
        [0.4934, 0.5066],
        [0.4996, 0.5004],
        [0.3886, 0.6114],
        [0.5235, 0.4765]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4274, 0.5726],
        [0.4731, 0.5269],
        [0.4365, 0.5635],
        [0.4227, 0.5773],
        [0.4961, 0.5039],
        [0.4246, 0.5754],
        [0.4152, 0.5848],
        [0.5091, 0.4909],
        [0.4150, 0.5850],
        [0.4367, 0.5633],
        [0.5326, 0.4674],
        [0.5199, 0.4801]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4623, 0.5377],
        [0.4910, 0.5090],
        [0.4651, 0.5349],
        [0.4655, 0.5345],
        [0.4626, 0.5374],
        [0.4265, 0.5735],
        [0.4122, 0.5878],
        [0.4126, 0.5874],
        [0.4680, 0.5320],
        [0.4610, 0.5390],
        [0.4515, 0.5485],
        [0.4883, 0.5117]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4419, 0.5581],
        [0.5364, 0.4636],
        [0.4667, 0.5333],
        [0.4272, 0.5728],
        [0.4670, 0.5330],
        [0.5698, 0.4302],
        [0.4061, 0.5939],
        [0.4803, 0.5197],
        [0.4540, 0.5460],
        [0.4753, 0.5247],
        [0.4854, 0.5146],
        [0.4968, 0.5032]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5036, 0.4964],
        [0.4718, 0.5282],
        [0.4561, 0.5439],
        [0.5057, 0.4943],
        [0.5108, 0.4892],
        [0.5175, 0.4825],
        [0.4240, 0.5760],
        [0.4787, 0.5213],
        [0.4651, 0.5349],
        [0.3831, 0.6169],
        [0.4671, 0.5329],
        [0.4259, 0.5741]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4132, 0.5868],
        [0.4803, 0.5197],
        [0.4962, 0.5038],
        [0.5085, 0.4915],
        [0.5158, 0.4842],
        [0.4587, 0.5413],
        [0.4758, 0.5242],
        [0.4175, 0.5825],
        [0.4133, 0.5867],
        [0.4419, 0.5581],
        [0.4479, 0.5521],
        [0.4803, 0.5197]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4296, 0.5704],
        [0.4250, 0.5750],
        [0.4473, 0.5527],
        [0.4204, 0.5796],
        [0.4751, 0.5249],
        [0.4920, 0.5080],
        [0.5043, 0.4957],
        [0.4872, 0.5128],
        [0.4906, 0.5094],
        [0.4613, 0.5387],
        [0.4631, 0.5369],
        [0.4637, 0.5363]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4871, 0.5129],
        [0.4371, 0.5629],
        [0.5294, 0.4706],
        [0.4981, 0.5019],
        [0.4158, 0.5842],
        [0.5139, 0.4861],
        [0.4359, 0.5641],
        [0.5051, 0.4949],
        [0.4490, 0.5510],
        [0.5204, 0.4796],
        [0.4220, 0.5780],
        [0.4987, 0.5013]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4299, 0.5701],
        [0.4578, 0.5422],
        [0.5413, 0.4587],
        [0.4492, 0.5508],
        [0.4716, 0.5284],
        [0.4861, 0.5139],
        [0.4239, 0.5761],
        [0.4644, 0.5356],
        [0.4760, 0.5240],
        [0.5077, 0.4923],
        [0.4765, 0.5235],
        [0.5630, 0.4370]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4890, 0.5110],
        [0.3971, 0.6029],
        [0.4370, 0.5630],
        [0.4990, 0.5010],
        [0.4552, 0.5448],
        [0.4531, 0.5469],
        [0.4303, 0.5697],
        [0.4916, 0.5084],
        [0.5045, 0.4955],
        [0.4632, 0.5368],
        [0.4058, 0.5942],
        [0.3877, 0.6123]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4193, 0.5807],
        [0.5146, 0.4854],
        [0.5227, 0.4773],
        [0.5244, 0.4756],
        [0.5019, 0.4981],
        [0.5205, 0.4795],
        [0.4688, 0.5312],
        [0.5104, 0.4896],
        [0.4836, 0.5164],
        [0.4928, 0.5072],
        [0.4530, 0.5470],
        [0.5058, 0.4942]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4551, 0.5449],
        [0.4564, 0.5436],
        [0.4812, 0.5188],
        [0.4052, 0.5948],
        [0.5300, 0.4700],
        [0.5129, 0.4871],
        [0.4663, 0.5337],
        [0.4493, 0.5507],
        [0.4965, 0.5035],
        [0.5391, 0.4609],
        [0.4259, 0.5741],
        [0.5243, 0.4757]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4841, 0.5159],
        [0.5264, 0.4736],
        [0.4613, 0.5387],
        [0.4499, 0.5501],
        [0.4952, 0.5048],
        [0.4414, 0.5586],
        [0.4430, 0.5570],
        [0.5504, 0.4496],
        [0.4602, 0.5398],
        [0.4198, 0.5802],
        [0.4185, 0.5815],
        [0.4641, 0.5359]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4535, 0.5465],
        [0.5037, 0.4963],
        [0.4818, 0.5182],
        [0.4184, 0.5816],
        [0.4606, 0.5394],
        [0.4025, 0.5975],
        [0.4146, 0.5854],
        [0.4403, 0.5597],
        [0.4234, 0.5766],
        [0.5065, 0.4935],
        [0.4052, 0.5948],
        [0.5032, 0.4968]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4876, 0.5124],
        [0.4092, 0.5908],
        [0.4120, 0.5880],
        [0.4201, 0.5799],
        [0.4701, 0.5299],
        [0.5152, 0.4848],
        [0.3691, 0.6309],
        [0.3738, 0.6262],
        [0.4383, 0.5617],
        [0.4617, 0.5383],
        [0.4262, 0.5738],
        [0.4371, 0.5629]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4557, 0.5443],
        [0.4286, 0.5714],
        [0.4693, 0.5307],
        [0.4295, 0.5705],
        [0.4702, 0.5298],
        [0.4806, 0.5194],
        [0.4058, 0.5942],
        [0.4970, 0.5030],
        [0.4504, 0.5496],
        [0.4274, 0.5726],
        [0.4235, 0.5765],
        [0.4660, 0.5340]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4534, 0.5466],
        [0.4592, 0.5408],
        [0.4851, 0.5149],
        [0.5149, 0.4851],
        [0.4952, 0.5048],
        [0.5013, 0.4987],
        [0.4431, 0.5569],
        [0.3609, 0.6391],
        [0.4154, 0.5846],
        [0.4303, 0.5697],
        [0.4921, 0.5079],
        [0.4752, 0.5248]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4745, 0.5255],
        [0.4211, 0.5789],
        [0.3995, 0.6005],
        [0.3540, 0.6460],
        [0.4737, 0.5263],
        [0.4392, 0.5608],
        [0.4885, 0.5115],
        [0.5080, 0.4920],
        [0.4665, 0.5335],
        [0.4913, 0.5087],
        [0.4814, 0.5186],
        [0.4513, 0.5487]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4809, 0.5191],
        [0.3966, 0.6034],
        [0.4886, 0.5114],
        [0.4432, 0.5568],
        [0.4007, 0.5993],
        [0.4186, 0.5814],
        [0.4342, 0.5658],
        [0.4993, 0.5007],
        [0.4741, 0.5259],
        [0.4342, 0.5658],
        [0.4009, 0.5991],
        [0.4463, 0.5537]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4688, 0.5312],
        [0.5012, 0.4988],
        [0.4870, 0.5130],
        [0.4128, 0.5872],
        [0.4632, 0.5368],
        [0.4757, 0.5243],
        [0.4424, 0.5576],
        [0.4733, 0.5267],
        [0.4688, 0.5312],
        [0.4640, 0.5360],
        [0.5088, 0.4912],
        [0.5465, 0.4535]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4498, 0.5502],
        [0.3691, 0.6309],
        [0.4203, 0.5797],
        [0.4772, 0.5228],
        [0.5448, 0.4552],
        [0.4843, 0.5157],
        [0.4268, 0.5732],
        [0.4669, 0.5331],
        [0.4741, 0.5259],
        [0.4571, 0.5429],
        [0.4591, 0.5409],
        [0.4516, 0.5484]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4309, 0.5691],
        [0.4649, 0.5351],
        [0.4706, 0.5294],
        [0.4910, 0.5090],
        [0.4724, 0.5276],
        [0.4817, 0.5183],
        [0.3807, 0.6193],
        [0.4764, 0.5236],
        [0.4657, 0.5343],
        [0.5251, 0.4749],
        [0.4477, 0.5523],
        [0.5330, 0.4670]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4109, 0.5891],
        [0.4589, 0.5411],
        [0.4512, 0.5488],
        [0.3974, 0.6026],
        [0.4692, 0.5308],
        [0.5927, 0.4073],
        [0.4607, 0.5393],
        [0.5229, 0.4771],
        [0.4612, 0.5388],
        [0.5362, 0.4638],
        [0.3587, 0.6413],
        [0.4124, 0.5876]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4577, 0.5423],
        [0.5070, 0.4930],
        [0.4709, 0.5291],
        [0.4815, 0.5185],
        [0.5321, 0.4679],
        [0.4306, 0.5694],
        [0.4374, 0.5626],
        [0.5484, 0.4516],
        [0.4422, 0.5578],
        [0.4967, 0.5033],
        [0.4276, 0.5724],
        [0.5244, 0.4756]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4541, 0.5459],
        [0.4998, 0.5002],
        [0.4944, 0.5056],
        [0.4267, 0.5733],
        [0.4435, 0.5565],
        [0.4619, 0.5381],
        [0.4763, 0.5237],
        [0.4843, 0.5157],
        [0.4435, 0.5565],
        [0.4408, 0.5592],
        [0.4901, 0.5099],
        [0.5338, 0.4662]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4397, 0.5603],
        [0.4360, 0.5640],
        [0.3744, 0.6256],
        [0.4184, 0.5816],
        [0.4840, 0.5160],
        [0.5048, 0.4952],
        [0.3106, 0.6894],
        [0.4443, 0.5557],
        [0.4664, 0.5336],
        [0.4295, 0.5705],
        [0.4057, 0.5943],
        [0.4864, 0.5136]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5007, 0.4993],
        [0.4282, 0.5718],
        [0.4114, 0.5886],
        [0.4691, 0.5309],
        [0.4283, 0.5717],
        [0.5072, 0.4928],
        [0.4805, 0.5195],
        [0.5012, 0.4988],
        [0.4763, 0.5237],
        [0.3779, 0.6221],
        [0.4172, 0.5828],
        [0.5091, 0.4909]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4512, 0.5488],
        [0.4339, 0.5661],
        [0.4633, 0.5367],
        [0.4859, 0.5141],
        [0.5064, 0.4936],
        [0.5110, 0.4890],
        [0.4325, 0.5675],
        [0.3818, 0.6182],
        [0.4694, 0.5306],
        [0.4220, 0.5780],
        [0.4695, 0.5305],
        [0.4899, 0.5101]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4549, 0.5451],
        [0.4720, 0.5280],
        [0.5093, 0.4907],
        [0.3896, 0.6104],
        [0.5312, 0.4688],
        [0.5130, 0.4870],
        [0.5622, 0.4378],
        [0.4731, 0.5269],
        [0.5030, 0.4970],
        [0.3933, 0.6067],
        [0.5039, 0.4961],
        [0.4471, 0.5529]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4904, 0.5096],
        [0.4361, 0.5639],
        [0.5556, 0.4444],
        [0.4676, 0.5324],
        [0.3896, 0.6104],
        [0.4924, 0.5076],
        [0.4238, 0.5762],
        [0.5116, 0.4884],
        [0.5152, 0.4848],
        [0.4421, 0.5579],
        [0.4687, 0.5313],
        [0.4774, 0.5226]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4302, 0.5698],
        [0.4582, 0.5418],
        [0.5365, 0.4635],
        [0.4761, 0.5239],
        [0.4079, 0.5921],
        [0.4535, 0.5465],
        [0.3970, 0.6030],
        [0.4327, 0.5673],
        [0.4884, 0.5116],
        [0.4842, 0.5158],
        [0.4833, 0.5167],
        [0.5788, 0.4212]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4831, 0.5169],
        [0.4421, 0.5579],
        [0.4340, 0.5660],
        [0.5182, 0.4818],
        [0.5253, 0.4747],
        [0.5279, 0.4721],
        [0.4719, 0.5281],
        [0.4424, 0.5576],
        [0.4262, 0.5738],
        [0.4647, 0.5353],
        [0.4787, 0.5213],
        [0.4515, 0.5485]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4635, 0.5365],
        [0.4338, 0.5662],
        [0.4705, 0.5295],
        [0.4725, 0.5275],
        [0.4950, 0.5050],
        [0.5058, 0.4942],
        [0.4172, 0.5828],
        [0.5645, 0.4355],
        [0.4947, 0.5053],
        [0.5304, 0.4696],
        [0.4676, 0.5324],
        [0.4240, 0.5760]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4421, 0.5579],
        [0.4402, 0.5598],
        [0.4894, 0.5106],
        [0.3885, 0.6115],
        [0.3942, 0.6058],
        [0.5841, 0.4159],
        [0.3834, 0.6166],
        [0.4700, 0.5300],
        [0.4763, 0.5237],
        [0.4327, 0.5673],
        [0.3659, 0.6341],
        [0.5006, 0.4994]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5150, 0.4850],
        [0.4958, 0.5042],
        [0.4307, 0.5693],
        [0.4466, 0.5534],
        [0.4722, 0.5278],
        [0.3866, 0.6134],
        [0.4265, 0.5735],
        [0.4804, 0.5196],
        [0.4535, 0.5465],
        [0.4667, 0.5333],
        [0.4457, 0.5543],
        [0.4865, 0.5135]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4073, 0.5927],
        [0.4716, 0.5284],
        [0.4442, 0.5558],
        [0.4658, 0.5342],
        [0.4346, 0.5654],
        [0.4129, 0.5871],
        [0.4543, 0.5457],
        [0.4304, 0.5696],
        [0.4264, 0.5736],
        [0.4663, 0.5337],
        [0.3657, 0.6343],
        [0.5336, 0.4664]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4131, 0.5869],
        [0.4111, 0.5889],
        [0.3746, 0.6254],
        [0.4472, 0.5528],
        [0.4606, 0.5394],
        [0.5261, 0.4739],
        [0.3907, 0.6093],
        [0.3978, 0.6022],
        [0.4555, 0.5445],
        [0.4008, 0.5992],
        [0.4293, 0.5707],
        [0.4573, 0.5427]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4547, 0.5453],
        [0.4198, 0.5802],
        [0.4561, 0.5439],
        [0.4006, 0.5994],
        [0.4476, 0.5524],
        [0.5286, 0.4714],
        [0.4393, 0.5607],
        [0.4956, 0.5044],
        [0.3583, 0.6417],
        [0.3912, 0.6088],
        [0.4355, 0.5645],
        [0.3874, 0.6126]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4510, 0.5490],
        [0.5131, 0.4869],
        [0.4605, 0.5395],
        [0.4945, 0.5055],
        [0.4317, 0.5683],
        [0.4867, 0.5133],
        [0.4864, 0.5136],
        [0.4044, 0.5956],
        [0.4316, 0.5684],
        [0.4322, 0.5678],
        [0.4634, 0.5366],
        [0.4480, 0.5520]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4151, 0.5849],
        [0.4469, 0.5531],
        [0.5108, 0.4892],
        [0.3975, 0.6025],
        [0.5190, 0.4810],
        [0.5376, 0.4624],
        [0.4950, 0.5050],
        [0.4650, 0.5350],
        [0.5123, 0.4877],
        [0.4775, 0.5225],
        [0.4026, 0.5974],
        [0.4003, 0.5997]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4766, 0.5234],
        [0.4380, 0.5620],
        [0.5057, 0.4943],
        [0.4599, 0.5401],
        [0.4153, 0.5847],
        [0.4517, 0.5483],
        [0.4109, 0.5891],
        [0.4712, 0.5288],
        [0.5225, 0.4775],
        [0.4517, 0.5483],
        [0.4301, 0.5699],
        [0.4254, 0.5746]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4747, 0.5253],
        [0.5099, 0.4901],
        [0.4843, 0.5157],
        [0.4637, 0.5363],
        [0.3695, 0.6305],
        [0.4453, 0.5547],
        [0.4273, 0.5727],
        [0.3985, 0.6015],
        [0.4532, 0.5468],
        [0.4452, 0.5548],
        [0.4281, 0.5719],
        [0.5314, 0.4686]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4368, 0.5632],
        [0.4111, 0.5889],
        [0.4020, 0.5980],
        [0.4424, 0.5576],
        [0.4975, 0.5025],
        [0.4584, 0.5416],
        [0.4350, 0.5650],
        [0.4817, 0.5183],
        [0.5008, 0.4992],
        [0.4581, 0.5419],
        [0.4109, 0.5891],
        [0.4292, 0.5708]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4835, 0.5165],
        [0.4141, 0.5859],
        [0.5057, 0.4943],
        [0.4833, 0.5167],
        [0.4504, 0.5496],
        [0.4480, 0.5520],
        [0.3336, 0.6664],
        [0.4088, 0.5912],
        [0.4795, 0.5205],
        [0.5268, 0.4732],
        [0.4508, 0.5492],
        [0.4954, 0.5046]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4415, 0.5585],
        [0.4558, 0.5442],
        [0.5101, 0.4899],
        [0.3045, 0.6955],
        [0.4484, 0.5516],
        [0.4749, 0.5251],
        [0.4417, 0.5583],
        [0.5063, 0.4937],
        [0.4465, 0.5535],
        [0.5245, 0.4755],
        [0.3550, 0.6450],
        [0.4500, 0.5500]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4742, 0.5258],
        [0.4437, 0.5563],
        [0.3913, 0.6087],
        [0.5138, 0.4862],
        [0.4698, 0.5302],
        [0.4323, 0.5677],
        [0.4832, 0.5168],
        [0.4754, 0.5246],
        [0.4299, 0.5701],
        [0.4799, 0.5201],
        [0.4534, 0.5466],
        [0.4967, 0.5033]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4489, 0.5511],
        [0.4966, 0.5034],
        [0.4298, 0.5702],
        [0.4126, 0.5874],
        [0.4437, 0.5563],
        [0.4380, 0.5620],
        [0.3753, 0.6247],
        [0.3950, 0.6050],
        [0.4497, 0.5503],
        [0.4616, 0.5384],
        [0.4226, 0.5774],
        [0.5079, 0.4921]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4133, 0.5867],
        [0.3942, 0.6058],
        [0.4266, 0.5734],
        [0.4070, 0.5930],
        [0.5068, 0.4932],
        [0.5107, 0.4893],
        [0.3350, 0.6650],
        [0.4579, 0.5421],
        [0.4781, 0.5219],
        [0.4210, 0.5790],
        [0.4174, 0.5826],
        [0.4980, 0.5020]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5280, 0.4720],
        [0.3964, 0.6036],
        [0.4964, 0.5036],
        [0.4949, 0.5051],
        [0.5012, 0.4988],
        [0.5009, 0.4991],
        [0.3783, 0.6217],
        [0.5034, 0.4966],
        [0.4209, 0.5791],
        [0.4101, 0.5899],
        [0.4733, 0.5267],
        [0.4955, 0.5045]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5015, 0.4985],
        [0.5157, 0.4843],
        [0.5426, 0.4574],
        [0.5152, 0.4848],
        [0.5093, 0.4907],
        [0.5821, 0.4179],
        [0.4154, 0.5846],
        [0.4263, 0.5737],
        [0.4711, 0.5289],
        [0.4188, 0.5812],
        [0.4546, 0.5454],
        [0.4496, 0.5504]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4622, 0.5378],
        [0.4420, 0.5580],
        [0.5170, 0.4830],
        [0.4249, 0.5751],
        [0.5036, 0.4964],
        [0.5235, 0.4765],
        [0.5172, 0.4828],
        [0.4774, 0.5226],
        [0.4734, 0.5266],
        [0.5592, 0.4408],
        [0.4459, 0.5541],
        [0.3920, 0.6080]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4404, 0.5596],
        [0.4528, 0.5472],
        [0.5257, 0.4743],
        [0.4002, 0.5998],
        [0.4478, 0.5522],
        [0.4495, 0.5505],
        [0.4413, 0.5587],
        [0.4645, 0.5355],
        [0.5332, 0.4668],
        [0.4659, 0.5341],
        [0.4628, 0.5372],
        [0.5203, 0.4797]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5109, 0.4891],
        [0.5094, 0.4906],
        [0.5381, 0.4619],
        [0.4953, 0.5047],
        [0.4754, 0.5246],
        [0.4519, 0.5481],
        [0.4794, 0.5206],
        [0.4437, 0.5563],
        [0.4522, 0.5478],
        [0.5466, 0.4534],
        [0.5127, 0.4873],
        [0.6230, 0.3770]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5148, 0.4852],
        [0.4880, 0.5120],
        [0.4632, 0.5368],
        [0.4343, 0.5657],
        [0.4730, 0.5270],
        [0.4402, 0.5598],
        [0.4423, 0.5577],
        [0.4493, 0.5507],
        [0.5717, 0.4283],
        [0.4465, 0.5535],
        [0.4286, 0.5714],
        [0.4761, 0.5239]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4245, 0.5755],
        [0.4555, 0.5445],
        [0.4696, 0.5304],
        [0.4747, 0.5253],
        [0.3983, 0.6017],
        [0.5037, 0.4963],
        [0.3954, 0.6046],
        [0.4560, 0.5440],
        [0.5107, 0.4893],
        [0.5801, 0.4199],
        [0.4881, 0.5119],
        [0.4825, 0.5175]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4670, 0.5330],
        [0.5230, 0.4770],
        [0.5093, 0.4907],
        [0.4297, 0.5703],
        [0.4706, 0.5294],
        [0.5672, 0.4328],
        [0.4461, 0.5539],
        [0.5628, 0.4372],
        [0.5359, 0.4641],
        [0.5270, 0.4730],
        [0.4193, 0.5807],
        [0.5145, 0.4855]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4751, 0.5249],
        [0.4829, 0.5171],
        [0.4728, 0.5272],
        [0.5267, 0.4733],
        [0.5191, 0.4809],
        [0.4338, 0.5662],
        [0.3933, 0.6067],
        [0.5073, 0.4927],
        [0.4407, 0.5593],
        [0.4803, 0.5197],
        [0.4562, 0.5438],
        [0.4514, 0.5486]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5252, 0.4748],
        [0.5366, 0.4634],
        [0.4815, 0.5185],
        [0.5526, 0.4474],
        [0.4797, 0.5203],
        [0.4278, 0.5722],
        [0.4637, 0.5363],
        [0.4457, 0.5543],
        [0.4952, 0.5048],
        [0.4746, 0.5254],
        [0.5003, 0.4997],
        [0.5728, 0.4272]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4588, 0.5412],
        [0.5042, 0.4958],
        [0.4055, 0.5945],
        [0.4626, 0.5374],
        [0.4887, 0.5113],
        [0.5373, 0.4627],
        [0.4083, 0.5917],
        [0.4681, 0.5319],
        [0.5090, 0.4910],
        [0.4690, 0.5310],
        [0.4671, 0.5329],
        [0.4544, 0.5456]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4988, 0.5012],
        [0.4563, 0.5437],
        [0.4943, 0.5057],
        [0.4761, 0.5239],
        [0.4842, 0.5158],
        [0.5207, 0.4793],
        [0.4454, 0.5546],
        [0.4736, 0.5264],
        [0.4859, 0.5141],
        [0.4540, 0.5460],
        [0.4903, 0.5097],
        [0.4633, 0.5367]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4716, 0.5284],
        [0.4534, 0.5466],
        [0.4880, 0.5120],
        [0.4642, 0.5358],
        [0.5292, 0.4708],
        [0.5318, 0.4682],
        [0.4926, 0.5074],
        [0.4942, 0.5058],
        [0.3978, 0.6022],
        [0.5018, 0.4982],
        [0.5098, 0.4902],
        [0.3888, 0.6112]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4214, 0.5786],
        [0.4485, 0.5515],
        [0.4860, 0.5140],
        [0.3825, 0.6175],
        [0.5094, 0.4906],
        [0.4950, 0.5050],
        [0.5068, 0.4932],
        [0.4373, 0.5627],
        [0.4235, 0.5765],
        [0.4638, 0.5362],
        [0.4362, 0.5638],
        [0.3890, 0.6110]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4941, 0.5059],
        [0.4465, 0.5535],
        [0.5080, 0.4920],
        [0.4367, 0.5633],
        [0.4319, 0.5681],
        [0.5231, 0.4769],
        [0.4874, 0.5126],
        [0.4795, 0.5205],
        [0.5030, 0.4970],
        [0.4435, 0.5565],
        [0.4192, 0.5808],
        [0.5014, 0.4986]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4594, 0.5406],
        [0.4633, 0.5367],
        [0.4705, 0.5295],
        [0.5116, 0.4884],
        [0.4304, 0.5696],
        [0.4797, 0.5203],
        [0.4605, 0.5395],
        [0.4303, 0.5697],
        [0.4342, 0.5658],
        [0.4895, 0.5105],
        [0.5555, 0.4445],
        [0.5617, 0.4383]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5209, 0.4791],
        [0.3953, 0.6047],
        [0.4920, 0.5080],
        [0.4775, 0.5225],
        [0.5490, 0.4510],
        [0.5335, 0.4665],
        [0.4317, 0.5683],
        [0.4615, 0.5385],
        [0.5477, 0.4523],
        [0.4917, 0.5083],
        [0.4747, 0.5253],
        [0.4598, 0.5402]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4002, 0.5998],
        [0.4128, 0.5872],
        [0.4961, 0.5039],
        [0.5533, 0.4467],
        [0.4156, 0.5844],
        [0.4374, 0.5626],
        [0.4503, 0.5497],
        [0.5186, 0.4814],
        [0.5563, 0.4437],
        [0.5423, 0.4577],
        [0.4905, 0.5095],
        [0.4628, 0.5372]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4491, 0.5509],
        [0.4281, 0.5719],
        [0.5125, 0.4875],
        [0.3864, 0.6136],
        [0.4363, 0.5637],
        [0.5652, 0.4348],
        [0.4202, 0.5798],
        [0.4151, 0.5849],
        [0.4624, 0.5376],
        [0.4471, 0.5529],
        [0.3745, 0.6255],
        [0.4427, 0.5573]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5094, 0.4906],
        [0.4614, 0.5386],
        [0.4311, 0.5689],
        [0.4243, 0.5757],
        [0.4543, 0.5457],
        [0.4273, 0.5727],
        [0.3303, 0.6697],
        [0.5718, 0.4282],
        [0.4390, 0.5610],
        [0.4546, 0.5454],
        [0.4269, 0.5731],
        [0.4714, 0.5286]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4738, 0.5262],
        [0.5265, 0.4735],
        [0.5194, 0.4806],
        [0.4480, 0.5520],
        [0.4999, 0.5001],
        [0.4074, 0.5926],
        [0.4867, 0.5133],
        [0.3855, 0.6145],
        [0.4924, 0.5076],
        [0.4899, 0.5101],
        [0.5485, 0.4515],
        [0.5152, 0.4848]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4268, 0.5732],
        [0.4333, 0.5667],
        [0.4145, 0.5855],
        [0.4817, 0.5183],
        [0.5039, 0.4961],
        [0.4480, 0.5520],
        [0.3959, 0.6041],
        [0.4552, 0.5448],
        [0.4730, 0.5270],
        [0.5189, 0.4811],
        [0.4473, 0.5527],
        [0.4668, 0.5332]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4707, 0.5293],
        [0.3751, 0.6249],
        [0.4613, 0.5387],
        [0.4938, 0.5062],
        [0.3714, 0.6286],
        [0.5252, 0.4748],
        [0.3975, 0.6025],
        [0.4436, 0.5564],
        [0.4552, 0.5448],
        [0.4079, 0.5921],
        [0.5088, 0.4912],
        [0.4577, 0.5423]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4297, 0.5703],
        [0.5157, 0.4843],
        [0.5129, 0.4871],
        [0.5182, 0.4818],
        [0.4639, 0.5361],
        [0.5099, 0.4901],
        [0.4488, 0.5512],
        [0.3944, 0.6056],
        [0.4342, 0.5658],
        [0.4649, 0.5351],
        [0.4969, 0.5031],
        [0.4343, 0.5657]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5282, 0.4718],
        [0.4369, 0.5631],
        [0.5016, 0.4984],
        [0.3405, 0.6595],
        [0.4437, 0.5563],
        [0.5184, 0.4816],
        [0.5426, 0.4574],
        [0.4909, 0.5091],
        [0.4790, 0.5210],
        [0.5237, 0.4763],
        [0.4439, 0.5561],
        [0.3714, 0.6286]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4390, 0.5610],
        [0.4425, 0.5575],
        [0.5378, 0.4622],
        [0.4240, 0.5760],
        [0.4198, 0.5802],
        [0.4678, 0.5322],
        [0.3971, 0.6029],
        [0.5323, 0.4677],
        [0.4882, 0.5118],
        [0.4381, 0.5619],
        [0.3968, 0.6032],
        [0.4890, 0.5110]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4859, 0.5141],
        [0.4816, 0.5184],
        [0.5083, 0.4917],
        [0.4262, 0.5738],
        [0.4065, 0.5935],
        [0.4946, 0.5054],
        [0.4091, 0.5909],
        [0.3829, 0.6171],
        [0.4257, 0.5743],
        [0.4984, 0.5016],
        [0.5188, 0.4812],
        [0.5569, 0.4431]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4704, 0.5296],
        [0.4127, 0.5873],
        [0.3709, 0.6291],
        [0.4344, 0.5656],
        [0.5292, 0.4708],
        [0.4887, 0.5113],
        [0.4387, 0.5613],
        [0.4852, 0.5148],
        [0.5279, 0.4721],
        [0.4341, 0.5659],
        [0.4263, 0.5737],
        [0.4130, 0.5870]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4361, 0.5639],
        [0.4290, 0.5710],
        [0.4563, 0.5437],
        [0.5009, 0.4991],
        [0.4545, 0.5455],
        [0.4712, 0.5288],
        [0.3470, 0.6530],
        [0.4740, 0.5260],
        [0.4597, 0.5403],
        [0.5049, 0.4951],
        [0.4947, 0.5053],
        [0.5010, 0.4990]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4101, 0.5899],
        [0.4430, 0.5570],
        [0.5161, 0.4839],
        [0.3897, 0.6103],
        [0.4455, 0.5545],
        [0.6164, 0.3836],
        [0.4536, 0.5464],
        [0.5011, 0.4989],
        [0.5465, 0.4535],
        [0.5250, 0.4750],
        [0.3937, 0.6063],
        [0.4681, 0.5319]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5272, 0.4728],
        [0.5147, 0.4853],
        [0.4486, 0.5514],
        [0.4944, 0.5056],
        [0.5084, 0.4916],
        [0.3533, 0.6467],
        [0.4463, 0.5537],
        [0.5185, 0.4815],
        [0.4287, 0.5713],
        [0.4862, 0.5138],
        [0.3775, 0.6225],
        [0.4781, 0.5219]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4986, 0.5014],
        [0.5190, 0.4810],
        [0.5121, 0.4879],
        [0.4970, 0.5030],
        [0.4802, 0.5198],
        [0.4312, 0.5688],
        [0.3916, 0.6084],
        [0.4610, 0.5390],
        [0.4491, 0.5509],
        [0.5028, 0.4972],
        [0.5049, 0.4951],
        [0.5361, 0.4639]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.5119, 0.4881],
        [0.4181, 0.5819],
        [0.3991, 0.6009],
        [0.4259, 0.5741],
        [0.4386, 0.5614],
        [0.5301, 0.4699],
        [0.4160, 0.5840],
        [0.4552, 0.5448],
        [0.4632, 0.5368],
        [0.5106, 0.4894],
        [0.4016, 0.5984],
        [0.5186, 0.4814]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4487, 0.5513],
        [0.4214, 0.5786],
        [0.4393, 0.5607],
        [0.4050, 0.5950],
        [0.5034, 0.4966],
        [0.5484, 0.4516],
        [0.4035, 0.5965],
        [0.4668, 0.5332],
        [0.4789, 0.5211],
        [0.4493, 0.5507],
        [0.4963, 0.5037],
        [0.4204, 0.5796]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4183, 0.5817],
        [0.4681, 0.5319],
        [0.4458, 0.5542],
        [0.4237, 0.5763],
        [0.4430, 0.5570],
        [0.4840, 0.5160],
        [0.3939, 0.6061],
        [0.3971, 0.6029],
        [0.4096, 0.5904],
        [0.3933, 0.6067],
        [0.4351, 0.5649],
        [0.4621, 0.5379]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5012, 0.4988],
        [0.3878, 0.6122],
        [0.5033, 0.4967],
        [0.3806, 0.6194],
        [0.5104, 0.4896],
        [0.5316, 0.4684],
        [0.5072, 0.4928],
        [0.4188, 0.5812],
        [0.4997, 0.5003],
        [0.4806, 0.5194],
        [0.4622, 0.5378],
        [0.4172, 0.5828]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4788, 0.5212],
        [0.4769, 0.5231],
        [0.4723, 0.5277],
        [0.4742, 0.5258],
        [0.4139, 0.5861],
        [0.4151, 0.5849],
        [0.4495, 0.5505],
        [0.5060, 0.4940],
        [0.4838, 0.5162],
        [0.4498, 0.5502],
        [0.4062, 0.5938],
        [0.4756, 0.5244]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4688, 0.5312],
        [0.4923, 0.5077],
        [0.5087, 0.4913],
        [0.4915, 0.5085],
        [0.3874, 0.6126],
        [0.4140, 0.5860],
        [0.4324, 0.5676],
        [0.3640, 0.6360],
        [0.4475, 0.5525],
        [0.4645, 0.5355],
        [0.4729, 0.5271],
        [0.5590, 0.4410]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5102, 0.4898],
        [0.4623, 0.5377],
        [0.4068, 0.5932],
        [0.4485, 0.5515],
        [0.5480, 0.4520],
        [0.4699, 0.5301],
        [0.4019, 0.5981],
        [0.4509, 0.5491],
        [0.4820, 0.5180],
        [0.4652, 0.5348],
        [0.4908, 0.5092],
        [0.4813, 0.5187]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4573, 0.5427],
        [0.4458, 0.5542],
        [0.5333, 0.4667],
        [0.4368, 0.5632],
        [0.4913, 0.5087],
        [0.4458, 0.5542],
        [0.4275, 0.5725],
        [0.4243, 0.5757],
        [0.4377, 0.5623],
        [0.5653, 0.4347],
        [0.4077, 0.5923],
        [0.5423, 0.4577]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4419, 0.5581],
        [0.5592, 0.4408],
        [0.5332, 0.4668],
        [0.4688, 0.5312],
        [0.4934, 0.5066],
        [0.5719, 0.4281],
        [0.5462, 0.4538],
        [0.4682, 0.5318],
        [0.5298, 0.4702],
        [0.4903, 0.5097],
        [0.4009, 0.5991],
        [0.4919, 0.5081]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4513, 0.5487],
        [0.4765, 0.5235],
        [0.4684, 0.5316],
        [0.4674, 0.5326],
        [0.5326, 0.4674],
        [0.4364, 0.5636],
        [0.4345, 0.5655],
        [0.5546, 0.4454],
        [0.4298, 0.5702],
        [0.4528, 0.5472],
        [0.4588, 0.5412],
        [0.4385, 0.5615]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4374, 0.5626],
        [0.5281, 0.4719],
        [0.5205, 0.4795],
        [0.4759, 0.5241],
        [0.4650, 0.5350],
        [0.4325, 0.5675],
        [0.4262, 0.5738],
        [0.4148, 0.5852],
        [0.5079, 0.4921],
        [0.5146, 0.4854],
        [0.4958, 0.5042],
        [0.5256, 0.4744]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4368, 0.5632],
        [0.4674, 0.5326],
        [0.3935, 0.6065],
        [0.4531, 0.5469],
        [0.5099, 0.4901],
        [0.4795, 0.5205],
        [0.3930, 0.6070],
        [0.4658, 0.5342],
        [0.4587, 0.5413],
        [0.4418, 0.5582],
        [0.4648, 0.5352],
        [0.4978, 0.5022]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5067, 0.4933],
        [0.4141, 0.5859],
        [0.4964, 0.5036],
        [0.4964, 0.5036],
        [0.5137, 0.4863],
        [0.5598, 0.4402],
        [0.4243, 0.5757],
        [0.5893, 0.4107],
        [0.5234, 0.4766],
        [0.4189, 0.5811],
        [0.5189, 0.4811],
        [0.4771, 0.5229]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4904, 0.5096],
        [0.5105, 0.4895],
        [0.5019, 0.4981],
        [0.4215, 0.5785],
        [0.4042, 0.5958],
        [0.5066, 0.4934],
        [0.4619, 0.5381],
        [0.3877, 0.6123],
        [0.4064, 0.5936],
        [0.4773, 0.5227],
        [0.4338, 0.5662],
        [0.4235, 0.5765]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4278, 0.5722],
        [0.4084, 0.5916],
        [0.4410, 0.5590],
        [0.3541, 0.6459],
        [0.4802, 0.5198],
        [0.5103, 0.4897],
        [0.4395, 0.5605],
        [0.5133, 0.4867],
        [0.4099, 0.5901],
        [0.4997, 0.5003],
        [0.3933, 0.6067],
        [0.3388, 0.6612]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4299, 0.5701],
        [0.4015, 0.5985],
        [0.4905, 0.5095],
        [0.4692, 0.5308],
        [0.4499, 0.5501],
        [0.5117, 0.4883],
        [0.3872, 0.6128],
        [0.4952, 0.5048],
        [0.4805, 0.5195],
        [0.4064, 0.5936],
        [0.3858, 0.6142],
        [0.4642, 0.5358]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4601, 0.5399],
        [0.4998, 0.5002],
        [0.5720, 0.4280],
        [0.4939, 0.5061],
        [0.3855, 0.6145],
        [0.4847, 0.5153],
        [0.4589, 0.5411],
        [0.4058, 0.5942],
        [0.4754, 0.5246],
        [0.5093, 0.4907],
        [0.4962, 0.5038],
        [0.5645, 0.4355]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5001, 0.4999],
        [0.4308, 0.5692],
        [0.3861, 0.6139],
        [0.4902, 0.5098],
        [0.5538, 0.4462],
        [0.4800, 0.5200],
        [0.4381, 0.5619],
        [0.4621, 0.5379],
        [0.5041, 0.4959],
        [0.4524, 0.5476],
        [0.4433, 0.5567],
        [0.4605, 0.5395]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4614, 0.5386],
        [0.4939, 0.5061],
        [0.4460, 0.5540],
        [0.5051, 0.4949],
        [0.4282, 0.5718],
        [0.4526, 0.5474],
        [0.4381, 0.5619],
        [0.4856, 0.5144],
        [0.4236, 0.5764],
        [0.5879, 0.4121],
        [0.4523, 0.5477],
        [0.5567, 0.4433]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4010, 0.5990],
        [0.4876, 0.5124],
        [0.5354, 0.4646],
        [0.3965, 0.6035],
        [0.4672, 0.5328],
        [0.5977, 0.4023],
        [0.4589, 0.5411],
        [0.4538, 0.5462],
        [0.4772, 0.5228],
        [0.4897, 0.5103],
        [0.3854, 0.6146],
        [0.4852, 0.5148]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4725, 0.5275],
        [0.4835, 0.5165],
        [0.4170, 0.5830],
        [0.5270, 0.4730],
        [0.5010, 0.4990],
        [0.3860, 0.6140],
        [0.3777, 0.6223],
        [0.4701, 0.5299],
        [0.4305, 0.5695],
        [0.3955, 0.6045],
        [0.3856, 0.6144],
        [0.4528, 0.5472]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4464, 0.5536],
        [0.4582, 0.5418],
        [0.4956, 0.5044],
        [0.4203, 0.5797],
        [0.4369, 0.5631],
        [0.4383, 0.5617],
        [0.3730, 0.6270],
        [0.3894, 0.6106],
        [0.4156, 0.5844],
        [0.5170, 0.4830],
        [0.4630, 0.5370],
        [0.4339, 0.5661]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4636, 0.5364],
        [0.4418, 0.5582],
        [0.3935, 0.6065],
        [0.4616, 0.5384],
        [0.4810, 0.5190],
        [0.4877, 0.5123],
        [0.3693, 0.6307],
        [0.4591, 0.5409],
        [0.4279, 0.5721],
        [0.4088, 0.5912],
        [0.4398, 0.5602],
        [0.4874, 0.5126]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4297, 0.5703],
        [0.4441, 0.5559],
        [0.4017, 0.5983],
        [0.4943, 0.5057],
        [0.5057, 0.4943],
        [0.5109, 0.4891],
        [0.4673, 0.5327],
        [0.5074, 0.4926],
        [0.4991, 0.5009],
        [0.4489, 0.5511],
        [0.5192, 0.4808],
        [0.4571, 0.5429]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5045, 0.4955],
        [0.5173, 0.4827],
        [0.4893, 0.5107],
        [0.5369, 0.4631],
        [0.4693, 0.5307],
        [0.5829, 0.4171],
        [0.4624, 0.5376],
        [0.4186, 0.5814],
        [0.4482, 0.5518],
        [0.5125, 0.4875],
        [0.5150, 0.4850],
        [0.4707, 0.5293]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4756, 0.5244],
        [0.4208, 0.5792],
        [0.4236, 0.5764],
        [0.3868, 0.6132],
        [0.5465, 0.4535],
        [0.4546, 0.5454],
        [0.5269, 0.4731],
        [0.4762, 0.5238],
        [0.4112, 0.5888],
        [0.4417, 0.5583],
        [0.4961, 0.5039],
        [0.3930, 0.6070]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4324, 0.5676],
        [0.4068, 0.5932],
        [0.5090, 0.4910],
        [0.4703, 0.5297],
        [0.3939, 0.6061],
        [0.4366, 0.5634],
        [0.4051, 0.5949],
        [0.4533, 0.5467],
        [0.4537, 0.5463],
        [0.3874, 0.6126],
        [0.4100, 0.5900],
        [0.4784, 0.5216]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4413, 0.5587],
        [0.4532, 0.5468],
        [0.4871, 0.5129],
        [0.4880, 0.5120],
        [0.4133, 0.5867],
        [0.4806, 0.5194],
        [0.3950, 0.6050],
        [0.3935, 0.6065],
        [0.4748, 0.5252],
        [0.4490, 0.5510],
        [0.4911, 0.5089],
        [0.5471, 0.4529]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4286, 0.5714],
        [0.4760, 0.5240],
        [0.4036, 0.5964],
        [0.4677, 0.5323],
        [0.5572, 0.4428],
        [0.4704, 0.5296],
        [0.4264, 0.5736],
        [0.4907, 0.5093],
        [0.4627, 0.5373],
        [0.4396, 0.5604],
        [0.4127, 0.5873],
        [0.4226, 0.5774]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4636, 0.5364],
        [0.4120, 0.5880],
        [0.4436, 0.5564],
        [0.3983, 0.6017],
        [0.4211, 0.5789],
        [0.4590, 0.5410],
        [0.4202, 0.5798],
        [0.4603, 0.5397],
        [0.4859, 0.5141],
        [0.5591, 0.4409],
        [0.4354, 0.5646],
        [0.4699, 0.5301]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3995, 0.6005],
        [0.4317, 0.5683],
        [0.5208, 0.4792],
        [0.4006, 0.5994],
        [0.4785, 0.5215],
        [0.6099, 0.3901],
        [0.4310, 0.5690],
        [0.4651, 0.5349],
        [0.4719, 0.5281],
        [0.5260, 0.4740],
        [0.3721, 0.6279],
        [0.5149, 0.4851]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4739, 0.5261],
        [0.4761, 0.5239],
        [0.4101, 0.5899],
        [0.4507, 0.5493],
        [0.4680, 0.5320],
        [0.3997, 0.6003],
        [0.3538, 0.6462],
        [0.4941, 0.5059],
        [0.4085, 0.5915],
        [0.5000, 0.5000],
        [0.3802, 0.6198],
        [0.3867, 0.6133]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4109, 0.5891],
        [0.4866, 0.5134],
        [0.4492, 0.5508],
        [0.4578, 0.5422],
        [0.4241, 0.5759],
        [0.4043, 0.5957],
        [0.3832, 0.6168],
        [0.5091, 0.4909],
        [0.4687, 0.5313],
        [0.4272, 0.5728],
        [0.4537, 0.5463],
        [0.5297, 0.4703]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4326, 0.5674],
        [0.4491, 0.5509],
        [0.3847, 0.6153],
        [0.4928, 0.5072],
        [0.4457, 0.5543],
        [0.4835, 0.5165],
        [0.3965, 0.6035],
        [0.5019, 0.4981],
        [0.4868, 0.5132],
        [0.4726, 0.5274],
        [0.4079, 0.5921],
        [0.4442, 0.5558]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5127, 0.4873],
        [0.4485, 0.5515],
        [0.4472, 0.5528],
        [0.4122, 0.5878],
        [0.4935, 0.5065],
        [0.5091, 0.4909],
        [0.3510, 0.6490],
        [0.6022, 0.3978],
        [0.4659, 0.5341],
        [0.5037, 0.4963],
        [0.4373, 0.5627],
        [0.4730, 0.5270]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4815, 0.5185],
        [0.5190, 0.4810],
        [0.5129, 0.4871],
        [0.4610, 0.5390],
        [0.4730, 0.5270],
        [0.5374, 0.4626],
        [0.4744, 0.5256],
        [0.3715, 0.6285],
        [0.4291, 0.5709],
        [0.5177, 0.4823],
        [0.4253, 0.5747],
        [0.4458, 0.5542]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4772, 0.5228],
        [0.4201, 0.5799],
        [0.4792, 0.5208],
        [0.3430, 0.6570],
        [0.5143, 0.4857],
        [0.5216, 0.4784],
        [0.4644, 0.5356],
        [0.4927, 0.5073],
        [0.4645, 0.5355],
        [0.4819, 0.5181],
        [0.5016, 0.4984],
        [0.4292, 0.5708]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4869, 0.5131],
        [0.4888, 0.5112],
        [0.5409, 0.4591],
        [0.4801, 0.5199],
        [0.4192, 0.5808],
        [0.5073, 0.4927],
        [0.4865, 0.5135],
        [0.5333, 0.4667],
        [0.5119, 0.4881],
        [0.4054, 0.5946],
        [0.4785, 0.5215],
        [0.4798, 0.5202]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4439, 0.5561],
        [0.5332, 0.4668],
        [0.4395, 0.5605],
        [0.5351, 0.4649],
        [0.4285, 0.5715],
        [0.4358, 0.5642],
        [0.4077, 0.5923],
        [0.3966, 0.6034],
        [0.5231, 0.4769],
        [0.4531, 0.5469],
        [0.4628, 0.5372],
        [0.5440, 0.4560]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5110, 0.4890],
        [0.4389, 0.5611],
        [0.4421, 0.5579],
        [0.4836, 0.5164],
        [0.5642, 0.4358],
        [0.4519, 0.5481],
        [0.3977, 0.6023],
        [0.5003, 0.4997],
        [0.5017, 0.4983],
        [0.4740, 0.5260],
        [0.4778, 0.5222],
        [0.4708, 0.5292]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4958, 0.5042],
        [0.4529, 0.5471],
        [0.4860, 0.5140],
        [0.4851, 0.5149],
        [0.4553, 0.5447],
        [0.4635, 0.5365],
        [0.4316, 0.5684],
        [0.4429, 0.5571],
        [0.4817, 0.5183],
        [0.5086, 0.4914],
        [0.4567, 0.5433],
        [0.5542, 0.4458]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4257, 0.5743],
        [0.4635, 0.5365],
        [0.5795, 0.4205],
        [0.4468, 0.5532],
        [0.5856, 0.4144],
        [0.6336, 0.3664],
        [0.4357, 0.5643],
        [0.4855, 0.5145],
        [0.5284, 0.4716],
        [0.5474, 0.4526],
        [0.4095, 0.5905],
        [0.5050, 0.4950]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4993, 0.5007],
        [0.4711, 0.5289],
        [0.4613, 0.5387],
        [0.5101, 0.4899],
        [0.5149, 0.4851],
        [0.4019, 0.5981],
        [0.3905, 0.6095],
        [0.5260, 0.4740],
        [0.4116, 0.5884],
        [0.5085, 0.4915],
        [0.4355, 0.5645],
        [0.4482, 0.5518]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4702, 0.5298],
        [0.4903, 0.5097],
        [0.4784, 0.5216],
        [0.4157, 0.5843],
        [0.4698, 0.5302],
        [0.4045, 0.5955],
        [0.4113, 0.5887],
        [0.4726, 0.5274],
        [0.4452, 0.5548],
        [0.4513, 0.5487],
        [0.4489, 0.5511],
        [0.5136, 0.4864]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4161, 0.5839],
        [0.4592, 0.5408],
        [0.4565, 0.5435],
        [0.4447, 0.5553],
        [0.4919, 0.5081],
        [0.5295, 0.4705],
        [0.3888, 0.6112],
        [0.5157, 0.4843],
        [0.5024, 0.4976],
        [0.4620, 0.5380],
        [0.4341, 0.5659],
        [0.4883, 0.5117]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4871, 0.5129],
        [0.3668, 0.6332],
        [0.4535, 0.5465],
        [0.4440, 0.5560],
        [0.4174, 0.5826],
        [0.4925, 0.5075],
        [0.4044, 0.5956],
        [0.5197, 0.4803],
        [0.4846, 0.5154],
        [0.3777, 0.6223],
        [0.4675, 0.5325],
        [0.4354, 0.5646]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4645, 0.5355],
        [0.5144, 0.4856],
        [0.4966, 0.5034],
        [0.4717, 0.5283],
        [0.4895, 0.5105],
        [0.5549, 0.4451],
        [0.4580, 0.5420],
        [0.4101, 0.5899],
        [0.4951, 0.5049],
        [0.4770, 0.5230],
        [0.4618, 0.5382],
        [0.4797, 0.5203]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4535, 0.5465],
        [0.4002, 0.5998],
        [0.4559, 0.5441],
        [0.3863, 0.6137],
        [0.4366, 0.5634],
        [0.5493, 0.4507],
        [0.4763, 0.5237],
        [0.5004, 0.4996],
        [0.4364, 0.5636],
        [0.4682, 0.5318],
        [0.5000, 0.5000],
        [0.4201, 0.5799]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4340, 0.5660],
        [0.4304, 0.5696],
        [0.4775, 0.5225],
        [0.4261, 0.5739],
        [0.4055, 0.5945],
        [0.4484, 0.5516],
        [0.4334, 0.5666],
        [0.4548, 0.5452],
        [0.4567, 0.5433],
        [0.3843, 0.6157],
        [0.3804, 0.6196],
        [0.4810, 0.5190]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4484, 0.5516],
        [0.5118, 0.4882],
        [0.4996, 0.5004],
        [0.4802, 0.5198],
        [0.3755, 0.6245],
        [0.5228, 0.4772],
        [0.4546, 0.5454],
        [0.4190, 0.5810],
        [0.4797, 0.5203],
        [0.4118, 0.5882],
        [0.4689, 0.5311],
        [0.5500, 0.4500]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4545, 0.5455],
        [0.4532, 0.5468],
        [0.4455, 0.5545],
        [0.3995, 0.6005],
        [0.5736, 0.4264],
        [0.5269, 0.4731],
        [0.4670, 0.5330],
        [0.4833, 0.5167],
        [0.5230, 0.4770],
        [0.4195, 0.5805],
        [0.4484, 0.5516],
        [0.4761, 0.5239]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.5001, 0.4999],
        [0.5178, 0.4822],
        [0.5099, 0.4901],
        [0.5153, 0.4847],
        [0.4429, 0.5571],
        [0.4789, 0.5211],
        [0.4951, 0.5049],
        [0.4593, 0.5407],
        [0.4481, 0.5519],
        [0.5172, 0.4828],
        [0.4721, 0.5279],
        [0.5486, 0.4514]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4703, 0.5297],
        [0.4467, 0.5533],
        [0.4927, 0.5073],
        [0.3801, 0.6199],
        [0.4749, 0.5251],
        [0.5711, 0.4289],
        [0.4187, 0.5813],
        [0.5359, 0.4641],
        [0.5562, 0.4438],
        [0.5089, 0.4911],
        [0.3788, 0.6212],
        [0.5022, 0.4978]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4723, 0.5277],
        [0.4828, 0.5172],
        [0.4384, 0.5616],
        [0.4281, 0.5719],
        [0.5207, 0.4793],
        [0.3492, 0.6508],
        [0.3482, 0.6518],
        [0.4691, 0.5309],
        [0.4212, 0.5788],
        [0.4673, 0.5327],
        [0.3755, 0.6245],
        [0.4717, 0.5283]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4430, 0.5570],
        [0.4918, 0.5082],
        [0.5315, 0.4685],
        [0.4872, 0.5128],
        [0.4324, 0.5676],
        [0.3973, 0.6027],
        [0.4014, 0.5986],
        [0.4661, 0.5339],
        [0.4493, 0.5507],
        [0.5433, 0.4567],
        [0.4564, 0.5436],
        [0.5000, 0.5000]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4169, 0.5831],
        [0.4265, 0.5735],
        [0.4199, 0.5801],
        [0.4540, 0.5460],
        [0.4618, 0.5382],
        [0.4613, 0.5387],
        [0.3776, 0.6224],
        [0.3786, 0.6214],
        [0.4255, 0.5745],
        [0.4074, 0.5926],
        [0.3579, 0.6421],
        [0.4889, 0.5111]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4490, 0.5510],
        [0.3792, 0.6208],
        [0.4399, 0.5601],
        [0.4401, 0.5599],
        [0.4858, 0.5142],
        [0.4679, 0.5321],
        [0.4445, 0.5555],
        [0.5591, 0.4409],
        [0.4773, 0.5227],
        [0.3766, 0.6234],
        [0.4952, 0.5048],
        [0.4446, 0.5554]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4914, 0.5086],
        [0.4967, 0.5033],
        [0.5273, 0.4727],
        [0.4361, 0.5639],
        [0.4194, 0.5806],
        [0.4870, 0.5130],
        [0.4310, 0.5690],
        [0.3999, 0.6001],
        [0.4975, 0.5025],
        [0.4677, 0.5323],
        [0.4649, 0.5351],
        [0.4389, 0.5611]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4925, 0.5075],
        [0.4646, 0.5354],
        [0.4943, 0.5057],
        [0.4377, 0.5623],
        [0.5083, 0.4917],
        [0.5326, 0.4674],
        [0.4696, 0.5304],
        [0.4587, 0.5413],
        [0.4281, 0.5719],
        [0.4928, 0.5072],
        [0.4211, 0.5789],
        [0.4258, 0.5742]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4428, 0.5572],
        [0.4179, 0.5821],
        [0.4634, 0.5366],
        [0.4485, 0.5515],
        [0.4172, 0.5828],
        [0.4406, 0.5594],
        [0.3994, 0.6006],
        [0.5606, 0.4394],
        [0.4081, 0.5919],
        [0.3691, 0.6309],
        [0.4423, 0.5577],
        [0.4519, 0.5481]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4788, 0.5212],
        [0.4640, 0.5360],
        [0.4780, 0.5220],
        [0.5210, 0.4790],
        [0.3770, 0.6230],
        [0.4959, 0.5041],
        [0.3930, 0.6070],
        [0.3722, 0.6278],
        [0.5069, 0.4931],
        [0.5111, 0.4889],
        [0.4438, 0.5562],
        [0.4754, 0.5246]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4962, 0.5038],
        [0.4769, 0.5231],
        [0.4149, 0.5851],
        [0.4572, 0.5428],
        [0.5586, 0.4414],
        [0.5599, 0.4401],
        [0.5079, 0.4921],
        [0.5039, 0.4961],
        [0.5215, 0.4785],
        [0.4820, 0.5180],
        [0.4617, 0.5383],
        [0.5080, 0.4920]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3891, 0.6109],
        [0.4049, 0.5951],
        [0.4061, 0.5939],
        [0.4743, 0.5257],
        [0.5036, 0.4964],
        [0.5294, 0.4706],
        [0.4420, 0.5580],
        [0.4630, 0.5370],
        [0.5083, 0.4917],
        [0.4928, 0.5072],
        [0.4387, 0.5613],
        [0.5635, 0.4365]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4064, 0.5936],
        [0.4935, 0.5065],
        [0.5347, 0.4653],
        [0.3596, 0.6404],
        [0.4605, 0.5395],
        [0.6153, 0.3847],
        [0.4337, 0.5663],
        [0.4530, 0.5470],
        [0.5247, 0.4753],
        [0.4558, 0.5442],
        [0.3463, 0.6537],
        [0.4670, 0.5330]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4850, 0.5150],
        [0.5361, 0.4639],
        [0.4740, 0.5260],
        [0.5236, 0.4764],
        [0.5554, 0.4446],
        [0.4597, 0.5403],
        [0.3967, 0.6033],
        [0.4989, 0.5011],
        [0.4543, 0.5457],
        [0.4728, 0.5272],
        [0.4291, 0.5709],
        [0.4638, 0.5362]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4054, 0.5946],
        [0.5628, 0.4372],
        [0.4784, 0.5216],
        [0.5003, 0.4997],
        [0.4915, 0.5085],
        [0.4233, 0.5767],
        [0.4466, 0.5534],
        [0.5034, 0.4966],
        [0.4519, 0.5481],
        [0.4558, 0.5442],
        [0.5156, 0.4844],
        [0.5639, 0.4361]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4562, 0.5438],
        [0.4440, 0.5560],
        [0.3700, 0.6300],
        [0.4835, 0.5165],
        [0.4766, 0.5234],
        [0.5039, 0.4961],
        [0.3606, 0.6394],
        [0.4777, 0.5223],
        [0.4311, 0.5689],
        [0.5013, 0.4987],
        [0.4078, 0.5922],
        [0.4628, 0.5372]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4574, 0.5426],
        [0.3826, 0.6174],
        [0.4657, 0.5343],
        [0.4732, 0.5268],
        [0.4082, 0.5918],
        [0.4782, 0.5218],
        [0.4264, 0.5736],
        [0.5949, 0.4051],
        [0.4856, 0.5144],
        [0.4260, 0.5740],
        [0.4715, 0.5285],
        [0.4598, 0.5402]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5506, 0.4494],
        [0.4719, 0.5281],
        [0.5216, 0.4784],
        [0.4882, 0.5118],
        [0.4621, 0.5379],
        [0.5424, 0.4576],
        [0.5046, 0.4954],
        [0.3729, 0.6271],
        [0.4905, 0.5095],
        [0.5039, 0.4961],
        [0.4945, 0.5055],
        [0.5092, 0.4908]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4041, 0.5959],
        [0.4094, 0.5906],
        [0.5230, 0.4770],
        [0.4371, 0.5629],
        [0.5184, 0.4816],
        [0.5186, 0.4814],
        [0.4430, 0.5570],
        [0.4576, 0.5424],
        [0.4569, 0.5431],
        [0.5148, 0.4852],
        [0.4394, 0.5606],
        [0.3845, 0.6155]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4720, 0.5280],
        [0.4016, 0.5984],
        [0.4955, 0.5045],
        [0.4184, 0.5816],
        [0.3795, 0.6205],
        [0.4284, 0.5716],
        [0.4207, 0.5793],
        [0.5137, 0.4863],
        [0.4778, 0.5222],
        [0.3588, 0.6412],
        [0.4461, 0.5539],
        [0.4398, 0.5602]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.5059, 0.4941],
        [0.5131, 0.4869],
        [0.5237, 0.4763],
        [0.4669, 0.5331],
        [0.5158, 0.4842],
        [0.5699, 0.4301],
        [0.4755, 0.5245],
        [0.4289, 0.5711],
        [0.4094, 0.5906],
        [0.4340, 0.5660],
        [0.5069, 0.4931],
        [0.5139, 0.4861]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5159, 0.4841],
        [0.5193, 0.4807],
        [0.4007, 0.5993],
        [0.3921, 0.6079],
        [0.5651, 0.4349],
        [0.4230, 0.5770],
        [0.3651, 0.6349],
        [0.4557, 0.5443],
        [0.4957, 0.5043],
        [0.4329, 0.5671],
        [0.3791, 0.6209],
        [0.4395, 0.5605]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4566, 0.5434],
        [0.4480, 0.5520],
        [0.4662, 0.5338],
        [0.5349, 0.4651],
        [0.4757, 0.5243],
        [0.4819, 0.5181],
        [0.4278, 0.5722],
        [0.4254, 0.5746],
        [0.4326, 0.5674],
        [0.5725, 0.4275],
        [0.4206, 0.5794],
        [0.4993, 0.5007]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4291, 0.5709],
        [0.4488, 0.5512],
        [0.5304, 0.4696],
        [0.4078, 0.5922],
        [0.5677, 0.4323],
        [0.5512, 0.4488],
        [0.4953, 0.5047],
        [0.5258, 0.4742],
        [0.4760, 0.5240],
        [0.5253, 0.4747],
        [0.4158, 0.5842],
        [0.4669, 0.5331]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5070, 0.4930],
        [0.4485, 0.5515],
        [0.4643, 0.5357],
        [0.4305, 0.5695],
        [0.4925, 0.5075],
        [0.4628, 0.5372],
        [0.4244, 0.5756],
        [0.4232, 0.5768],
        [0.4401, 0.5599],
        [0.4647, 0.5353],
        [0.3464, 0.6536],
        [0.4823, 0.5177]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4751, 0.5249],
        [0.4694, 0.5306],
        [0.4948, 0.5052],
        [0.4623, 0.5377],
        [0.4519, 0.5481],
        [0.4301, 0.5699],
        [0.4066, 0.5934],
        [0.4399, 0.5601],
        [0.4798, 0.5202],
        [0.4820, 0.5180],
        [0.4472, 0.5528],
        [0.4997, 0.5003]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3937, 0.6063],
        [0.4905, 0.5095],
        [0.4452, 0.5548],
        [0.4572, 0.5428],
        [0.4163, 0.5837],
        [0.4593, 0.5407],
        [0.3612, 0.6388],
        [0.4063, 0.5937],
        [0.4197, 0.5803],
        [0.4210, 0.5790],
        [0.4646, 0.5354],
        [0.5118, 0.4882]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4126, 0.5874],
        [0.4802, 0.5198],
        [0.4511, 0.5489],
        [0.4439, 0.5561],
        [0.4815, 0.5185],
        [0.4443, 0.5557],
        [0.4258, 0.5742],
        [0.5579, 0.4421],
        [0.5453, 0.4547],
        [0.4449, 0.5551],
        [0.5110, 0.4890],
        [0.5016, 0.4984]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4601, 0.5399],
        [0.4898, 0.5102],
        [0.4595, 0.5405],
        [0.4623, 0.5377],
        [0.4606, 0.5394],
        [0.4834, 0.5166],
        [0.4257, 0.5743],
        [0.4694, 0.5306],
        [0.5153, 0.4847],
        [0.4612, 0.5388],
        [0.4731, 0.5269],
        [0.4125, 0.5875]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5563, 0.4437],
        [0.4423, 0.5577],
        [0.4908, 0.5092],
        [0.4005, 0.5995],
        [0.5541, 0.4459],
        [0.4964, 0.5036],
        [0.4425, 0.5575],
        [0.5191, 0.4809],
        [0.4372, 0.5628],
        [0.5243, 0.4757],
        [0.4758, 0.5242],
        [0.3459, 0.6541]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4260, 0.5740],
        [0.4414, 0.5586],
        [0.4831, 0.5169],
        [0.4374, 0.5626],
        [0.4246, 0.5754],
        [0.3828, 0.6172],
        [0.4031, 0.5969],
        [0.5127, 0.4873],
        [0.5205, 0.4795],
        [0.4235, 0.5765],
        [0.4370, 0.5630],
        [0.5089, 0.4911]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4336, 0.5664],
        [0.4980, 0.5020],
        [0.3563, 0.6437],
        [0.5266, 0.4734],
        [0.4023, 0.5977],
        [0.4750, 0.5250],
        [0.4599, 0.5401],
        [0.4429, 0.5571],
        [0.3828, 0.6172],
        [0.4651, 0.5349],
        [0.4648, 0.5352],
        [0.5918, 0.4082]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4513, 0.5487],
        [0.4023, 0.5977],
        [0.3958, 0.6042],
        [0.4266, 0.5734],
        [0.6309, 0.3691],
        [0.5041, 0.4959],
        [0.4490, 0.5510],
        [0.4627, 0.5373],
        [0.4686, 0.5314],
        [0.4422, 0.5578],
        [0.4707, 0.5293],
        [0.4564, 0.5436]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4284, 0.5716],
        [0.4250, 0.5750],
        [0.4629, 0.5371],
        [0.4609, 0.5391],
        [0.4865, 0.5135],
        [0.4950, 0.5050],
        [0.3810, 0.6190],
        [0.4425, 0.5575],
        [0.3952, 0.6048],
        [0.5592, 0.4408],
        [0.4527, 0.5473],
        [0.5188, 0.4812]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4033, 0.5967],
        [0.4461, 0.5539],
        [0.4606, 0.5394],
        [0.3932, 0.6068],
        [0.4928, 0.5072],
        [0.5001, 0.4999],
        [0.4584, 0.5416],
        [0.4780, 0.5220],
        [0.4897, 0.5103],
        [0.4677, 0.5323],
        [0.3729, 0.6271],
        [0.4406, 0.5594]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5207, 0.4793],
        [0.3820, 0.6180],
        [0.3888, 0.6112],
        [0.3891, 0.6109],
        [0.4497, 0.5503],
        [0.4298, 0.5702],
        [0.4096, 0.5904],
        [0.5122, 0.4878],
        [0.4033, 0.5967],
        [0.5174, 0.4826],
        [0.3798, 0.6202],
        [0.3955, 0.6045]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4947, 0.5053],
        [0.4338, 0.5662],
        [0.5245, 0.4755],
        [0.5148, 0.4852],
        [0.4800, 0.5200],
        [0.4269, 0.5731],
        [0.4239, 0.5761],
        [0.5074, 0.4926],
        [0.4459, 0.5541],
        [0.3631, 0.6369],
        [0.4277, 0.5723],
        [0.4662, 0.5338]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4833, 0.5167],
        [0.5161, 0.4839],
        [0.4921, 0.5079],
        [0.5293, 0.4707],
        [0.4645, 0.5355],
        [0.4923, 0.5077],
        [0.3890, 0.6110],
        [0.4368, 0.5632],
        [0.4922, 0.5078],
        [0.4402, 0.5598],
        [0.4702, 0.5298],
        [0.4733, 0.5267]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4535, 0.5465],
        [0.4068, 0.5932],
        [0.5044, 0.4956],
        [0.4636, 0.5364],
        [0.4827, 0.5173],
        [0.5116, 0.4884],
        [0.3770, 0.6230],
        [0.6160, 0.3840],
        [0.4891, 0.5109],
        [0.4159, 0.5841],
        [0.4827, 0.5173],
        [0.5192, 0.4808]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5215, 0.4785],
        [0.5038, 0.4962],
        [0.4994, 0.5006],
        [0.4496, 0.5504],
        [0.4338, 0.5662],
        [0.4830, 0.5170],
        [0.4275, 0.5725],
        [0.4407, 0.5593],
        [0.4876, 0.5124],
        [0.5049, 0.4951],
        [0.4470, 0.5530],
        [0.4550, 0.5450]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4340, 0.5660],
        [0.3888, 0.6112],
        [0.4716, 0.5284],
        [0.4213, 0.5787],
        [0.5091, 0.4909],
        [0.4514, 0.5486],
        [0.4408, 0.5592],
        [0.4725, 0.5275],
        [0.4905, 0.5095],
        [0.4641, 0.5359],
        [0.4554, 0.5446],
        [0.3846, 0.6154]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4167, 0.5833],
        [0.4154, 0.5846],
        [0.4408, 0.5592],
        [0.4384, 0.5616],
        [0.3783, 0.6217],
        [0.4272, 0.5728],
        [0.4247, 0.5753],
        [0.4358, 0.5642],
        [0.4203, 0.5797],
        [0.3617, 0.6383],
        [0.4402, 0.5598],
        [0.4716, 0.5284]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3620, 0.6380],
        [0.5127, 0.4873],
        [0.5481, 0.4519],
        [0.4974, 0.5026],
        [0.4368, 0.5632],
        [0.4506, 0.5494],
        [0.4464, 0.5536],
        [0.3768, 0.6232],
        [0.4492, 0.5508],
        [0.4005, 0.5995],
        [0.4659, 0.5341],
        [0.4482, 0.5518]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4547, 0.5453],
        [0.4356, 0.5644],
        [0.3887, 0.6113],
        [0.3507, 0.6493],
        [0.4972, 0.5028],
        [0.5265, 0.4735],
        [0.4307, 0.5693],
        [0.4564, 0.5436],
        [0.4577, 0.5423],
        [0.4030, 0.5970],
        [0.4509, 0.5491],
        [0.4386, 0.5614]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3922, 0.6078],
        [0.4533, 0.5467],
        [0.5377, 0.4623],
        [0.4334, 0.5666],
        [0.4087, 0.5913],
        [0.4991, 0.5009],
        [0.3783, 0.6217],
        [0.4012, 0.5988],
        [0.4838, 0.5162],
        [0.4653, 0.5347],
        [0.3900, 0.6100],
        [0.5136, 0.4864]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3659, 0.6341],
        [0.4252, 0.5748],
        [0.5304, 0.4696],
        [0.3414, 0.6586],
        [0.4773, 0.5227],
        [0.5834, 0.4166],
        [0.4628, 0.5372],
        [0.4778, 0.5222],
        [0.4469, 0.5531],
        [0.4556, 0.5444],
        [0.3470, 0.6530],
        [0.4545, 0.5455]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4774, 0.5226],
        [0.4039, 0.5961],
        [0.3784, 0.6216],
        [0.5228, 0.4772],
        [0.5231, 0.4769],
        [0.4181, 0.5819],
        [0.3829, 0.6171],
        [0.4973, 0.5027],
        [0.4089, 0.5911],
        [0.4848, 0.5152],
        [0.3777, 0.6223],
        [0.4438, 0.5562]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5185, 0.4815],
        [0.5556, 0.4444],
        [0.4713, 0.5287],
        [0.5102, 0.4898],
        [0.4252, 0.5748],
        [0.4215, 0.5785],
        [0.3909, 0.6091],
        [0.5147, 0.4853],
        [0.3733, 0.6267],
        [0.4538, 0.5462],
        [0.4344, 0.5656],
        [0.5297, 0.4703]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3763, 0.6237],
        [0.4331, 0.5669],
        [0.4126, 0.5874],
        [0.5202, 0.4798],
        [0.4606, 0.5394],
        [0.5139, 0.4861],
        [0.3742, 0.6258],
        [0.4779, 0.5221],
        [0.3991, 0.6009],
        [0.4869, 0.5131],
        [0.4258, 0.5742],
        [0.4531, 0.5469]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4876, 0.5124],
        [0.4008, 0.5992],
        [0.4486, 0.5514],
        [0.4427, 0.5573],
        [0.4798, 0.5202],
        [0.5617, 0.4383],
        [0.3805, 0.6195],
        [0.5605, 0.4395],
        [0.4636, 0.5364],
        [0.4252, 0.5748],
        [0.4898, 0.5102],
        [0.4819, 0.5181]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4403, 0.5597],
        [0.4851, 0.5149],
        [0.4619, 0.5381],
        [0.4820, 0.5180],
        [0.4790, 0.5210],
        [0.5713, 0.4287],
        [0.4734, 0.5266],
        [0.3945, 0.6055],
        [0.5397, 0.4603],
        [0.4584, 0.5416],
        [0.4546, 0.5454],
        [0.4857, 0.5143]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5043, 0.4957],
        [0.4653, 0.5347],
        [0.4511, 0.5489],
        [0.4407, 0.5593],
        [0.4982, 0.5018],
        [0.4511, 0.5489],
        [0.4298, 0.5702],
        [0.4500, 0.5500],
        [0.4156, 0.5844],
        [0.4438, 0.5562],
        [0.4482, 0.5518],
        [0.4039, 0.5961]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4948, 0.5052],
        [0.4348, 0.5652],
        [0.4227, 0.5773],
        [0.4201, 0.5799],
        [0.3838, 0.6162],
        [0.4120, 0.5880],
        [0.4243, 0.5757],
        [0.4047, 0.5953],
        [0.4549, 0.5451],
        [0.3858, 0.6142],
        [0.4063, 0.5937],
        [0.4322, 0.5678]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4332, 0.5668],
        [0.5920, 0.4080],
        [0.4845, 0.5155],
        [0.5092, 0.4908],
        [0.4117, 0.5883],
        [0.4673, 0.5327],
        [0.4220, 0.5780],
        [0.4049, 0.5951],
        [0.4885, 0.5115],
        [0.4735, 0.5265],
        [0.4724, 0.5276],
        [0.5483, 0.4517]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4596, 0.5404],
        [0.3680, 0.6320],
        [0.3795, 0.6205],
        [0.3547, 0.6453],
        [0.5549, 0.4451],
        [0.5504, 0.4496],
        [0.3783, 0.6217],
        [0.4972, 0.5028],
        [0.4373, 0.5627],
        [0.4109, 0.5891],
        [0.5058, 0.4942],
        [0.4794, 0.5206]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4812, 0.5188],
        [0.4255, 0.5745],
        [0.4127, 0.5873],
        [0.4701, 0.5299],
        [0.4021, 0.5979],
        [0.4426, 0.5574],
        [0.4045, 0.5955],
        [0.3740, 0.6260],
        [0.4098, 0.5902],
        [0.4660, 0.5340],
        [0.3880, 0.6120],
        [0.4862, 0.5138]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3972, 0.6028],
        [0.4712, 0.5288],
        [0.5099, 0.4901],
        [0.3645, 0.6355],
        [0.4828, 0.5172],
        [0.5756, 0.4244],
        [0.4782, 0.5218],
        [0.5276, 0.4724],
        [0.5060, 0.4940],
        [0.4712, 0.5288],
        [0.3787, 0.6213],
        [0.4393, 0.5607]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4916, 0.5084],
        [0.4458, 0.5542],
        [0.3798, 0.6202],
        [0.5292, 0.4708],
        [0.5096, 0.4904],
        [0.3833, 0.6167],
        [0.3873, 0.6127],
        [0.4283, 0.5717],
        [0.3708, 0.6292],
        [0.4561, 0.5439],
        [0.3729, 0.6271],
        [0.4795, 0.5205]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4697, 0.5303],
        [0.4949, 0.5051],
        [0.4788, 0.5212],
        [0.4432, 0.5568],
        [0.4494, 0.5506],
        [0.4033, 0.5967],
        [0.4060, 0.5940],
        [0.4863, 0.5137],
        [0.4219, 0.5781],
        [0.4281, 0.5719],
        [0.4640, 0.5360],
        [0.4835, 0.5165]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4034, 0.5966],
        [0.3730, 0.6270],
        [0.3914, 0.6086],
        [0.4487, 0.5513],
        [0.5194, 0.4806],
        [0.5872, 0.4128],
        [0.4132, 0.5868],
        [0.3539, 0.6461],
        [0.4076, 0.5924],
        [0.4699, 0.5301],
        [0.3966, 0.6034],
        [0.4558, 0.5442]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4463, 0.5537],
        [0.4011, 0.5989],
        [0.4204, 0.5796],
        [0.4659, 0.5341],
        [0.4306, 0.5694],
        [0.5164, 0.4836],
        [0.3987, 0.6013],
        [0.5262, 0.4738],
        [0.4755, 0.5245],
        [0.4146, 0.5854],
        [0.4251, 0.5749],
        [0.5474, 0.4526]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5135, 0.4865],
        [0.4213, 0.5787],
        [0.4342, 0.5658],
        [0.4627, 0.5373],
        [0.3815, 0.6185],
        [0.5183, 0.4817],
        [0.4207, 0.5793],
        [0.4061, 0.5939],
        [0.4197, 0.5803],
        [0.4457, 0.5543],
        [0.4946, 0.5054],
        [0.4594, 0.5406]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4921, 0.5079],
        [0.5190, 0.4810],
        [0.4755, 0.5245],
        [0.4375, 0.5625],
        [0.5488, 0.4512],
        [0.5297, 0.4703],
        [0.4709, 0.5291],
        [0.4799, 0.5201],
        [0.5264, 0.4736],
        [0.5645, 0.4355],
        [0.5149, 0.4851],
        [0.4686, 0.5314]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4493, 0.5507],
        [0.4110, 0.5890],
        [0.4465, 0.5535],
        [0.4664, 0.5336],
        [0.4238, 0.5762],
        [0.4097, 0.5903],
        [0.4397, 0.5603],
        [0.3620, 0.6380],
        [0.4554, 0.5446],
        [0.3825, 0.6175],
        [0.3835, 0.6165],
        [0.4116, 0.5884]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4193, 0.5807],
        [0.5071, 0.4929],
        [0.4611, 0.5389],
        [0.4900, 0.5100],
        [0.3857, 0.6143],
        [0.5266, 0.4734],
        [0.4208, 0.5792],
        [0.5013, 0.4987],
        [0.4773, 0.5227],
        [0.5160, 0.4840],
        [0.4745, 0.5255],
        [0.5620, 0.4380]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4747, 0.5253],
        [0.4187, 0.5813],
        [0.4042, 0.5958],
        [0.4834, 0.5166],
        [0.5388, 0.4612],
        [0.5596, 0.4404],
        [0.3895, 0.6105],
        [0.4586, 0.5414],
        [0.5005, 0.4995],
        [0.4232, 0.5768],
        [0.4792, 0.5208],
        [0.4405, 0.5595]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4606, 0.5394],
        [0.5269, 0.4731],
        [0.4812, 0.5188],
        [0.4164, 0.5836],
        [0.4539, 0.5461],
        [0.5157, 0.4843],
        [0.4141, 0.5859],
        [0.4452, 0.5548],
        [0.5449, 0.4551],
        [0.5323, 0.4677],
        [0.4062, 0.5938],
        [0.5083, 0.4917]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3809, 0.6191],
        [0.4417, 0.5583],
        [0.5004, 0.4996],
        [0.3591, 0.6409],
        [0.5068, 0.4932],
        [0.5963, 0.4037],
        [0.4500, 0.5500],
        [0.4470, 0.5530],
        [0.5245, 0.4755],
        [0.4502, 0.5498],
        [0.4034, 0.5966],
        [0.4984, 0.5016]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.5103, 0.4897],
        [0.5606, 0.4394],
        [0.4545, 0.5455],
        [0.5383, 0.4617],
        [0.5244, 0.4756],
        [0.4672, 0.5328],
        [0.4235, 0.5765],
        [0.5228, 0.4772],
        [0.4425, 0.5575],
        [0.5733, 0.4267],
        [0.5062, 0.4938],
        [0.4705, 0.5295]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5232, 0.4768],
        [0.5115, 0.4885],
        [0.5296, 0.4704],
        [0.5216, 0.4784],
        [0.5225, 0.4775],
        [0.4512, 0.5488],
        [0.4342, 0.5658],
        [0.5165, 0.4835],
        [0.5053, 0.4947],
        [0.5735, 0.4265],
        [0.4505, 0.5495],
        [0.4548, 0.5452]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4271, 0.5729],
        [0.4928, 0.5072],
        [0.4010, 0.5990],
        [0.5044, 0.4956],
        [0.4247, 0.5753],
        [0.5364, 0.4636],
        [0.3927, 0.6073],
        [0.4737, 0.5263],
        [0.3768, 0.6232],
        [0.4488, 0.5512],
        [0.4324, 0.5676],
        [0.4208, 0.5792]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.5109, 0.4891],
        [0.4189, 0.5811],
        [0.4582, 0.5418],
        [0.3953, 0.6047],
        [0.4435, 0.5565],
        [0.5475, 0.4525],
        [0.4606, 0.5394],
        [0.6084, 0.3916],
        [0.4564, 0.5436],
        [0.4479, 0.5521],
        [0.4442, 0.5558],
        [0.4694, 0.5306]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4619, 0.5381],
        [0.5584, 0.4416],
        [0.4201, 0.5799],
        [0.4455, 0.5545],
        [0.4623, 0.5377],
        [0.5358, 0.4642],
        [0.4789, 0.5211],
        [0.4162, 0.5838],
        [0.4424, 0.5576],
        [0.4967, 0.5033],
        [0.4269, 0.5731],
        [0.4346, 0.5654]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4536, 0.5464],
        [0.4076, 0.5924],
        [0.5180, 0.4820],
        [0.4455, 0.5545],
        [0.5218, 0.4782],
        [0.4733, 0.5267],
        [0.4419, 0.5581],
        [0.4477, 0.5523],
        [0.4744, 0.5256],
        [0.4937, 0.5063],
        [0.4703, 0.5297],
        [0.3596, 0.6404]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4899, 0.5101],
        [0.4539, 0.5461],
        [0.4326, 0.5674],
        [0.4438, 0.5562],
        [0.3985, 0.6015],
        [0.4223, 0.5777],
        [0.4155, 0.5845],
        [0.4431, 0.5569],
        [0.4726, 0.5274],
        [0.3840, 0.6160],
        [0.4398, 0.5602],
        [0.5223, 0.4777]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4151, 0.5849],
        [0.5115, 0.4885],
        [0.5046, 0.4954],
        [0.5027, 0.4973],
        [0.3779, 0.6221],
        [0.4843, 0.5157],
        [0.4120, 0.5880],
        [0.3685, 0.6315],
        [0.4167, 0.5833],
        [0.4858, 0.5142],
        [0.4636, 0.5364],
        [0.4393, 0.5607]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4875, 0.5125],
        [0.3642, 0.6358],
        [0.3733, 0.6267],
        [0.4478, 0.5522],
        [0.5484, 0.4516],
        [0.5651, 0.4349],
        [0.4400, 0.5600],
        [0.5040, 0.4960],
        [0.5180, 0.4820],
        [0.3801, 0.6199],
        [0.4895, 0.5105],
        [0.4470, 0.5530]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4318, 0.5682],
        [0.5160, 0.4840],
        [0.4260, 0.5740],
        [0.4992, 0.5008],
        [0.4273, 0.5727],
        [0.4673, 0.5327],
        [0.4282, 0.5718],
        [0.3711, 0.6289],
        [0.4692, 0.5308],
        [0.6218, 0.3782],
        [0.3939, 0.6061],
        [0.4580, 0.5420]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3948, 0.6052],
        [0.4349, 0.5651],
        [0.5009, 0.4991],
        [0.3838, 0.6162],
        [0.4773, 0.5227],
        [0.6026, 0.3974],
        [0.4113, 0.5887],
        [0.4426, 0.5574],
        [0.4959, 0.5041],
        [0.4292, 0.5708],
        [0.4004, 0.5996],
        [0.4902, 0.5098]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4808, 0.5192],
        [0.4634, 0.5366],
        [0.4124, 0.5876],
        [0.4499, 0.5501],
        [0.5414, 0.4586],
        [0.3873, 0.6127],
        [0.3243, 0.6757],
        [0.4969, 0.5031],
        [0.4248, 0.5752],
        [0.4792, 0.5208],
        [0.4350, 0.5650],
        [0.4860, 0.5140]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4481, 0.5519],
        [0.5072, 0.4928],
        [0.4559, 0.5441],
        [0.4646, 0.5354],
        [0.4641, 0.5359],
        [0.4173, 0.5827],
        [0.4009, 0.5991],
        [0.4865, 0.5135],
        [0.4428, 0.5572],
        [0.5783, 0.4217],
        [0.4569, 0.5431],
        [0.5364, 0.4636]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3993, 0.6007],
        [0.4585, 0.5415],
        [0.5082, 0.4918],
        [0.5320, 0.4680],
        [0.5279, 0.4721],
        [0.4601, 0.5399],
        [0.3593, 0.6407],
        [0.4394, 0.5606],
        [0.4421, 0.5579],
        [0.4475, 0.5525],
        [0.4872, 0.5128],
        [0.4075, 0.5925]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4291, 0.5709],
        [0.4297, 0.5703],
        [0.4042, 0.5958],
        [0.4107, 0.5893],
        [0.4302, 0.5698],
        [0.4379, 0.5621],
        [0.4542, 0.5458],
        [0.5367, 0.4633],
        [0.5199, 0.4801],
        [0.3581, 0.6419],
        [0.4034, 0.5966],
        [0.5036, 0.4964]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4973, 0.5027],
        [0.4681, 0.5319],
        [0.4362, 0.5638],
        [0.4866, 0.5134],
        [0.4672, 0.5328],
        [0.4477, 0.5523],
        [0.5379, 0.4621],
        [0.3041, 0.6959],
        [0.5141, 0.4859],
        [0.4470, 0.5530],
        [0.5213, 0.4787],
        [0.4284, 0.5716]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3910, 0.6090],
        [0.4774, 0.5226],
        [0.3816, 0.6184],
        [0.4687, 0.5313],
        [0.5030, 0.4970],
        [0.4982, 0.5018],
        [0.4044, 0.5956],
        [0.4371, 0.5629],
        [0.4102, 0.5898],
        [0.4874, 0.5126],
        [0.4795, 0.5205],
        [0.3786, 0.6214]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4334, 0.5666],
        [0.4196, 0.5804],
        [0.4147, 0.5853],
        [0.4392, 0.5608],
        [0.3588, 0.6412],
        [0.4279, 0.5721],
        [0.3586, 0.6414],
        [0.4176, 0.5824],
        [0.4172, 0.5828],
        [0.3604, 0.6396],
        [0.3980, 0.6020],
        [0.4489, 0.5511]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3766, 0.6234],
        [0.4666, 0.5334],
        [0.4571, 0.5429],
        [0.5039, 0.4961],
        [0.3120, 0.6880],
        [0.4695, 0.5305],
        [0.3619, 0.6381],
        [0.4156, 0.5844],
        [0.3533, 0.6467],
        [0.4557, 0.5443],
        [0.4026, 0.5974],
        [0.5448, 0.4552]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4388, 0.5612],
        [0.4043, 0.5957],
        [0.4449, 0.5551],
        [0.3632, 0.6368],
        [0.5618, 0.4382],
        [0.4818, 0.5182],
        [0.4543, 0.5457],
        [0.5124, 0.4876],
        [0.4685, 0.5315],
        [0.3629, 0.6371],
        [0.4165, 0.5835],
        [0.4480, 0.5520]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4150, 0.5850],
        [0.5309, 0.4691],
        [0.4442, 0.5558],
        [0.4597, 0.5403],
        [0.4091, 0.5909],
        [0.5471, 0.4529],
        [0.4210, 0.5790],
        [0.4233, 0.5767],
        [0.4273, 0.5727],
        [0.4669, 0.5331],
        [0.3148, 0.6852],
        [0.4822, 0.5178]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4336, 0.5664],
        [0.4590, 0.5410],
        [0.4950, 0.5050],
        [0.3208, 0.6792],
        [0.4560, 0.5440],
        [0.5651, 0.4349],
        [0.4673, 0.5327],
        [0.5160, 0.4840],
        [0.5132, 0.4868],
        [0.4594, 0.5406],
        [0.3608, 0.6392],
        [0.4852, 0.5148]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4921, 0.5079],
        [0.4609, 0.5391],
        [0.4231, 0.5769],
        [0.4353, 0.5647],
        [0.5484, 0.4516],
        [0.4035, 0.5965],
        [0.3723, 0.6277],
        [0.4342, 0.5658],
        [0.3768, 0.6232],
        [0.4789, 0.5211],
        [0.3784, 0.6216],
        [0.4548, 0.5452]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5009, 0.4991],
        [0.4451, 0.5549],
        [0.5838, 0.4162],
        [0.4701, 0.5299],
        [0.4839, 0.5161],
        [0.4770, 0.5230],
        [0.4742, 0.5258],
        [0.4687, 0.5313],
        [0.4545, 0.5455],
        [0.5156, 0.4844],
        [0.4634, 0.5366],
        [0.4976, 0.5024]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3681, 0.6319],
        [0.4612, 0.5388],
        [0.4310, 0.5690],
        [0.4713, 0.5287],
        [0.4685, 0.5315],
        [0.5004, 0.4996],
        [0.3401, 0.6599],
        [0.3912, 0.6088],
        [0.4266, 0.5734],
        [0.4402, 0.5598],
        [0.4084, 0.5916],
        [0.4993, 0.5007]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4378, 0.5622],
        [0.3286, 0.6714],
        [0.3933, 0.6067],
        [0.3729, 0.6271],
        [0.4220, 0.5780],
        [0.5262, 0.4738],
        [0.3595, 0.6405],
        [0.5447, 0.4553],
        [0.4480, 0.5520],
        [0.3656, 0.6344],
        [0.4674, 0.5326],
        [0.5115, 0.4885]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4588, 0.5412],
        [0.4517, 0.5483],
        [0.4540, 0.5460],
        [0.4346, 0.5654],
        [0.4389, 0.5611],
        [0.4713, 0.5287],
        [0.4346, 0.5654],
        [0.4008, 0.5992],
        [0.4152, 0.5848],
        [0.5118, 0.4882],
        [0.4621, 0.5379],
        [0.4746, 0.5254]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4299, 0.5701],
        [0.4371, 0.5629],
        [0.4688, 0.5312],
        [0.4381, 0.5619],
        [0.4094, 0.5906],
        [0.4891, 0.5109],
        [0.4288, 0.5712],
        [0.5206, 0.4794],
        [0.3881, 0.6119],
        [0.4814, 0.5186],
        [0.4993, 0.5007],
        [0.4611, 0.5389]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4690, 0.5310],
        [0.4541, 0.5459],
        [0.3710, 0.6290],
        [0.4156, 0.5844],
        [0.4380, 0.5620],
        [0.4420, 0.5580],
        [0.3538, 0.6462],
        [0.3665, 0.6335],
        [0.3961, 0.6039],
        [0.3701, 0.6299],
        [0.3523, 0.6477],
        [0.5109, 0.4891]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3925, 0.6075],
        [0.4575, 0.5425],
        [0.4332, 0.5668],
        [0.4328, 0.5672],
        [0.3264, 0.6736],
        [0.4271, 0.5729],
        [0.3996, 0.6004],
        [0.4012, 0.5988],
        [0.4045, 0.5955],
        [0.3705, 0.6295],
        [0.4802, 0.5198],
        [0.4333, 0.5667]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3557, 0.6443],
        [0.4270, 0.5730],
        [0.3142, 0.6858],
        [0.3912, 0.6088],
        [0.5595, 0.4405],
        [0.4326, 0.5674],
        [0.4268, 0.5732],
        [0.4579, 0.5421],
        [0.4272, 0.5728],
        [0.3319, 0.6681],
        [0.4280, 0.5720],
        [0.3894, 0.6106]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4212, 0.5788],
        [0.4921, 0.5079],
        [0.4319, 0.5681],
        [0.4719, 0.5281],
        [0.4299, 0.5701],
        [0.4822, 0.5178],
        [0.4025, 0.5975],
        [0.4023, 0.5977],
        [0.3823, 0.6177],
        [0.5332, 0.4668],
        [0.3191, 0.6809],
        [0.4699, 0.5301]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3992, 0.6008],
        [0.4409, 0.5591],
        [0.5190, 0.4810],
        [0.3324, 0.6676],
        [0.4053, 0.5947],
        [0.5581, 0.4419],
        [0.4552, 0.5448],
        [0.4516, 0.5484],
        [0.4058, 0.5942],
        [0.4976, 0.5024],
        [0.3525, 0.6475],
        [0.4315, 0.5685]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3572, 0.6428],
        [0.4701, 0.5299],
        [0.3731, 0.6269],
        [0.4007, 0.5993],
        [0.5333, 0.4667],
        [0.4109, 0.5891],
        [0.3282, 0.6718],
        [0.4747, 0.5253],
        [0.3594, 0.6406],
        [0.4462, 0.5538],
        [0.4158, 0.5842],
        [0.4352, 0.5648]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4993, 0.5007],
        [0.4472, 0.5528],
        [0.5312, 0.4688],
        [0.4196, 0.5804],
        [0.5144, 0.4856],
        [0.4524, 0.5476],
        [0.4000, 0.6000],
        [0.4738, 0.5262],
        [0.4781, 0.5219],
        [0.5090, 0.4910],
        [0.3992, 0.6008],
        [0.4769, 0.5231]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4836, 0.5164],
        [0.4357, 0.5643],
        [0.4301, 0.5699],
        [0.4716, 0.5284],
        [0.3719, 0.6281],
        [0.5004, 0.4996],
        [0.3352, 0.6648],
        [0.3904, 0.6096],
        [0.4155, 0.5845],
        [0.4577, 0.5423],
        [0.4037, 0.5963],
        [0.4668, 0.5332]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4523, 0.5477],
        [0.4341, 0.5659],
        [0.4186, 0.5814],
        [0.4337, 0.5663],
        [0.4233, 0.5767],
        [0.5550, 0.4450],
        [0.4153, 0.5847],
        [0.5627, 0.4373],
        [0.4320, 0.5680],
        [0.4058, 0.5942],
        [0.5177, 0.4823],
        [0.4990, 0.5010]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5251, 0.4749],
        [0.4692, 0.5308],
        [0.4372, 0.5628],
        [0.4609, 0.5391],
        [0.4059, 0.5941],
        [0.4565, 0.5435],
        [0.4940, 0.5060],
        [0.4589, 0.5411],
        [0.4252, 0.5748],
        [0.4036, 0.5964],
        [0.5013, 0.4987],
        [0.4902, 0.5098]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4751, 0.5249],
        [0.4618, 0.5382],
        [0.4274, 0.5726],
        [0.4649, 0.5351],
        [0.4894, 0.5106],
        [0.5176, 0.4824],
        [0.4001, 0.5999],
        [0.4398, 0.5602],
        [0.3517, 0.6483],
        [0.4885, 0.5115],
        [0.4683, 0.5317],
        [0.3721, 0.6279]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.5625, 0.4375],
        [0.4076, 0.5924],
        [0.4348, 0.5652],
        [0.4046, 0.5954],
        [0.4002, 0.5998],
        [0.3770, 0.6230],
        [0.3819, 0.6181],
        [0.4049, 0.5951],
        [0.4110, 0.5890],
        [0.3821, 0.6179],
        [0.3785, 0.6215],
        [0.5151, 0.4849]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3646, 0.6354],
        [0.5247, 0.4753],
        [0.4212, 0.5788],
        [0.4143, 0.5857],
        [0.3875, 0.6125],
        [0.5197, 0.4803],
        [0.3067, 0.6933],
        [0.3672, 0.6328],
        [0.3472, 0.6528],
        [0.3693, 0.6307],
        [0.4057, 0.5943],
        [0.5079, 0.4921]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3788, 0.6212],
        [0.4337, 0.5663],
        [0.3710, 0.6290],
        [0.3968, 0.6032],
        [0.6306, 0.3694],
        [0.5043, 0.4957],
        [0.4263, 0.5737],
        [0.4074, 0.5926],
        [0.4865, 0.5135],
        [0.4560, 0.5440],
        [0.5263, 0.4737],
        [0.4513, 0.5487]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3862, 0.6138],
        [0.4318, 0.5682],
        [0.4791, 0.5209],
        [0.5246, 0.4754],
        [0.4366, 0.5634],
        [0.5309, 0.4691],
        [0.4362, 0.5638],
        [0.4583, 0.5417],
        [0.4955, 0.5045],
        [0.6131, 0.3869],
        [0.4654, 0.5346],
        [0.4836, 0.5164]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4292, 0.5708],
        [0.3687, 0.6313],
        [0.4776, 0.5224],
        [0.3408, 0.6592],
        [0.4695, 0.5305],
        [0.5654, 0.4346],
        [0.4981, 0.5019],
        [0.4654, 0.5346],
        [0.4968, 0.5032],
        [0.4887, 0.5113],
        [0.3604, 0.6396],
        [0.4370, 0.5630]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4356, 0.5644],
        [0.4325, 0.5675],
        [0.3894, 0.6106],
        [0.4349, 0.5651],
        [0.5498, 0.4502],
        [0.4014, 0.5986],
        [0.3463, 0.6537],
        [0.3150, 0.6850],
        [0.3931, 0.6069],
        [0.5362, 0.4638],
        [0.3781, 0.6219],
        [0.4233, 0.5767]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4304, 0.5696],
        [0.4010, 0.5990],
        [0.4137, 0.5863],
        [0.3951, 0.6049],
        [0.3927, 0.6073],
        [0.4798, 0.5202],
        [0.3687, 0.6313],
        [0.4105, 0.5895],
        [0.4082, 0.5918],
        [0.4279, 0.5721],
        [0.4558, 0.5442],
        [0.4216, 0.5784]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3470, 0.6530],
        [0.4039, 0.5961],
        [0.3917, 0.6083],
        [0.5078, 0.4922],
        [0.5175, 0.4825],
        [0.4418, 0.5582],
        [0.3441, 0.6559],
        [0.3849, 0.6151],
        [0.4370, 0.5630],
        [0.3318, 0.6682],
        [0.3121, 0.6879],
        [0.4530, 0.5470]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4062, 0.5938],
        [0.3103, 0.6897],
        [0.3808, 0.6192],
        [0.3636, 0.6364],
        [0.4299, 0.5701],
        [0.4245, 0.5755],
        [0.3174, 0.6826],
        [0.5224, 0.4776],
        [0.4729, 0.5271],
        [0.3432, 0.6568],
        [0.4039, 0.5961],
        [0.4409, 0.5591]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4111, 0.5889],
        [0.4825, 0.5175],
        [0.3653, 0.6347],
        [0.3700, 0.6300],
        [0.4126, 0.5874],
        [0.5190, 0.4810],
        [0.3907, 0.6093],
        [0.3432, 0.6568],
        [0.3392, 0.6608],
        [0.3785, 0.6215],
        [0.3768, 0.6232],
        [0.3635, 0.6365]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.5260, 0.4740],
        [0.4738, 0.5262],
        [0.3553, 0.6447],
        [0.4818, 0.5182],
        [0.4031, 0.5969],
        [0.5351, 0.4649],
        [0.5437, 0.4563],
        [0.4513, 0.5487],
        [0.4602, 0.5398],
        [0.5252, 0.4748],
        [0.5380, 0.4620],
        [0.4473, 0.5527]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4748, 0.5252],
        [0.5233, 0.4767],
        [0.4478, 0.5522],
        [0.4651, 0.5349],
        [0.3725, 0.6275],
        [0.3861, 0.6139],
        [0.4471, 0.5529],
        [0.4679, 0.5321],
        [0.4482, 0.5518],
        [0.4838, 0.5162],
        [0.4198, 0.5802],
        [0.4599, 0.5401]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4569, 0.5431],
        [0.5141, 0.4859],
        [0.6132, 0.3868],
        [0.5723, 0.4277],
        [0.3469, 0.6531],
        [0.5849, 0.4151],
        [0.4276, 0.5724],
        [0.3768, 0.6232],
        [0.4385, 0.5615],
        [0.4827, 0.5173],
        [0.5399, 0.4601],
        [0.5830, 0.4170]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.5013, 0.4987],
        [0.4611, 0.5389],
        [0.4083, 0.5917],
        [0.3852, 0.6148],
        [0.5978, 0.4022],
        [0.4753, 0.5247],
        [0.4549, 0.5451],
        [0.5328, 0.4672],
        [0.5002, 0.4998],
        [0.4138, 0.5862],
        [0.4279, 0.5721],
        [0.4519, 0.5481]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4235, 0.5765],
        [0.5099, 0.4901],
        [0.4300, 0.5700],
        [0.3929, 0.6071],
        [0.4204, 0.5796],
        [0.5188, 0.4812],
        [0.4391, 0.5609],
        [0.4340, 0.5660],
        [0.5108, 0.4892],
        [0.5324, 0.4676],
        [0.3864, 0.6136],
        [0.5116, 0.4884]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3744, 0.6256],
        [0.4226, 0.5774],
        [0.4461, 0.5539],
        [0.3720, 0.6280],
        [0.5221, 0.4779],
        [0.5770, 0.4230],
        [0.4949, 0.5051],
        [0.4871, 0.5129],
        [0.4627, 0.5373],
        [0.4825, 0.5175],
        [0.3973, 0.6027],
        [0.3926, 0.6074]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3266, 0.6734],
        [0.3557, 0.6443],
        [0.3421, 0.6579],
        [0.3173, 0.6827],
        [0.4699, 0.5301],
        [0.3135, 0.6865],
        [0.2426, 0.7574],
        [0.3066, 0.6934],
        [0.3252, 0.6748],
        [0.4307, 0.5693],
        [0.3895, 0.6105],
        [0.3538, 0.6462]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4538, 0.5462],
        [0.3740, 0.6260],
        [0.5225, 0.4775],
        [0.4397, 0.5603],
        [0.4471, 0.5529],
        [0.3242, 0.6758],
        [0.3271, 0.6729],
        [0.3792, 0.6208],
        [0.4112, 0.5888],
        [0.5081, 0.4919],
        [0.3850, 0.6150],
        [0.3711, 0.6289]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4217, 0.5783],
        [0.4208, 0.5792],
        [0.4051, 0.5949],
        [0.4166, 0.5834],
        [0.4924, 0.5076],
        [0.4759, 0.5241],
        [0.3181, 0.6819],
        [0.3901, 0.6099],
        [0.4916, 0.5084],
        [0.4335, 0.5665],
        [0.3810, 0.6190],
        [0.4129, 0.5871]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4515, 0.5485],
        [0.3231, 0.6769],
        [0.4870, 0.5130],
        [0.3916, 0.6084],
        [0.4190, 0.5810],
        [0.4726, 0.5274],
        [0.3905, 0.6095],
        [0.5850, 0.4150],
        [0.4272, 0.5728],
        [0.3791, 0.6209],
        [0.4201, 0.5799],
        [0.4645, 0.5355]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4363, 0.5637],
        [0.4575, 0.5425],
        [0.3374, 0.6626],
        [0.4152, 0.5848],
        [0.4872, 0.5128],
        [0.5495, 0.4505],
        [0.4187, 0.5813],
        [0.3802, 0.6198],
        [0.4355, 0.5645],
        [0.4494, 0.5506],
        [0.4732, 0.5268],
        [0.3585, 0.6415]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4646, 0.5354],
        [0.3965, 0.6035],
        [0.4964, 0.5036],
        [0.3795, 0.6205],
        [0.4193, 0.5807],
        [0.4961, 0.5039],
        [0.4398, 0.5602],
        [0.4549, 0.5451],
        [0.5013, 0.4987],
        [0.4332, 0.5668],
        [0.4069, 0.5931],
        [0.3624, 0.6376]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4871, 0.5129],
        [0.3664, 0.6336],
        [0.4480, 0.5520],
        [0.3698, 0.6302],
        [0.3509, 0.6491],
        [0.3780, 0.6220],
        [0.3177, 0.6823],
        [0.4298, 0.5702],
        [0.4272, 0.5728],
        [0.3697, 0.6303],
        [0.3501, 0.6499],
        [0.4273, 0.5727]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4235, 0.5765],
        [0.4355, 0.5645],
        [0.4445, 0.5555],
        [0.4350, 0.5650],
        [0.3554, 0.6446],
        [0.4112, 0.5888],
        [0.3765, 0.6235],
        [0.3256, 0.6744],
        [0.3968, 0.6032],
        [0.4533, 0.5467],
        [0.4066, 0.5934],
        [0.4557, 0.5443]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4486, 0.5514],
        [0.2796, 0.7204],
        [0.3700, 0.6300],
        [0.3862, 0.6138],
        [0.5107, 0.4893],
        [0.4623, 0.5377],
        [0.3477, 0.6523],
        [0.3903, 0.6097],
        [0.4281, 0.5719],
        [0.3694, 0.6306],
        [0.3909, 0.6091],
        [0.3544, 0.6456]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3913, 0.6087],
        [0.3891, 0.6109],
        [0.3899, 0.6101],
        [0.4794, 0.5206],
        [0.3473, 0.6527],
        [0.4987, 0.5013],
        [0.3690, 0.6310],
        [0.3620, 0.6380],
        [0.3828, 0.6172],
        [0.4532, 0.5468],
        [0.3642, 0.6358],
        [0.4722, 0.5278]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3353, 0.6647],
        [0.3893, 0.6107],
        [0.5419, 0.4581],
        [0.3259, 0.6741],
        [0.5464, 0.4536],
        [0.6170, 0.3830],
        [0.4223, 0.5777],
        [0.4121, 0.5879],
        [0.5036, 0.4964],
        [0.4396, 0.5604],
        [0.3446, 0.6554],
        [0.4502, 0.5498]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3899, 0.6101],
        [0.4709, 0.5291],
        [0.4115, 0.5885],
        [0.3893, 0.6107],
        [0.5294, 0.4706],
        [0.3494, 0.6506],
        [0.3241, 0.6759],
        [0.4031, 0.5969],
        [0.4578, 0.5422],
        [0.5087, 0.4913],
        [0.4053, 0.5947],
        [0.4038, 0.5962]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4264, 0.5736],
        [0.4750, 0.5250],
        [0.5063, 0.4937],
        [0.4903, 0.5097],
        [0.4535, 0.5465],
        [0.4024, 0.5976],
        [0.3847, 0.6153],
        [0.3912, 0.6088],
        [0.4269, 0.5731],
        [0.3866, 0.6134],
        [0.4087, 0.5913],
        [0.4393, 0.5607]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4234, 0.5766],
        [0.3823, 0.6177],
        [0.3595, 0.6405],
        [0.5132, 0.4868],
        [0.4689, 0.5311],
        [0.3717, 0.6283],
        [0.2959, 0.7041],
        [0.3442, 0.6558],
        [0.3474, 0.6526],
        [0.4590, 0.5410],
        [0.3520, 0.6480],
        [0.4412, 0.5588]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4241, 0.5759],
        [0.4465, 0.5535],
        [0.3829, 0.6171],
        [0.4136, 0.5864],
        [0.4040, 0.5960],
        [0.4204, 0.5796],
        [0.2992, 0.7008],
        [0.5704, 0.4296],
        [0.4380, 0.5620],
        [0.3337, 0.6663],
        [0.4251, 0.5749],
        [0.4366, 0.5634]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5139, 0.4861],
        [0.5397, 0.4603],
        [0.4805, 0.5195],
        [0.4384, 0.5616],
        [0.4812, 0.5188],
        [0.4278, 0.5722],
        [0.4815, 0.5185],
        [0.4421, 0.5579],
        [0.4435, 0.5565],
        [0.4970, 0.5030],
        [0.4455, 0.5545],
        [0.3981, 0.6019]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4754, 0.5246],
        [0.3902, 0.6098],
        [0.4044, 0.5956],
        [0.4385, 0.5615],
        [0.4709, 0.5291],
        [0.5225, 0.4775],
        [0.3682, 0.6318],
        [0.4564, 0.5436],
        [0.4760, 0.5240],
        [0.4735, 0.5265],
        [0.4178, 0.5822],
        [0.3657, 0.6343]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4706, 0.5294],
        [0.3727, 0.6273],
        [0.4769, 0.5231],
        [0.4118, 0.5882],
        [0.4270, 0.5730],
        [0.4204, 0.5796],
        [0.3817, 0.6183],
        [0.4436, 0.5564],
        [0.3869, 0.6131],
        [0.3677, 0.6323],
        [0.4398, 0.5602],
        [0.4984, 0.5016]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3766, 0.6234],
        [0.4831, 0.5169],
        [0.4537, 0.5463],
        [0.4092, 0.5908],
        [0.3281, 0.6719],
        [0.4066, 0.5934],
        [0.3788, 0.6212],
        [0.3439, 0.6561],
        [0.3433, 0.6567],
        [0.4104, 0.5896],
        [0.4813, 0.5187],
        [0.5272, 0.4728]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4373, 0.5627],
        [0.3853, 0.6147],
        [0.3194, 0.6806],
        [0.3971, 0.6029],
        [0.5362, 0.4638],
        [0.4909, 0.5091],
        [0.4055, 0.5945],
        [0.4618, 0.5382],
        [0.5159, 0.4841],
        [0.3391, 0.6609],
        [0.4308, 0.5692],
        [0.3516, 0.6484]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3556, 0.6444],
        [0.5086, 0.4914],
        [0.4359, 0.5641],
        [0.5316, 0.4684],
        [0.3883, 0.6117],
        [0.4836, 0.5164],
        [0.3868, 0.6132],
        [0.4214, 0.5786],
        [0.4301, 0.5699],
        [0.5378, 0.4622],
        [0.4039, 0.5961],
        [0.4931, 0.5069]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.4206, 0.5794],
        [0.4024, 0.5976],
        [0.4899, 0.5101],
        [0.4307, 0.5693],
        [0.4402, 0.5598],
        [0.5818, 0.4182],
        [0.5281, 0.4719],
        [0.3592, 0.6408],
        [0.4762, 0.5238],
        [0.4048, 0.5952],
        [0.3773, 0.6227],
        [0.4114, 0.5886]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3202, 0.6798],
        [0.4800, 0.5200],
        [0.4113, 0.5887],
        [0.4713, 0.5287],
        [0.4984, 0.5016],
        [0.4173, 0.5827],
        [0.2648, 0.7352],
        [0.4694, 0.5306],
        [0.3243, 0.6757],
        [0.4344, 0.5656],
        [0.3316, 0.6684],
        [0.4751, 0.5249]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4649, 0.5351],
        [0.4257, 0.5743],
        [0.4568, 0.5432],
        [0.4182, 0.5818],
        [0.4223, 0.5777],
        [0.4067, 0.5933],
        [0.3868, 0.6132],
        [0.3608, 0.6392],
        [0.5022, 0.4978],
        [0.5232, 0.4768],
        [0.4388, 0.5612],
        [0.5012, 0.4988]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3940, 0.6060],
        [0.4141, 0.5859],
        [0.4133, 0.5867],
        [0.4364, 0.5636],
        [0.4891, 0.5109],
        [0.4760, 0.5240],
        [0.2883, 0.7117],
        [0.4305, 0.5695],
        [0.4355, 0.5645],
        [0.3980, 0.6020],
        [0.3194, 0.6806],
        [0.3972, 0.6028]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4568, 0.5432],
        [0.3715, 0.6285],
        [0.4478, 0.5522],
        [0.3980, 0.6020],
        [0.4452, 0.5548],
        [0.4803, 0.5197],
        [0.3545, 0.6455],
        [0.5743, 0.4257],
        [0.5736, 0.4264],
        [0.4249, 0.5751],
        [0.4443, 0.5557],
        [0.5241, 0.4759]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4506, 0.5494],
        [0.4578, 0.5422],
        [0.4102, 0.5898],
        [0.3653, 0.6347],
        [0.4121, 0.5879],
        [0.4483, 0.5517],
        [0.5098, 0.4902],
        [0.3609, 0.6391],
        [0.4001, 0.5999],
        [0.3477, 0.6523],
        [0.4098, 0.5902],
        [0.3610, 0.6390]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4066, 0.5934],
        [0.4215, 0.5785],
        [0.4309, 0.5691],
        [0.3458, 0.6542],
        [0.3890, 0.6110],
        [0.4538, 0.5462],
        [0.4120, 0.5880],
        [0.3769, 0.6231],
        [0.4056, 0.5944],
        [0.4943, 0.5057],
        [0.4700, 0.5300],
        [0.3296, 0.6704]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4461, 0.5539],
        [0.4080, 0.5920],
        [0.3592, 0.6408],
        [0.3530, 0.6470],
        [0.3501, 0.6499],
        [0.3903, 0.6097],
        [0.3754, 0.6246],
        [0.3827, 0.6173],
        [0.4386, 0.5614],
        [0.4079, 0.5921],
        [0.4209, 0.5791],
        [0.5015, 0.4985]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3842, 0.6158],
        [0.4668, 0.5332],
        [0.4656, 0.5344],
        [0.4580, 0.5420],
        [0.3477, 0.6523],
        [0.5431, 0.4569],
        [0.4126, 0.5874],
        [0.3800, 0.6200],
        [0.3994, 0.6006],
        [0.4617, 0.5383],
        [0.4667, 0.5333],
        [0.4644, 0.5355]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4178, 0.5822],
        [0.3543, 0.6457],
        [0.3375, 0.6625],
        [0.3850, 0.6150],
        [0.5740, 0.4260],
        [0.3833, 0.6167],
        [0.3887, 0.6113],
        [0.4022, 0.5978],
        [0.5029, 0.4971],
        [0.3446, 0.6554],
        [0.3634, 0.6366],
        [0.3948, 0.6052]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.4144, 0.5856],
        [0.4633, 0.5367],
        [0.4094, 0.5906],
        [0.5055, 0.4945],
        [0.4178, 0.5822],
        [0.4785, 0.5215],
        [0.4350, 0.5650],
        [0.3433, 0.6567],
        [0.3987, 0.6013],
        [0.5833, 0.4167],
        [0.3786, 0.6214],
        [0.4034, 0.5966]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3920, 0.6080],
        [0.3076, 0.6924],
        [0.4626, 0.5374],
        [0.2869, 0.7131],
        [0.4622, 0.5378],
        [0.5969, 0.4031],
        [0.3792, 0.6208],
        [0.3590, 0.6410],
        [0.3761, 0.6239],
        [0.4462, 0.5538],
        [0.3516, 0.6484],
        [0.3496, 0.6504]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4284, 0.5716],
        [0.3614, 0.6386],
        [0.4372, 0.5628],
        [0.4167, 0.5833],
        [0.4198, 0.5802],
        [0.3523, 0.6477],
        [0.2872, 0.7128],
        [0.3326, 0.6674],
        [0.3697, 0.6303],
        [0.4442, 0.5558],
        [0.3652, 0.6348],
        [0.4154, 0.5846]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4239, 0.5761],
        [0.4039, 0.5961],
        [0.4389, 0.5611],
        [0.4572, 0.5428],
        [0.3516, 0.6484],
        [0.3724, 0.6276],
        [0.3652, 0.6348],
        [0.2951, 0.7049],
        [0.3512, 0.6488],
        [0.5217, 0.4783],
        [0.3823, 0.6177],
        [0.4897, 0.5103]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4392, 0.5608],
        [0.3923, 0.6077],
        [0.3812, 0.6188],
        [0.5025, 0.4975],
        [0.4668, 0.5332],
        [0.4010, 0.5990],
        [0.2549, 0.7451],
        [0.3892, 0.6108],
        [0.4010, 0.5990],
        [0.4093, 0.5907],
        [0.4236, 0.5764],
        [0.3971, 0.6029]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3660, 0.6340],
        [0.4049, 0.5951],
        [0.3435, 0.6565],
        [0.3958, 0.6042],
        [0.4431, 0.5569],
        [0.4527, 0.5473],
        [0.3503, 0.6497],
        [0.4570, 0.5430],
        [0.4903, 0.5097],
        [0.3230, 0.6770],
        [0.4580, 0.5420],
        [0.4071, 0.5929]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3880, 0.6120],
        [0.4310, 0.5690],
        [0.4307, 0.5693],
        [0.4534, 0.5466],
        [0.3915, 0.6085],
        [0.4490, 0.5510],
        [0.2869, 0.7131],
        [0.3403, 0.6597],
        [0.3390, 0.6610],
        [0.4218, 0.5782],
        [0.4172, 0.5828],
        [0.3333, 0.6667]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4319, 0.5681],
        [0.3769, 0.6231],
        [0.4185, 0.5815],
        [0.3915, 0.6085],
        [0.4103, 0.5897],
        [0.4749, 0.5251],
        [0.4414, 0.5586],
        [0.4126, 0.5874],
        [0.3518, 0.6482],
        [0.3746, 0.6254],
        [0.3643, 0.6357],
        [0.3538, 0.6462]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4275, 0.5725],
        [0.3764, 0.6236],
        [0.3343, 0.6657],
        [0.3578, 0.6422],
        [0.3227, 0.6773],
        [0.4494, 0.5506],
        [0.3239, 0.6761],
        [0.3398, 0.6602],
        [0.4156, 0.5844],
        [0.3790, 0.6210],
        [0.3930, 0.6070],
        [0.4690, 0.5310]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3646, 0.6354],
        [0.4536, 0.5464],
        [0.5746, 0.4254],
        [0.3504, 0.6496],
        [0.3531, 0.6469],
        [0.3849, 0.6151],
        [0.3109, 0.6891],
        [0.2598, 0.7402],
        [0.3392, 0.6608],
        [0.5119, 0.4881],
        [0.4422, 0.5578],
        [0.4295, 0.5705]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4234, 0.5766],
        [0.3398, 0.6602],
        [0.3011, 0.6989],
        [0.4476, 0.5524],
        [0.5091, 0.4909],
        [0.3808, 0.6192],
        [0.3498, 0.6502],
        [0.5100, 0.4900],
        [0.4595, 0.5405],
        [0.3757, 0.6243],
        [0.3576, 0.6424],
        [0.2943, 0.7057]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3547, 0.6453],
        [0.3530, 0.6470],
        [0.4169, 0.5831],
        [0.4411, 0.5589],
        [0.3749, 0.6251],
        [0.4307, 0.5693],
        [0.3785, 0.6215],
        [0.3081, 0.6919],
        [0.3975, 0.6025],
        [0.5042, 0.4958],
        [0.2849, 0.7151],
        [0.4712, 0.5288]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.2222, 0.7778],
        [0.3924, 0.6076],
        [0.4235, 0.5765],
        [0.2737, 0.7263],
        [0.4282, 0.5718],
        [0.4534, 0.5466],
        [0.3317, 0.6683],
        [0.4148, 0.5852],
        [0.4028, 0.5972],
        [0.4632, 0.5368],
        [0.2927, 0.7073],
        [0.3560, 0.6440]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3608, 0.6392],
        [0.4469, 0.5531],
        [0.3532, 0.6468],
        [0.4144, 0.5856],
        [0.3817, 0.6183],
        [0.3574, 0.6426],
        [0.2891, 0.7109],
        [0.3355, 0.6645],
        [0.3668, 0.6332],
        [0.3903, 0.6097],
        [0.3593, 0.6407],
        [0.3334, 0.6666]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4735, 0.5265],
        [0.2903, 0.7097],
        [0.4351, 0.5649],
        [0.4632, 0.5368],
        [0.4421, 0.5579],
        [0.3400, 0.6600],
        [0.3309, 0.6691],
        [0.3286, 0.6714],
        [0.3708, 0.6292],
        [0.4321, 0.5679],
        [0.3639, 0.6361],
        [0.3973, 0.6027]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4608, 0.5392],
        [0.3094, 0.6906],
        [0.4261, 0.5739],
        [0.3703, 0.6297],
        [0.3644, 0.6356],
        [0.4570, 0.5430],
        [0.2452, 0.7548],
        [0.3708, 0.6292],
        [0.3429, 0.6571],
        [0.4267, 0.5733],
        [0.3844, 0.6156],
        [0.4519, 0.5481]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3501, 0.6499],
        [0.3676, 0.6324],
        [0.3643, 0.6357],
        [0.3684, 0.6316],
        [0.4234, 0.5766],
        [0.4643, 0.5357],
        [0.3315, 0.6685],
        [0.4864, 0.5136],
        [0.4217, 0.5783],
        [0.3425, 0.6575],
        [0.3420, 0.6580],
        [0.5071, 0.4929]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.3566, 0.6434],
        [0.3772, 0.6228],
        [0.5058, 0.4942],
        [0.4376, 0.5624],
        [0.3969, 0.6031],
        [0.4571, 0.5429],
        [0.3553, 0.6447],
        [0.3241, 0.6759],
        [0.3270, 0.6730],
        [0.3735, 0.6265],
        [0.4028, 0.5972],
        [0.2946, 0.7054]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3865, 0.6135],
        [0.4073, 0.5927],
        [0.3652, 0.6348],
        [0.4112, 0.5888],
        [0.5433, 0.4567],
        [0.4851, 0.5149],
        [0.4085, 0.5915],
        [0.4772, 0.5228],
        [0.3922, 0.6078],
        [0.4824, 0.5176],
        [0.2893, 0.7107],
        [0.3311, 0.6689]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3743, 0.6257],
        [0.3807, 0.6193],
        [0.3045, 0.6955],
        [0.3561, 0.6439],
        [0.3007, 0.6993],
        [0.3898, 0.6102],
        [0.2774, 0.7226],
        [0.3451, 0.6549],
        [0.3382, 0.6618],
        [0.2590, 0.7410],
        [0.2797, 0.7203],
        [0.4299, 0.5701]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3783, 0.6217],
        [0.4747, 0.5253],
        [0.4953, 0.5047],
        [0.3247, 0.6753],
        [0.2884, 0.7116],
        [0.4583, 0.5417],
        [0.3462, 0.6538],
        [0.3684, 0.6316],
        [0.2479, 0.7521],
        [0.4419, 0.5581],
        [0.4058, 0.5942],
        [0.4859, 0.5141]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4089, 0.5911],
        [0.4254, 0.5746],
        [0.2828, 0.7172],
        [0.3793, 0.6207],
        [0.4709, 0.5291],
        [0.5189, 0.4811],
        [0.3623, 0.6377],
        [0.4675, 0.5325],
        [0.4153, 0.5847],
        [0.3345, 0.6655],
        [0.3558, 0.6442],
        [0.3869, 0.6131]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3472, 0.6528],
        [0.4571, 0.5429],
        [0.4721, 0.5279],
        [0.4253, 0.5747],
        [0.3776, 0.6224],
        [0.4167, 0.5833],
        [0.3219, 0.6781],
        [0.3484, 0.6516],
        [0.3757, 0.6243],
        [0.5987, 0.4013],
        [0.3242, 0.6758],
        [0.4484, 0.5516]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.2533, 0.7467],
        [0.3700, 0.6300],
        [0.3474, 0.6526],
        [0.3080, 0.6920],
        [0.5052, 0.4948],
        [0.5930, 0.4070],
        [0.4262, 0.5738],
        [0.4064, 0.5936],
        [0.3419, 0.6581],
        [0.4539, 0.5461],
        [0.3280, 0.6720],
        [0.4383, 0.5617]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4145, 0.5855],
        [0.4456, 0.5544],
        [0.3110, 0.6890],
        [0.3423, 0.6577],
        [0.4683, 0.5317],
        [0.2848, 0.7152],
        [0.2956, 0.7044],
        [0.4196, 0.5804],
        [0.3503, 0.6497],
        [0.4714, 0.5286],
        [0.3904, 0.6096],
        [0.4940, 0.5060]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.5219, 0.4781],
        [0.4040, 0.5960],
        [0.4328, 0.5672],
        [0.4562, 0.5438],
        [0.4026, 0.5974],
        [0.4082, 0.5918],
        [0.3356, 0.6644],
        [0.3501, 0.6499],
        [0.3556, 0.6444],
        [0.4029, 0.5971],
        [0.3823, 0.6177],
        [0.3933, 0.6067]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3639, 0.6361],
        [0.3727, 0.6273],
        [0.3092, 0.6908],
        [0.3923, 0.6077],
        [0.5652, 0.4348],
        [0.4901, 0.5099],
        [0.2935, 0.7065],
        [0.3023, 0.6977],
        [0.3846, 0.6154],
        [0.4445, 0.5555],
        [0.2863, 0.7137],
        [0.4107, 0.5893]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3977, 0.6023],
        [0.4473, 0.5527],
        [0.4640, 0.5360],
        [0.3693, 0.6307],
        [0.4621, 0.5379],
        [0.4510, 0.5490],
        [0.3789, 0.6211],
        [0.4485, 0.5515],
        [0.5046, 0.4954],
        [0.3510, 0.6490],
        [0.4220, 0.5780],
        [0.4662, 0.5338]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4893, 0.5107],
        [0.5206, 0.4794],
        [0.3830, 0.6170],
        [0.3637, 0.6363],
        [0.4243, 0.5757],
        [0.4555, 0.5445],
        [0.4012, 0.5988],
        [0.4310, 0.5690],
        [0.4060, 0.5940],
        [0.4401, 0.5599],
        [0.3661, 0.6339],
        [0.2996, 0.7004]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4019, 0.5981],
        [0.3593, 0.6407],
        [0.4641, 0.5359],
        [0.4176, 0.5824],
        [0.4537, 0.5463],
        [0.4882, 0.5118],
        [0.3518, 0.6482],
        [0.4100, 0.5900],
        [0.3782, 0.6218],
        [0.5348, 0.4652],
        [0.3990, 0.6010],
        [0.3078, 0.6922]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.3948, 0.6052],
        [0.4803, 0.5197],
        [0.3771, 0.6229],
        [0.4260, 0.5740],
        [0.3559, 0.6441],
        [0.4396, 0.5604],
        [0.3816, 0.6184],
        [0.3805, 0.6195],
        [0.4148, 0.5852],
        [0.3827, 0.6173],
        [0.3264, 0.6736],
        [0.4652, 0.5348]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4065, 0.5935],
        [0.4745, 0.5255],
        [0.4571, 0.5429],
        [0.4945, 0.5055],
        [0.4086, 0.5914],
        [0.5038, 0.4962],
        [0.3321, 0.6679],
        [0.3933, 0.6067],
        [0.3878, 0.6122],
        [0.4907, 0.5093],
        [0.4793, 0.5207],
        [0.5076, 0.4924]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.4157, 0.5843],
        [0.3841, 0.6159],
        [0.3533, 0.6467],
        [0.4482, 0.5518],
        [0.5527, 0.4473],
        [0.3951, 0.6049],
        [0.4287, 0.5713],
        [0.4892, 0.5108],
        [0.4463, 0.5537],
        [0.4360, 0.5640],
        [0.4009, 0.5991],
        [0.2783, 0.7217]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3197, 0.6803],
        [0.3625, 0.6375],
        [0.3903, 0.6097],
        [0.3446, 0.6554],
        [0.5021, 0.4979],
        [0.4871, 0.5129],
        [0.4771, 0.5229],
        [0.3922, 0.6078],
        [0.4250, 0.5750],
        [0.5629, 0.4371],
        [0.3414, 0.6586],
        [0.4437, 0.5563]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.2956, 0.7044],
        [0.3831, 0.6169],
        [0.5226, 0.4774],
        [0.3127, 0.6873],
        [0.4635, 0.5365],
        [0.6156, 0.3844],
        [0.4300, 0.5700],
        [0.3938, 0.6062],
        [0.4725, 0.5275],
        [0.4368, 0.5632],
        [0.3026, 0.6974],
        [0.4687, 0.5313]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4166, 0.5834],
        [0.3712, 0.6288],
        [0.3432, 0.6568],
        [0.3424, 0.6576],
        [0.4070, 0.5930],
        [0.2642, 0.7358],
        [0.3184, 0.6816],
        [0.3959, 0.6041],
        [0.3141, 0.6859],
        [0.4388, 0.5612],
        [0.3983, 0.6017],
        [0.4552, 0.5448]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3497, 0.6503],
        [0.3352, 0.6648],
        [0.4406, 0.5594],
        [0.4201, 0.5799],
        [0.3506, 0.6494],
        [0.3850, 0.6150],
        [0.3034, 0.6966],
        [0.2864, 0.7136],
        [0.3507, 0.6493],
        [0.3716, 0.6284],
        [0.3848, 0.6152],
        [0.3568, 0.6432]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3615, 0.6385],
        [0.3161, 0.6839],
        [0.3277, 0.6723],
        [0.5195, 0.4805],
        [0.3690, 0.6310],
        [0.3804, 0.6196],
        [0.2452, 0.7548],
        [0.3357, 0.6643],
        [0.4192, 0.5808],
        [0.4254, 0.5746],
        [0.3213, 0.6787],
        [0.4075, 0.5925]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.4201, 0.5799],
        [0.3873, 0.6127],
        [0.3201, 0.6799],
        [0.4514, 0.5486],
        [0.3894, 0.6106],
        [0.4996, 0.5004],
        [0.3043, 0.6957],
        [0.4832, 0.5168],
        [0.5372, 0.4628],
        [0.3654, 0.6346],
        [0.4043, 0.5957],
        [0.4646, 0.5354]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5169, 0.4831],
        [0.4134, 0.5866],
        [0.3934, 0.6066],
        [0.3843, 0.6157],
        [0.3896, 0.6104],
        [0.5211, 0.4789],
        [0.3527, 0.6473],
        [0.3479, 0.6521],
        [0.4162, 0.5838],
        [0.5195, 0.4805],
        [0.3367, 0.6633],
        [0.3632, 0.6368]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4744, 0.5256],
        [0.4034, 0.5966],
        [0.5016, 0.4984],
        [0.3885, 0.6115],
        [0.3226, 0.6774],
        [0.4632, 0.5368],
        [0.3062, 0.6938],
        [0.4438, 0.5562],
        [0.4041, 0.5959],
        [0.4626, 0.5374],
        [0.3403, 0.6597],
        [0.3119, 0.6881]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4722, 0.5278],
        [0.4102, 0.5898],
        [0.3650, 0.6350],
        [0.3530, 0.6470],
        [0.2933, 0.7067],
        [0.4308, 0.5692],
        [0.3208, 0.6792],
        [0.3365, 0.6635],
        [0.2997, 0.7003],
        [0.3935, 0.6065],
        [0.3727, 0.6273],
        [0.4333, 0.5667]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.3617, 0.6383],
        [0.4556, 0.5444],
        [0.3975, 0.6025],
        [0.3778, 0.6222],
        [0.3350, 0.6650],
        [0.4778, 0.5222],
        [0.3838, 0.6162],
        [0.2990, 0.7010],
        [0.3222, 0.6778],
        [0.4606, 0.5394],
        [0.4169, 0.5831],
        [0.3753, 0.6247]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.2993, 0.7007],
        [0.3692, 0.6308],
        [0.3249, 0.6751],
        [0.3943, 0.6057],
        [0.5119, 0.4881],
        [0.4159, 0.5841],
        [0.3776, 0.6224],
        [0.3888, 0.6112],
        [0.2723, 0.7277],
        [0.3585, 0.6415],
        [0.3793, 0.6207],
        [0.3965, 0.6035]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3307, 0.6693],
        [0.3340, 0.6660],
        [0.3607, 0.6393],
        [0.4212, 0.5788],
        [0.3913, 0.6087],
        [0.5571, 0.4429],
        [0.4858, 0.5142],
        [0.3902, 0.6098],
        [0.4350, 0.5650],
        [0.4070, 0.5930],
        [0.2849, 0.7151],
        [0.4934, 0.5066]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3460, 0.6540],
        [0.3894, 0.6106],
        [0.4860, 0.5140],
        [0.3929, 0.6071],
        [0.4359, 0.5641],
        [0.5722, 0.4278],
        [0.4029, 0.5971],
        [0.4564, 0.5436],
        [0.5228, 0.4772],
        [0.4158, 0.5842],
        [0.3597, 0.6403],
        [0.3974, 0.6026]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.4203, 0.5797],
        [0.4725, 0.5275],
        [0.3653, 0.6347],
        [0.4429, 0.5571],
        [0.3852, 0.6148],
        [0.3311, 0.6689],
        [0.3025, 0.6975],
        [0.3332, 0.6668],
        [0.3179, 0.6821],
        [0.5168, 0.4832],
        [0.3639, 0.6361],
        [0.4542, 0.5458]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.4885, 0.5115],
        [0.3381, 0.6619],
        [0.4752, 0.5248],
        [0.3655, 0.6345],
        [0.3979, 0.6021],
        [0.3667, 0.6333],
        [0.3094, 0.6906],
        [0.3889, 0.6111],
        [0.4217, 0.5783],
        [0.4444, 0.5556],
        [0.3923, 0.6077],
        [0.3562, 0.6438]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.4160, 0.5840],
        [0.2836, 0.7164],
        [0.3774, 0.6226],
        [0.4072, 0.5928],
        [0.4058, 0.5942],
        [0.4345, 0.5655],
        [0.2920, 0.7080],
        [0.3347, 0.6653],
        [0.3617, 0.6383],
        [0.4287, 0.5713],
        [0.2866, 0.7134],
        [0.4025, 0.5975]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3916, 0.6084],
        [0.4032, 0.5968],
        [0.3666, 0.6334],
        [0.4157, 0.5843],
        [0.4360, 0.5640],
        [0.3884, 0.6116],
        [0.4115, 0.5885],
        [0.4999, 0.5001],
        [0.4663, 0.5337],
        [0.4197, 0.5803],
        [0.3825, 0.6175],
        [0.4443, 0.5557]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.4050, 0.5950],
        [0.3442, 0.6558],
        [0.3885, 0.6115],
        [0.4237, 0.5763],
        [0.5093, 0.4907],
        [0.5066, 0.4934],
        [0.4541, 0.5459],
        [0.3122, 0.6878],
        [0.3558, 0.6442],
        [0.3889, 0.6111],
        [0.4037, 0.5963],
        [0.3963, 0.6037]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.3557, 0.6443],
        [0.3886, 0.6114],
        [0.4235, 0.5765],
        [0.3207, 0.6793],
        [0.4939, 0.5061],
        [0.4464, 0.5536],
        [0.3970, 0.6030],
        [0.4837, 0.5163],
        [0.4273, 0.5727],
        [0.3973, 0.6027],
        [0.3533, 0.6467],
        [0.3360, 0.6640]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.4687, 0.5313],
        [0.4176, 0.5824],
        [0.3384, 0.6616],
        [0.3612, 0.6388],
        [0.3271, 0.6729],
        [0.3522, 0.6478],
        [0.3506, 0.6494],
        [0.3620, 0.6380],
        [0.2954, 0.7046],
        [0.2768, 0.7232],
        [0.3059, 0.6941],
        [0.3011, 0.6989]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.4063, 0.5937],
        [0.4058, 0.5942],
        [0.4631, 0.5369],
        [0.4121, 0.5879],
        [0.2910, 0.7090],
        [0.4304, 0.5696],
        [0.2941, 0.7059],
        [0.2253, 0.7747],
        [0.2446, 0.7554],
        [0.4568, 0.5432],
        [0.3636, 0.6364],
        [0.3548, 0.6452]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #50: tensor([[0.3184, 0.6816],
        [0.4648, 0.5352],
        [0.2510, 0.7490],
        [0.4911, 0.5089],
        [0.4701, 0.5299],
        [0.4148, 0.5852],
        [0.3479, 0.6521],
        [0.3948, 0.6052],
        [0.4514, 0.5486],
        [0.3694, 0.6306],
        [0.3819, 0.6181],
        [0.3225, 0.6775]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #100: tensor([[0.3562, 0.6438],
        [0.4386, 0.5614],
        [0.3006, 0.6994],
        [0.4151, 0.5849],
        [0.4015, 0.5985],
        [0.4776, 0.5224],
        [0.3593, 0.6407],
        [0.3569, 0.6431],
        [0.4642, 0.5358],
        [0.5553, 0.4447],
        [0.2920, 0.7080],
        [0.3595, 0.6405]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #150: tensor([[0.3125, 0.6875],
        [0.3662, 0.6338],
        [0.3905, 0.6095],
        [0.3212, 0.6788],
        [0.4067, 0.5933],
        [0.6189, 0.3811],
        [0.3575, 0.6425],
        [0.3759, 0.6241],
        [0.4430, 0.5570],
        [0.4543, 0.5457],
        [0.2856, 0.7144],
        [0.4152, 0.5848]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #200: tensor([[0.3358, 0.6642],
        [0.4422, 0.5578],
        [0.3947, 0.6053],
        [0.3325, 0.6675],
        [0.4228, 0.5772],
        [0.2797, 0.7203],
        [0.2735, 0.7265],
        [0.2779, 0.7221],
        [0.2797, 0.7203],
        [0.3450, 0.6550],
        [0.3041, 0.6959],
        [0.3986, 0.6014]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #250: tensor([[0.3419, 0.6581],
        [0.2852, 0.7148],
        [0.4550, 0.5450],
        [0.3290, 0.6710],
        [0.4074, 0.5926],
        [0.3086, 0.6914],
        [0.2947, 0.7053],
        [0.2934, 0.7066],
        [0.3092, 0.6908],
        [0.5490, 0.4510],
        [0.3105, 0.6895],
        [0.5011, 0.4989]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #300: tensor([[0.3488, 0.6512],
        [0.3425, 0.6575],
        [0.3604, 0.6396],
        [0.3728, 0.6272],
        [0.3570, 0.6430],
        [0.3553, 0.6447],
        [0.2923, 0.7077],
        [0.3678, 0.6322],
        [0.3383, 0.6617],
        [0.2936, 0.7064],
        [0.3572, 0.6428],
        [0.3340, 0.6660]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #350: tensor([[0.3848, 0.6152],
        [0.3676, 0.6324],
        [0.4669, 0.5331],
        [0.3289, 0.6711],
        [0.3949, 0.6051],
        [0.3590, 0.6410],
        [0.4023, 0.5977],
        [0.4213, 0.5787],
        [0.4490, 0.5510],
        [0.3457, 0.6543],
        [0.3206, 0.6794],
        [0.4267, 0.5733]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #400: tensor([[0.5219, 0.4781],
        [0.4216, 0.5784],
        [0.3133, 0.6867],
        [0.2858, 0.7142],
        [0.3082, 0.6918],
        [0.4428, 0.5572],
        [0.3314, 0.6686],
        [0.2943, 0.7057],
        [0.4554, 0.5446],
        [0.4162, 0.5838],
        [0.3878, 0.6122],
        [0.2727, 0.7273]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #450: tensor([[0.4095, 0.5905],
        [0.2875, 0.7125],
        [0.5647, 0.4353],
        [0.3694, 0.6306],
        [0.4391, 0.5609],
        [0.3765, 0.6235],
        [0.3751, 0.6249],
        [0.4642, 0.5358],
        [0.4236, 0.5764],
        [0.4089, 0.5911],
        [0.3904, 0.6096],
        [0.2859, 0.7141]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #500: tensor([[0.5034, 0.4966],
        [0.4085, 0.5915],
        [0.2962, 0.7038],
        [0.2694, 0.7306],
        [0.3193, 0.6807],
        [0.4357, 0.5643],
        [0.2982, 0.7018],
        [0.2995, 0.7005],
        [0.3219, 0.6781],
        [0.3213, 0.6787],
        [0.3205, 0.6795],
        [0.4387, 0.5613]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Iter #550: tensor([[0.2966, 0.7034],
        [0.4521, 0.5479],
        [0.4673, 0.5327],
        [0.3920, 0.6080],
        [0.2671, 0.7329],
        [0.4049, 0.5951],
        [0.3911, 0.6089],
        [0.3160, 0.6840],
        [0.2802, 0.7198],
        [0.4479, 0.5521],
        [0.3889, 0.6111],
        [0.3332, 0.6668]], device='cuda:0', grad_fn=<SoftmaxBackward>)
