Commit c3bf402

liqunfu authored and facebook-github-bot committed Sep 28, 2020
handle onnx nll with default ignore index (pytorch#44816)
Summary: In the ONNX NegativeLogLikelihoodLoss specification, ignore_index is optional and has no default value. Therefore, when converting the nll op to ONNX, we need to set the ignore_index attribute even if it was not specified on the PyTorch side (i.e. the default ignore_index=-100).

Pull Request resolved: pytorch#44816
Reviewed By: ezyang
Differential Revision: D23880354
Pulled By: bzinodev
fbshipit-source-id: d0bdd58d0a4507ed9ce37133e68533fe6d1bdf2b
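For context, a minimal sketch (not part of the commit; the module and file names are illustrative) of the scenario this change targets: exporting a loss module that relies on the default ignore_index. With this fix the exported SoftmaxCrossEntropyLoss / NegativeLogLikelihoodLoss node carries ignore_index = -100 explicitly, since the ONNX spec defines no default for that attribute.

import torch

class CrossEntropyModel(torch.nn.Module):
    def __init__(self):
        super(CrossEntropyModel, self).__init__()
        # default ignore_index=-100 is not passed explicitly here
        self.loss = torch.nn.CrossEntropyLoss()

    def forward(self, input, target):
        return self.loss(input, target)

x = torch.randn(3, 5)
y = torch.empty(3, dtype=torch.long).random_(5)
# opset 12 is the first opset with SoftmaxCrossEntropyLoss / NegativeLogLikelihoodLoss
torch.onnx.export(CrossEntropyModel(), (x, y), "ce_loss.onnx", opset_version=12)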
1 parent 8bdbedd commit c3bf402

7 files changed: +93 -95 lines

test/onnx/expect/TestOperators.test_softmaxcrossentropy.expect (+5)

@@ -8,6 +8,11 @@ graph {
     output: "2"
     name: "SoftmaxCrossEntropyLoss_0"
     op_type: "SoftmaxCrossEntropyLoss"
+    attribute {
+      name: "ignore_index"
+      i: -100
+      type: INT
+    }
     attribute {
       name: "reduction"
       s: "mean"

test/onnx/expect/TestOperators.test_softmaxcrossentropy_3d.expect (+5)

@@ -8,6 +8,11 @@ graph {
     output: "2"
     name: "SoftmaxCrossEntropyLoss_0"
     op_type: "SoftmaxCrossEntropyLoss"
+    attribute {
+      name: "ignore_index"
+      i: -100
+      type: INT
+    }
     attribute {
       name: "reduction"
       s: "mean"

test/onnx/expect/TestOperators.test_softmaxcrossentropy_3d_none.expect (+5)

@@ -8,6 +8,11 @@ graph {
     output: "2"
     name: "SoftmaxCrossEntropyLoss_0"
     op_type: "SoftmaxCrossEntropyLoss"
+    attribute {
+      name: "ignore_index"
+      i: -100
+      type: INT
+    }
     attribute {
       name: "reduction"
       s: "none"

test/onnx/expect/TestOperators.test_softmaxcrossentropy_4d.expect (+5)

@@ -8,6 +8,11 @@ graph {
     output: "2"
     name: "SoftmaxCrossEntropyLoss_0"
     op_type: "SoftmaxCrossEntropyLoss"
+    attribute {
+      name: "ignore_index"
+      i: -100
+      type: INT
+    }
     attribute {
       name: "reduction"
       s: "mean"

test/onnx/expect/TestOperators.test_softmaxcrossentropy_weights.expect (+5)

@@ -9,6 +9,11 @@ graph {
     output: "3"
     name: "SoftmaxCrossEntropyLoss_0"
     op_type: "SoftmaxCrossEntropyLoss"
+    attribute {
+      name: "ignore_index"
+      i: -100
+      type: INT
+    }
     attribute {
       name: "reduction"
       s: "mean"

test/onnx/test_pytorch_onnx_onnxruntime.py (+66 -87)

@@ -3742,138 +3742,102 @@ def forward(self, *tensor_list):
     @skipIfUnsupportedMinOpsetVersion(12)
     @disableScriptTest()
     def test_crossentropyloss(self):
-        x = torch.randn(3, 5)
-        y = torch.empty(3, dtype=torch.long).random_(5)
-        self._crossentropyloss(x, y)
+        for ignore_index in [-100, 1]:
+            x = torch.randn(3, 5)
+            y = torch.empty(3, dtype=torch.long).random_(5)
+            y[y == 1] = ignore_index
 
-        x = torch.randn(3, 5, 2)
-        y = torch.empty(3, 2, dtype=torch.long).random_(5)
-        self._crossentropyloss(x, y)
+            self._crossentropyloss(x, y, ignore_index)
 
-        x = torch.randn(3, 5, 2, 7)
-        y = torch.empty(3, 2, 7, dtype=torch.long).random_(5)
-        self._crossentropyloss(x, y)
+            x = torch.randn(3, 5, 2)
+            y = torch.empty(3, 2, dtype=torch.long).random_(5)
+            y[y == 1] = ignore_index
+            self._crossentropyloss(x, y, ignore_index)
 
-    def _crossentropyloss(self, x, y):
+            x = torch.randn(3, 5, 2, 7)
+            y = torch.empty(3, 2, 7, dtype=torch.long).random_(5)
+            y[y == 1] = ignore_index
+            self._crossentropyloss(x, y, ignore_index)
+
+    def _crossentropyloss(self, x, y, ignore_index):
         class CrossEntropyLossNone(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, ignore_index):
                 super(CrossEntropyLossNone, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='none')
+                if ignore_index == -100:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='none')
+                else:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='none', ignore_index=ignore_index)
 
             def forward(self, input, target):
                 return self.loss(input, target)
 
-        self.run_test(CrossEntropyLossNone(), input=(x, y))
+        self.run_test(CrossEntropyLossNone(ignore_index), input=(x, y))
 
         class CrossEntropyLossNoneWeight(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, ignore_index):
                 super(CrossEntropyLossNoneWeight, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='none', weight=torch.randn(5))
+                if ignore_index == -100:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='none', weight=torch.randn(5))
+                else:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='none', weight=torch.randn(5), ignore_index=ignore_index)
 
             def forward(self, input, target):
                 return self.loss(input, target)
 
-        self.run_test(CrossEntropyLossNoneWeight(), input=(x, y))
+        self.run_test(CrossEntropyLossNoneWeight(ignore_index), input=(x, y))
 
         class CrossEntropyLossSum(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, ignore_index):
                 super(CrossEntropyLossSum, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='sum')
+                if ignore_index == -100:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='sum')
+                else:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='sum', ignore_index=ignore_index)
 
             def forward(self, input, target):
                 return self.loss(input, target)
 
-        self.run_test(CrossEntropyLossSum(), input=(x, y))
+        self.run_test(CrossEntropyLossSum(ignore_index), input=(x, y))
 
         class CrossEntropyLossSumWeight(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, ignore_index):
                 super(CrossEntropyLossSumWeight, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='sum', weight=torch.randn(5))
+                if ignore_index == -100:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='sum', weight=torch.randn(5))
+                else:
+                    self.loss = torch.nn.CrossEntropyLoss(reduction='sum', weight=torch.randn(5), ignore_index=ignore_index)
 
             def forward(self, input, target):
                 return self.loss(input, target)
 
-        self.run_test(CrossEntropyLossSumWeight(), input=(x, y))
+        self.run_test(CrossEntropyLossSumWeight(ignore_index), input=(x, y))
 
         class CrossEntropyLossMean(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, ignore_index):
                 super(CrossEntropyLossMean, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss()
+                if ignore_index == -100:
+                    self.loss = torch.nn.CrossEntropyLoss()
+                else:
+                    self.loss = torch.nn.CrossEntropyLoss(ignore_index=ignore_index)
 
             def forward(self, input, target):
                 return self.loss(input, target)
 
-        self.run_test(CrossEntropyLossMean(), input=(x, y))
+        self.run_test(CrossEntropyLossMean(ignore_index), input=(x, y))
 
         class CrossEntropyLossMeanWeight(torch.nn.Module):
-            def __init__(self):
+            def __init__(self, ignore_index):
                 super(CrossEntropyLossMeanWeight, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(weight=torch.randn(5))
-
-            def forward(self, input, target):
-                return self.loss(input, target)
-
-        self.run_test(CrossEntropyLossMeanWeight(), input=(x, y))
-
-        class CrossEntropyLossNoneIgnoreIndex(torch.nn.Module):
-            def __init__(self):
-                super(CrossEntropyLossNoneIgnoreIndex, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='none', ignore_index=1)
-
-            def forward(self, input, target):
-                return self.loss(input, target)
-
-        self.run_test(CrossEntropyLossNoneIgnoreIndex(), input=(x, y))
-
-        class CrossEntropyLossNoneWeightIgnoreIndex(torch.nn.Module):
-            def __init__(self):
-                super(CrossEntropyLossNoneWeightIgnoreIndex, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='none', weight=torch.randn(5), ignore_index=1)
-
-            def forward(self, input, target):
-                return self.loss(input, target)
-
-        self.run_test(CrossEntropyLossNoneWeightIgnoreIndex(), input=(x, y))
-
-        class CrossEntropyLossSumIgnoreIndex(torch.nn.Module):
-            def __init__(self):
-                super(CrossEntropyLossSumIgnoreIndex, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='sum', ignore_index=1)
-
-            def forward(self, input, target):
-                return self.loss(input, target)
-
-        self.run_test(CrossEntropyLossSumIgnoreIndex(), input=(x, y))
-
-        class CrossEntropyLossSumWeightIgnoreIndex(torch.nn.Module):
-            def __init__(self):
-                super(CrossEntropyLossSumWeightIgnoreIndex, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(reduction='sum', weight=torch.randn(5), ignore_index=1)
-
-            def forward(self, input, target):
-                return self.loss(input, target)
-
-        self.run_test(CrossEntropyLossSumWeightIgnoreIndex(), input=(x, y))
-
-        class CrossEntropyLossMeanIgnoreIndex(torch.nn.Module):
-            def __init__(self):
-                super(CrossEntropyLossMeanIgnoreIndex, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(ignore_index=1)
+                if ignore_index == -100:
+                    self.loss = torch.nn.CrossEntropyLoss(weight=torch.randn(5))
+                else:
+                    self.loss = torch.nn.CrossEntropyLoss(weight=torch.randn(5), ignore_index=ignore_index)
 
             def forward(self, input, target):
                 return self.loss(input, target)
 
-        self.run_test(CrossEntropyLossMeanIgnoreIndex(), input=(x, y))
+        self.run_test(CrossEntropyLossMeanWeight(ignore_index), input=(x, y))
 
-        class CrossEntropyLossMeanWeightIgnoreIndex(torch.nn.Module):
-            def __init__(self):
-                super(CrossEntropyLossMeanWeightIgnoreIndex, self).__init__()
-                self.loss = torch.nn.CrossEntropyLoss(weight=torch.randn(5), ignore_index=1)
-
-            def forward(self, input, target):
-                return self.loss(input, target)
-
-        self.run_test(CrossEntropyLossMeanWeightIgnoreIndex(), input=(x, y))
 
     @skipIfUnsupportedMinOpsetVersion(9)
     def test_kldiv_loss(self):

@@ -3957,6 +3921,9 @@ def forward(self, input, target):
         N, C = 5, 4
         input = torch.randn(N, 16)
         target = torch.empty(N, dtype=torch.long).random_(0, C)
+
+        # using test data containing default ignore_index=-100
+        target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
     @skipIfUnsupportedMinOpsetVersion(12)

@@ -3976,6 +3943,9 @@ def forward(self, input, target):
         N, C = 5, 4
         input = torch.randn(N, 16, 10, 10)
         target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
+
+        # using test data containing default ignore_index=-100
+        target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
     @skipIfUnsupportedMinOpsetVersion(12)

@@ -3995,6 +3965,9 @@ def forward(self, input, target):
         N, C = 5, 4
         input = torch.randn(N, 16, 10, 10)
         target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
+
+        # using test data containing default ignore_index=-100
+        target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
     @skipIfUnsupportedMinOpsetVersion(12)

@@ -4014,6 +3987,9 @@ def forward(self, input, target):
         N, C = 5, 4
         input = torch.randn(N, 16, 10, 10)
         target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
+
+        # using test data containing default ignore_index=-100
+        target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
     @skipIfUnsupportedMinOpsetVersion(12)

@@ -4033,6 +4009,9 @@ def forward(self, input, target):
         N, C = 5, 4
         input = torch.randn(N, 16, 10, 10)
         target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
+
+        # using test data containing default ignore_index=-100
+        target[target == 1] = -100
         self.run_test(NLLModel(), (input, target))
 
     @skipIfUnsupportedMinOpsetVersion(12)
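The reworked tests above funnel every variant through run_test, which compares PyTorch eager output against ONNX Runtime. A hand-rolled sketch of that comparison for the default ignore_index case (file name, input names, and tolerance are illustrative, not taken from the test suite):

import numpy as np
import onnxruntime
import torch

x = torch.randn(3, 5)
y = torch.empty(3, dtype=torch.long).random_(5)
y[y == 1] = -100  # the target really contains the default ignore_index

loss = torch.nn.CrossEntropyLoss(reduction='none')
torch.onnx.export(loss, (x, y), "ce_none.onnx", opset_version=12,
                  input_names=["input", "target"])

sess = onnxruntime.InferenceSession("ce_none.onnx", providers=["CPUExecutionProvider"])
ort_out = sess.run(None, {"input": x.numpy(), "target": y.numpy()})[0]
# ignored positions come back as 0 from both backends once the attribute is exported
assert np.allclose(loss(x, y).numpy(), ort_out, atol=1e-5)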

torch/onnx/symbolic_opset12.py (+2 -8)

@@ -36,15 +36,9 @@ def nll_loss(g, self, target, weight, reduction, ignore_index):
     reduction_vals = ['none', 'mean', 'sum']
     reduction = reduction_vals[reduction]
 
-    # when ignore_index is not specified, ignore_index == onnx::Constant[value={-100}]
+    # in onnx NegativeLogLikelihoodLoss specification, ignore_index is optional without default value.
+    # therefore we need to set ignore_index attribute even if it is not specified (e.g. ignore_index=-100).
     ignore_index = sym_help._maybe_get_const(ignore_index, 'i')
-    if ignore_index == -100:
-        if weight.node().mustBeNone():
-            return g.op("NegativeLogLikelihoodLoss", self, target, reduction_s=reduction)
-        else:
-            return g.op("NegativeLogLikelihoodLoss", self, target, weight, reduction_s=reduction)
-
-    # if ignore_index is specified, compute nllloss with no reduction and apply the reduction afterwards
     if weight.node().mustBeNone():
         nllloss = g.op("NegativeLogLikelihoodLoss", self, target, reduction_s=reduction, ignore_index_i=ignore_index)
     else:
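Why dropping the old early-return branch is safe: when the target never actually contains -100, ignoring index -100 ignores nothing, so always emitting the attribute cannot change the result. A tiny numpy check of that reasoning (illustrative only, not part of the PyTorch or ONNX sources):

import numpy as np

np.random.seed(0)
logits = np.random.randn(4, 3)               # (N, C) scores
target = np.random.randint(0, 3, size=4)     # valid class indices only, never -100

log_probs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))
nll = -log_probs[np.arange(4), target]       # per-sample NLL, no ignore_index

mask = target != -100                        # what ignore_index=-100 would mask out
nll_ignored = np.where(mask, nll, 0.0)       # NLL with ignore_index applied

assert np.allclose(nll, nll_ignored)         # identical when -100 never occurs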
