[PPDiffusers] Not use recompute #374

Closed
37 changes: 31 additions & 6 deletions ppdiffusers/ppdiffusers/models/unet_3d_blocks.py
@@ -18,7 +18,7 @@
from paddle import nn
from paddle.distributed.fleet.utils import recompute

-from ..utils import recompute_use_reentrant
+from ..utils import recompute_use_reentrant, use_old_recompute
from ..utils.paddle_utils import apply_freeu
from .attention import Attention
from .dual_transformer_2d import DualTransformer2DModel
@@ -1013,7 +1013,12 @@ def forward(

blocks = zip(self.resnets, self.motion_modules)
for resnet, motion_module in blocks:
-if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+if (
+    self.training
+    and self.gradient_checkpointing
+    and not hidden_states.stop_gradient
+    and not use_old_recompute()
+):

Member:
These shouldn't need to be added, right? These modules weren't there before, in 0.19.4.

@co63oc (Contributor, Author), Jan 15, 2024:
https://github.com/PaddlePaddle/PaddleMIX/blob/ppdiffusers0.19.4/ppdiffusers/ppdiffusers/models/unet_3d_blocks.py
In 0.19.4, DownBlockMotion did not have these modules; they were added as part of the upgrade. Isn't the goal to test recompute? "and not use_old_recompute()" was added so that recompute can be tested.

def create_custom_forward(module):
def custom_forward(*inputs):
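
A minimal sketch of what the guard discussed in this hunk selects between, for readers following the review thread. run_block is a hypothetical helper, not code from the repository; the real forward methods also pass block-specific arguments and, presumably, a use_reentrant option derived from recompute_use_reentrant(), all omitted here.

from paddle.distributed.fleet.utils import recompute
from ppdiffusers.utils import use_old_recompute

def run_block(module, hidden_states, temb, training=True, gradient_checkpointing=True):
    # Mirror of the guard added in this PR: only take the recompute path when the
    # global switch does NOT ask for the old behaviour.
    if (
        training
        and gradient_checkpointing
        and not hidden_states.stop_gradient
        and not use_old_recompute()
    ):
        def create_custom_forward(mod):
            def custom_forward(*inputs):
                return mod(*inputs)
            return custom_forward

        # recompute drops intermediate activations and re-runs the forward during backward.
        hidden_states = recompute(create_custom_forward(module), hidden_states, temb)
    else:
        # use_old_recompute() is True (or checkpointing is off): plain forward call.
        hidden_states = module(hidden_states, temb)
    return hidden_states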
@@ -1183,7 +1188,12 @@ def forward(

blocks = list(zip(self.resnets, self.attentions, self.motion_modules))
for i, (resnet, attn, motion_module) in enumerate(blocks):
-if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+if (
+    self.training
+    and self.gradient_checkpointing
+    and not hidden_states.stop_gradient
+    and not use_old_recompute()
+):

Member:
These shouldn't need to be added, right? These modules weren't there before, in 0.19.4.

def create_custom_forward(module, return_dict=None):
def custom_forward(*inputs):
@@ -1387,7 +1397,12 @@ def forward(

hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1)

-if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+if (
+    self.training
+    and self.gradient_checkpointing
+    and not hidden_states.stop_gradient
+    and not use_old_recompute()
+):

Member:
These shouldn't need to be added, right? These modules weren't there before, in 0.19.4.
def create_custom_forward(module, return_dict=None):
def custom_forward(*inputs):
@@ -1542,7 +1557,12 @@ def forward(

hidden_states = paddle.concat([hidden_states, res_hidden_states], axis=1)

-if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+if (
+    self.training
+    and self.gradient_checkpointing
+    and not hidden_states.stop_gradient
+    and not use_old_recompute()
+):

Member:
These shouldn't need to be added, right? These weren't there before.

def create_custom_forward(module):
def custom_forward(*inputs):
@@ -1699,7 +1719,12 @@ def forward(

blocks = zip(self.attentions, self.resnets[1:], self.motion_modules)
for attn, resnet, motion_module in blocks:
-if self.training and self.gradient_checkpointing and not hidden_states.stop_gradient:
+if (
+    self.training
+    and self.gradient_checkpointing
+    and not hidden_states.stop_gradient
+    and not use_old_recompute()
+):

Member:
These shouldn't need to be added, right? These modules weren't there before, in 0.19.4.

def create_custom_forward(module, return_dict=None):
def custom_forward(*inputs):
6 changes: 3 additions & 3 deletions ppdiffusers/ppdiffusers/models/vae.py
@@ -19,7 +19,7 @@
import paddle.nn as nn
from paddle.distributed.fleet.utils import recompute

-from ..utils import BaseOutput, recompute_use_reentrant
+from ..utils import BaseOutput, recompute_use_reentrant, use_old_recompute
from ..utils.paddle_utils import randn_tensor
from .activations import get_activation
from .attention_processor import SpatialNorm
@@ -850,7 +850,7 @@ def __init__(

def forward(self, x: paddle.Tensor) -> paddle.Tensor:
r"""The forward method of the `EncoderTiny` class."""
-if self.training and self.gradient_checkpointing and not x.stop_gradient:
+if self.training and self.gradient_checkpointing and not x.stop_gradient and not use_old_recompute():

def create_custom_forward(module):
def custom_forward(*inputs):
@@ -932,7 +932,7 @@ def forward(self, x: paddle.Tensor) -> paddle.Tensor:
# Clamp.
x = nn.functional.tanh(x / 3) * 3

-if self.training and self.gradient_checkpointing and not x.stop_gradient:
+if self.training and self.gradient_checkpointing and not x.stop_gradient and not use_old_recompute():

def create_custom_forward(module):
def custom_forward(*inputs):
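
On the vae.py side the same switch appears as a one-line condition. Below is a minimal sketch of an EncoderTiny/DecoderTiny-style forward, under the assumptions that the sub-layers live in a self.layers Sequential and that the fallback branch simply calls them directly; TinyBlockSketch is a made-up class, not the repository's code.

import paddle
import paddle.nn as nn
from paddle.distributed.fleet.utils import recompute
from ppdiffusers.utils import use_old_recompute

class TinyBlockSketch(nn.Layer):
    def __init__(self, layers):
        super().__init__()
        self.layers = nn.Sequential(*layers)
        self.gradient_checkpointing = False  # enabled externally when training with checkpointing

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        if self.training and self.gradient_checkpointing and not x.stop_gradient and not use_old_recompute():
            def create_custom_forward(module):
                def custom_forward(*inputs):
                    return module(*inputs)
                return custom_forward

            # Memory-saving path: activations inside self.layers are recomputed in backward.
            x = recompute(create_custom_forward(self.layers), x)
        else:
            # use_old_recompute() returned True (or checkpointing is off): plain forward call.
            x = self.layers(x)
        return x

Under this pattern, a True return from use_old_recompute() makes every guarded block fall through to the plain call without touching the individual forward methods.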