Skip to content

Commit 5966aab

Browse files
committed
Add tensor/batch factories and copies.
Signed-off-by: Michał Zientkiewicz <mzient@gmail.com>
1 parent 82274b4 commit 5966aab

File tree

4 files changed

+49
-9
lines changed

4 files changed

+49
-9
lines changed

dali/python/nvidia/dali/experimental/dali2/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919
from ._eval_mode import EvalMode
2020
from ._type import * # noqa: F403
21-
from ._tensor import Tensor
22-
from ._batch import Batch
21+
from ._tensor import Tensor, tensor
22+
from ._batch import Batch, batch
2323
from ._device import Device
2424

2525
from . import fn

dali/python/nvidia/dali/experimental/dali2/_batch.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ._device import Device
2121
from . import _eval_mode
2222
from . import _invocation
23+
import copy
2324

2425

2526
class BatchedSlice:
@@ -74,10 +75,12 @@ def __init__(
7475
device: Optional[Device] = None,
7576
layout: Optional[str] = None,
7677
invocation_result: Optional[_invocation.InvocationResult] = None,
78+
copy: bool = False,
7779
):
7880
assert isinstance(layout, str) or layout is None
7981
self._wraps_external_data = False
8082
self._tensors = None
83+
copied = False
8184
if tensors is not None:
8285
self._tensors = []
8386
if len(tensors) == 0:
@@ -99,6 +102,8 @@ def __init__(
99102
self._tensors.append(sample)
100103
if sample._wraps_external_data:
101104
self._wraps_external_data = True
105+
else:
106+
copied = True
102107

103108
if dtype is not None:
104109
if not isinstance(dtype, DType):
@@ -112,6 +117,9 @@ def __init__(
112117
if self._tensors and self._tensors[0]._shape:
113118
self._ndim = len(self._tensors[0]._shape)
114119

120+
if copy and self._backend is not None and not copied:
121+
self.assign(self.to_device(self.device, force_copy=True).evaluate())
122+
115123
if _eval_mode.EvalMode.current().value >= _eval_mode.EvalMode.eager.value:
116124
self.evaluate()
117125

@@ -175,8 +183,8 @@ def tensors(self):
175183
t._backend = self._backend[i]
176184
return self._tensors
177185

178-
def to_device(self, device: Device) -> "Batch":
179-
if self.device == device:
186+
def to_device(self, device: Device, force_copy: bool = False) -> "Batch":
187+
if self.device == device and not force_copy:
180188
return self
181189
else:
182190
with device:
@@ -393,3 +401,15 @@ def __xor__(self, other):
393401

394402
def __rxor__(self, other):
395403
return _arithm_op("bitxor", other, self)
404+
405+
406+
def batch(
    tensors: Union[List[Any], Batch],
    dtype: Optional[DType] = None,
    device: Optional[Device] = None,
    layout: Optional[str] = None,
):
    """Create a ``Batch`` that owns a copy of the supplied data.

    Parameters
    ----------
    tensors : list or Batch
        Either an existing ``Batch`` (which is deep-copied) or a list of
        per-sample data to wrap in a new ``Batch``.
    dtype : DType, optional
        Requested element type (applied when constructing from a list).
    device : Device, optional
        Target device; when copying an existing ``Batch`` and no device is
        given, the batch's current device is kept.
    layout : str, optional
        Sample layout string (applied when constructing from a list).

    Returns
    -------
    Batch
        A batch that owns its data (never wraps external memory).
    """
    if isinstance(tensors, Batch):
        # Keep the source batch's device when none was requested.
        target_device = device if device is not None else tensors.device
        # BUG FIX: the original assigned this to a local and fell through,
        # so this branch returned None. Return the evaluated copy instead.
        # NOTE(review): dtype/layout are ignored on this path — presumably
        # the copy keeps the source's dtype/layout; confirm intended.
        return tensors.to_device(target_device, force_copy=True).evaluate()
    else:
        # Build a fresh Batch; copy=True guarantees owned (non-external) data.
        return Batch(tensors, dtype=dtype, device=device, layout=layout, copy=True)

dali/python/nvidia/dali/experimental/dali2/_op_builder.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ def call(*inputs, batch_size=None, device=None, **raw_kwargs):
431431

432432
# If device is not specified, infer it from the inputs and call_args
433433
if device is None:
434+
434435
def _infer_device():
435436
for inp in inputs:
436437
if inp is None:
@@ -445,6 +446,7 @@ def _infer_device():
445446
if dev is not None and dev.device_type == "gpu":
446447
return dev
447448
return _device.Device("cpu")
449+
448450
device = _infer_device()
449451
elif not isinstance(device, _device.Device):
450452
device = _device.Device(device)

dali/python/nvidia/dali/experimental/dali2/_tensor.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(
4040
batch: Optional[Any] = None,
4141
index_in_batch: Optional[int] = None,
4242
invocation_result: Optional[_invocation.InvocationResult] = None,
43+
copy: bool = False,
4344
):
4445
if layout is None:
4546
layout = ""
@@ -52,6 +53,7 @@ def __init__(
5253
self._index_in_batch = index_in_batch
5354
self._invocation_result = None
5455
self._wraps_external_data = False
56+
copied = False
5557

5658
from . import fn
5759

@@ -77,13 +79,15 @@ def __init__(
7779
self.assign(data)
7880
self._wraps_external_data = data._wraps_external_data
7981
else:
80-
self.assign(data.to_device(device).evaluate())
82+
dev = data.to_device(device).evaluate()
83+
if dev is not self:
84+
copied = True
85+
self.assign(dev)
86+
self._wraps_external_data = not copied
8187
else:
8288
self.assign(fn.cast(data, dtype, device=device).evaluate())
83-
return
8489
elif isinstance(data, TensorSlice):
8590
self._slice = data
86-
return
8791
elif hasattr(data, "__dlpack__"):
8892
self._backend = TensorCPU(data, layout)
8993
self._wraps_external_data = True
@@ -99,10 +103,12 @@ def __init__(
99103
layout,
100104
False,
101105
)
106+
copied = True
102107
self._wraps_external_data = False
103108
self._dtype = dtype
104109
else:
105110
self._backend = TensorCPU(np.array(data), layout, False)
111+
copied = True
106112
self._wraps_external_data = False
107113

108114
if device is not None:
@@ -131,6 +137,9 @@ def __init__(
131137
if _eval_mode.EvalMode.current().value >= _eval_mode.EvalMode.eager.value:
132138
self.evaluate()
133139

140+
if copy and self._backend is not None and not copied:
141+
self.assign(self.to_device(self.device, force_copy=True).evaluate())
142+
134143
def _is_external(self) -> bool:
135144
return self._wraps_external_data
136145

@@ -150,8 +159,8 @@ def device(self) -> Device:
150159
else:
151160
raise RuntimeError("Device not set")
152161

153-
def to_device(self, device: Device) -> "Tensor":
154-
if self.device == device:
162+
def to_device(self, device: Device, force_copy: bool = False) -> "Tensor":
163+
if self.device == device and not force_copy:
155164
return self
156165
else:
157166
with device:
@@ -548,3 +557,12 @@ def evaluate(self):
548557
from . import fn
549558

550559
return fn.tensor_subscript(self._tensor, **args).evaluate()
560+
561+
562+
def tensor(
    data: Any,
    dtype: Optional[Any] = None,
    device: Optional[Device] = None,
    layout: Optional[str] = None,
):
    """Construct a ``Tensor`` that owns a copy of ``data``.

    Thin factory over ``Tensor``: forwards all arguments unchanged and
    passes ``copy=True`` so the result never wraps external memory.

    Parameters
    ----------
    data : Any
        Source data (e.g. another Tensor, an array-like, or a scalar).
    dtype : optional
        Requested element type.
    device : Device, optional
        Target device for the tensor.
    layout : str, optional
        Layout string describing the data's dimensions.

    Returns
    -------
    Tensor
        A tensor holding its own copy of the data.
    """
    return Tensor(data, dtype=dtype, device=device, layout=layout, copy=True)

0 commit comments

Comments
 (0)