diff --git a/dali/test/python/nose2_attrib_generators.py b/dali/test/python/nose2_attrib_generators.py new file mode 100644 index 00000000000..8cafb68f7e7 --- /dev/null +++ b/dali/test/python/nose2_attrib_generators.py @@ -0,0 +1,134 @@ +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Custom nose2 plugin to filter generator test functions by attributes +before they are called (preventing imports of optional dependencies or other code execution). + +This plugin monkey-patches the Generators plugin's _testsFromGeneratorFunc +method to check attributes before calling generator functions. +""" +from nose2.events import Plugin +import logging + +log = logging.getLogger(__name__) + + +class AttributeGeneratorFilter(Plugin): + """Filter generator functions by attributes before calling them.""" + +    configSection = "attrib-generators" + alwaysOn = True + + def __init__(self): + super().__init__() + self._patched = False + + def _get_attrib_plugin(self): + """Get the attrib plugin from the session.""" + for plugin in self.session.plugins: + if plugin.__class__.__name__ == "AttributeSelector": + return plugin + return None + + def _build_attribs_list(self, attrib_plugin): + """Build the attribs list from the attrib plugin's -A configuration. + + This replicates the logic from AttributeSelector.moduleLoadedSuite + for -A filters only (not -E eval filters). 
+ """ + attribs = [] + + # Handle -A (attribute) filters + for attr in attrib_plugin.attribs: + attr_group = [] + for attrib in attr.strip().split(","): + if not attrib: + continue + items = attrib.split("=", 1) + if len(items) > 1: + # "name=value" + key, value = items + else: + key = items[0] + if key[0] == "!": + # "!name" + key = key[1:] + value = False + else: + # "name" + value = True + attr_group.append((key, value)) + attribs.append(attr_group) + + return attribs + + def _matches_attrib_filter(self, test_func, attrib_plugin): + """Check if test_func matches the attribute filter from attrib plugin.""" + if not attrib_plugin: + return True + + if not attrib_plugin.attribs: + return True + + # Build attribs list using attrib plugin's logic + attribs = self._build_attribs_list(attrib_plugin) + + if not attribs: + return True + + # Use the plugin's validateAttrib method + return attrib_plugin.validateAttrib(test_func, attribs) + + def _patch_generator_plugin(self): + """Monkey-patch the Generators plugin to check attributes first.""" + if self._patched: + return + + # Find the Generators plugin + gen_plugin = None + for plugin in self.session.plugins: + if plugin.__class__.__name__ == "Generators": + gen_plugin = plugin + break + + if not gen_plugin: + log.warning("Could not find Generators plugin to patch") + return + + # Save original method + original_tests_from_gen = gen_plugin._testsFromGeneratorFunc + attrib_filter_self = self + + # Create patched method + def patched_tests_from_gen(event, obj): + """Check attributes before calling generator function.""" + attrib_plugin = attrib_filter_self._get_attrib_plugin() + + # Check if generator function matches attribute filter + if not attrib_filter_self._matches_attrib_filter(obj, attrib_plugin): + log.debug(f"Skipping generator {obj.__name__} due to attribute filter") + return [] # Return empty list + + # Call original method + return original_tests_from_gen(event, obj) + + # Monkey-patch it + gen_plugin._testsFromGeneratorFunc = patched_tests_from_gen + self._patched = True + log.debug("Patched Generators plugin to check attributes") + + def handleArgs(self, event): + """Patch right after argument handling, before test discovery.""" + self._patch_generator_plugin() diff --git a/dali/test/python/nose_utils.py b/dali/test/python/nose_utils.py index 19d4358e1c8..ea51d677375 100644 --- a/dali/test/python/nose_utils.py +++ b/dali/test/python/nose_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,132 +11,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import sys -import collections - -if sys.version_info >= (3, 12): - # to make sure we can import anything from nose - from importlib import machinery, util - from importlib._bootstrap import _exec, _load - import modulefinder - import types - import unittest - - # the below are based on https://github.com/python/cpython/blob/3.11/Lib/imp.py - # based on PSF license - def find_module(name, path): - return modulefinder.ModuleFinder(path).find_module(name, path) - - def load_module(name, file, filename, details): - PY_SOURCE = 1 - PY_COMPILED = 2 - - class _HackedGetData: - """Compatibility support for 'file' arguments of various load_*() - functions.""" - - def __init__(self, fullname, path, file=None): - super().__init__(fullname, path) - self.file = file - - def get_data(self, path): - """Gross hack to contort loader to deal w/ load_*()'s bad API.""" - if self.file and path == self.path: - # The contract of get_data() requires us to return bytes. Reopen the - # file in binary mode if needed. - file = None - if not self.file.closed: - file = self.file - if "b" not in file.mode: - file.close() - if self.file.closed: - self.file = file = open(self.path, "rb") - - with file: - return file.read() - else: - return super().get_data(path) - - class _LoadSourceCompatibility(_HackedGetData, machinery.SourceFileLoader): - """Compatibility support for implementing load_source().""" - - _, mode, type_ = details - if mode and (not mode.startswith("r") or "+" in mode): - raise ValueError("invalid file open mode {!r}".format(mode)) - elif file is None and type_ in {PY_SOURCE, PY_COMPILED}: - msg = "file object required for import (type code {})".format(type_) - raise ValueError(msg) - assert type_ == PY_SOURCE, "load_module replacement supports only PY_SOURCE file type" - loader = _LoadSourceCompatibility(name, filename, file) - spec = util.spec_from_file_location(name, filename, loader=loader) - if name in sys.modules: - module = _exec(spec, sys.modules[name]) - else: - module = _load(spec) - # To allow reloading to potentially work, use a non-hacked loader which - # won't rely on a now-closed file object. 
- module.__loader__ = machinery.SourceFileLoader(name, filename) - module.__spec__.loader = module.__loader__ - return module - - def acquire_lock(): - pass - - def release_lock(): - pass - context = { - "find_module": find_module, - "load_module": load_module, - "acquire_lock": acquire_lock, - "release_lock": release_lock, - } - imp_module = types.ModuleType("imp", "Mimics old imp module") - imp_module.__dict__.update(context) - sys.modules["imp"] = imp_module - unittest._TextTestResult = unittest.TextTestResult - -# Handle pkg_resources deprecation/removal -try: - import pkg_resources # noqa: F401 -except ImportError: - from importlib import metadata - import types - - def iter_entry_points(group, name=None): - """Mimics pkg_resources.iter_entry_points using importlib.metadata.""" - eps = metadata.entry_points() - selected = eps.select(group=group) - - if name is not None: - selected = [ep for ep in selected if ep.name == name] - - return selected - - pkg_resources_module = types.ModuleType("pkg_resources", "Mimics pkg_resources module") - pkg_resources_module.iter_entry_points = iter_entry_points - sys.modules["pkg_resources"] = pkg_resources_module - -import nose.case -import nose.inspector -import nose.loader -import nose.suite -import nose.plugins.attrib -from nose import SkipTest, with_setup # noqa: F401 -from nose.plugins.attrib import attr # noqa: F401 -from nose.tools import nottest # noqa: F401 - -if sys.version_info >= (3, 10) and not hasattr(collections, "Callable"): - nose.case.collections = collections.abc - nose.inspector.collections = collections.abc - nose.loader.collections = collections.abc - nose.suite.collections = collections.abc - nose.plugins.attrib.collections = collections.abc - -import nose.tools as tools +from unittest import SkipTest # noqa: F401 +import unittest import re import fnmatch -import unittest +import functools + + +def attr(*tags): + """Set test attributes for nose2 filtering with -A flag. + + Usage: @attr("pytorch", "slow") + Filtering: nose2 -A 'pytorch' or nose2 -A '!slow' + """ + + def decorator(func): + for tag in tags: + setattr(func, tag, True) + return func + + return decorator + + +def nottest(func): + """Mark function as not a test.""" + func.__test__ = False + return func class empty_case(unittest.TestCase): @@ -144,6 +45,11 @@ def nop(): pass +# Module-level TestCase instance for assertions +_test_case = unittest.TestCase() +_test_case.maxDiff = None # Show full diff on assertion failures + + def assert_equals(x, y): foo = empty_case() foo.assertEqual(x, y) @@ -187,28 +93,78 @@ def get_pattern(glob=None, regex=None, match_case=None): def assert_raises(exception, *args, glob=None, regex=None, match_case=None, **kwargs): """ - Wrapper combining `nose.tools.assert_raises` and `nose.tools.assert_raises_regex`. + Wrapper combining unittest assertRaises and assertRaisesRegex. Specify ``regex=pattern`` or ``glob=pattern`` to check error message of expected exception against the pattern. Value for `glob` must be a string, `regex` can be either a literal or compiled regex pattern. By default, the check will ignore case, if called with `glob` or a literal for `regex`. To enforce case sensitive check pass ``match_case=True``. Don't specify `match_case` if passing already compiled regex pattern. 
- """ - if glob is None and regex is None: - return tools.assert_raises(exception, *args, **kwargs) + Can be used as context manager or with callable: + with assert_raises(Exception): + raise Exception() - pattern = get_pattern(glob, regex, match_case) - return tools.assert_raises_regex(exception, pattern, *args, **kwargs) + assert_raises(Exception, callable, arg1, arg2, kwarg=value) + """ + if glob is None and regex is None: + # Use unittest's assertRaises + if args: + # Called with callable: assert_raises(Exception, callable, *args, **kwargs) + callable_func = args[0] + callable_args = args[1:] + with _test_case.assertRaises(exception): + callable_func(*callable_args, **kwargs) + else: + # Used as context manager + return _test_case.assertRaises(exception) + else: + pattern = get_pattern(glob, regex, match_case) + # Use unittest's assertRaisesRegex + if args: + # Called with callable + callable_func = args[0] + callable_args = args[1:] + with _test_case.assertRaisesRegex(exception, pattern): + callable_func(*callable_args, **kwargs) + else: + # Used as context manager + return _test_case.assertRaisesRegex(exception, pattern) def assert_warns(exception=Warning, *args, glob=None, regex=None, match_case=None, **kwargs): - if glob is None and regex is None: - return tools.assert_warns(exception, *args, **kwargs) + """ + Wrapper for asserting warnings, optionally with pattern matching. - pattern = get_pattern(glob, regex, match_case) - return tools.assert_warns_regex(exception, pattern, *args, **kwargs) + Can be used as context manager or with callable: + with assert_warns(UserWarning): + warnings.warn("test", UserWarning) + + assert_warns(UserWarning, callable, arg1, arg2, kwarg=value) + """ + if glob is None and regex is None: + # Use unittest's assertWarns + if args: + # Called with callable + callable_func = args[0] + callable_args = args[1:] + with _test_case.assertWarns(exception): + callable_func(*callable_args, **kwargs) + else: + # Used as context manager + return _test_case.assertWarns(exception) + else: + pattern = get_pattern(glob, regex, match_case) + # Use unittest's assertWarnsRegex + if args: + # Called with callable + callable_func = args[0] + callable_args = args[1:] + with _test_case.assertWarnsRegex(exception, pattern): + callable_func(*callable_args, **kwargs) + else: + # Used as context manager + return _test_case.assertWarnsRegex(exception, pattern) def raises(exception, glob=None, regex=None, match_case=None): @@ -235,10 +191,11 @@ def test(): """ def decorator(func): + @functools.wraps(func) def new_func(*args, **kwargs): with assert_raises(exception, glob=glob, regex=regex, match_case=match_case): return func(*args, **kwargs) - return tools.make_decorator(func)(new_func) + return new_func return decorator diff --git a/dali/test/python/operator_1/test_constant.py b/dali/test/python/operator_1/test_constant.py index a3484e638f9..130c73296c2 100644 --- a/dali/test/python/operator_1/test_constant.py +++ b/dali/test/python/operator_1/test_constant.py @@ -36,15 +36,6 @@ print("ConstantOp: PyTorch support disabled") pass -try: - import mxnet - - array_interfaces.append((mxnet.ndarray.array, None)) - print("ConstantOp: MXNet support enabled") -except ModuleNotFoundError: - print("ConstantOp: MXNet support disabled") - pass - class ConstantPipeline(Pipeline): def __init__(self, device): diff --git a/dali/test/python/operator_1/test_numba_func.py b/dali/test/python/operator_1/test_numba_func.py index 8e6a1f855df..ab95c9f3ce7 100644 --- 
a/dali/test/python/operator_1/test_numba_func.py +++ b/dali/test/python/operator_1/test_numba_func.py @@ -19,7 +19,8 @@ import nvidia.dali as dali import nvidia.dali.fn as fn import nvidia.dali.types as dali_types -from nose_utils import with_setup, attr +from nose2.tools import params +from nose_utils import attr from test_utils import ( get_dali_extra_path, to_array, @@ -202,13 +203,12 @@ def numba_func_pipe( assert np.array_equal(out_arr, expected_out[i]) -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_numba_func(): - # shape, dtype, run_fn, out_types, - # in_types, out_ndim, in_ndim, setup_fn, batch_processing, - # expected_out - args = [ +class TestNumbaFuncCPU: + def setUp(self): + check_numba_compatibility_cpu() + + @attr("sanitizer_skip") + @params( ( [(10, 10, 10)], np.bool_, @@ -293,10 +293,9 @@ def test_numba_func(): None, [np.full((20, 30, 10), 42, dtype=np.int32), np.full((10, 30, 20), 42, dtype=np.int32)], ), - ] - - device = "cpu" - for ( + ) + def test_numba_func( + self, shape, dtype, run_fn, @@ -307,9 +306,9 @@ def test_numba_func(): setup_fn, batch_processing, expected_out, - ) in args: - yield ( - _testimpl_numba_func, + ): + device = "cpu" + _testimpl_numba_func( device, shape, dtype, @@ -323,56 +322,49 @@ def test_numba_func(): expected_out, ) + def test_numba_func_with_cond(self): + # When the function is not converted, the numba still works with no issues. + # AG conversion or using a complex enough decorator would break this. + # TODO(klecki): Can we add any additional safeguards? + _testimpl_numba_func( + device="cpu", + shapes=[(10, 10, 10)], + dtype=np.uint8, + run_fn=set_all_values_to_255_batch, + out_types=[dali_types.UINT8], + in_types=[dali_types.UINT8], + outs_ndim=[3], + ins_ndim=[3], + setup_fn=None, + batch_processing=True, + expected_out=[np.full((10, 10, 10), 255, dtype=np.uint8)], + enable_conditionals=True, + ) -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_numba_func_with_cond(): - # When the function is not converted, the numba still works with no issues. - # AG conversion or using a complex enough decorator would break this. - # TODO(klecki): Can we add any additional safeguards? - _testimpl_numba_func( - device="cpu", - shapes=[(10, 10, 10)], - dtype=np.uint8, - run_fn=set_all_values_to_255_batch, - out_types=[dali_types.UINT8], - in_types=[dali_types.UINT8], - outs_ndim=[3], - ins_ndim=[3], - setup_fn=None, - batch_processing=True, - expected_out=[np.full((10, 10, 10), 255, dtype=np.uint8)], - enable_conditionals=True, - ) + def test_numba_func_with_cond_do_not_convert(self): + # Test if do_not_convert decorated functions still work. + _testimpl_numba_func( + device="cpu", + shapes=[(10, 10, 10)], + dtype=np.uint8, + run_fn=do_not_convert(set_all_values_to_255_batch), + out_types=[dali_types.UINT8], + in_types=[dali_types.UINT8], + outs_ndim=[3], + ins_ndim=[3], + setup_fn=None, + batch_processing=True, + expected_out=[np.full((10, 10, 10), 255, dtype=np.uint8)], + enable_conditionals=True, + ) -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_numba_func_with_cond_do_not_convert(): - # Test if do_not_convert decorated functions still work. 
- _testimpl_numba_func( - device="cpu", - shapes=[(10, 10, 10)], - dtype=np.uint8, - run_fn=do_not_convert(set_all_values_to_255_batch), - out_types=[dali_types.UINT8], - in_types=[dali_types.UINT8], - outs_ndim=[3], - ins_ndim=[3], - setup_fn=None, - batch_processing=True, - expected_out=[np.full((10, 10, 10), 255, dtype=np.uint8)], - enable_conditionals=True, - ) - +class TestNumbaFuncGPU: + def setUp(self): + check_numba_compatibility_gpu() -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_gpu) -def test_numba_func_gpu(): - # shape, dtype, run_fn, out_types, - # in_types, out_ndim, in_ndim, setup_fn, batch_processing, - # expected_out - args = [ + @attr("sanitizer_skip") + @params( ( [(10, 10, 10)], np.bool_, @@ -436,12 +428,9 @@ def test_numba_func_gpu(): None, [change_dim_expected_out(20), change_dim_expected_out(30)], ), - ] - - device = "gpu" - blocks = [32, 32, 1] - threads_per_block = [32, 16, 1] - for ( + ) + def test_numba_func_gpu( + self, shape, dtype, run_fn, @@ -452,9 +441,11 @@ def test_numba_func_gpu(): setup_fn, batch_processing, expected_out, - ) in args: - yield ( - _testimpl_numba_func, + ): + device = "gpu" + blocks = [32, 32, 1] + threads_per_block = [32, 16, 1] + _testimpl_numba_func( device, shape, dtype, @@ -590,10 +581,12 @@ def rot_image_setup(outs, ins): out0[sample_id][2] = in0[sample_id][2] -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_numba_func_image(): - args = [ +class TestNumbaFuncImageCPU: + def setUp(self): + check_numba_compatibility_cpu() + + @attr("sanitizer_skip") + @params( ( reverse_col_batch, [dali_types.UINT8], @@ -634,9 +627,9 @@ def test_numba_func_image(): None, lambda x: np.rot90(x), ), - ] - device = "cpu" - for ( + ) + def test_numba_func_image( + self, run_fn, out_types, in_types, @@ -645,9 +638,9 @@ def test_numba_func_image(): setup_fn, batch_processing, transform, - ) in args: - yield ( - _testimpl_numba_func_image, + ): + device = "cpu" + _testimpl_numba_func_image( device, run_fn, out_types, @@ -660,10 +653,12 @@ def test_numba_func_image(): ) -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_gpu) -def test_numba_func_image_gpu(): - args = [ +class TestNumbaFuncImageGPU: + def setUp(self): + check_numba_compatibility_gpu() + + @attr("sanitizer_skip") + @params( ( reverse_col_sample_gpu, [dali_types.UINT8], @@ -684,11 +679,9 @@ def test_numba_func_image_gpu(): None, np.rot90, ), - ] - device = "gpu" - blocks = [32, 32, 1] - threads_per_block = [32, 8, 1] - for ( + ) + def test_numba_func_image_gpu( + self, run_fn, out_types, in_types, @@ -697,9 +690,11 @@ def test_numba_func_image_gpu(): setup_fn, batch_processing, transform, - ) in args: - yield ( - _testimpl_numba_func_image, + ): + device = "gpu" + blocks = [32, 32, 1] + threads_per_block = [32, 8, 1] + _testimpl_numba_func_image( device, run_fn, out_types, @@ -778,53 +773,61 @@ def numba_func_split_image_pipe( return images_in, out0, out1, out2 -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_split_images_col(): - pipe = numba_func_split_image_pipe( - batch_size=8, - num_threads=1, - device_id=0, - run_fn=split_images_col_sample, - setup_fn=setup_split_images_col, - out_types=[dali_types.UINT8 for i in range(3)], - in_types=[dali_types.UINT8], - outs_ndim=[2, 2, 2], - ins_ndim=[3], - device="cpu", - ) - for _ in range(3): - images_in, R, G, B = pipe.run() - for i in range(len(images_in)): - assert np.array_equal(images_in.at(i), np.stack([R.at(i), G.at(i), B.at(i)], axis=2)) - - 
-@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_gpu) -def test_split_images_col_gpu(): - blocks = [32, 32, 1] - threads_per_block = [32, 8, 1] - pipe = numba_func_split_image_pipe( - batch_size=8, - num_threads=1, - device_id=0, - run_fn=split_images_col_sample_gpu, - setup_fn=setup_split_images_col, - out_types=[dali_types.UINT8 for i in range(3)], - in_types=[dali_types.UINT8], - outs_ndim=[2, 2, 2], - ins_ndim=[3], - device="gpu", - blocks=blocks, - threads_per_block=threads_per_block, - ) - for _ in range(3): - images_in, R, G, B = pipe.run() - for i in range(len(images_in)): - assert np.array_equal( - to_array(images_in[i]), - np.stack([to_array(R[i]), to_array(G[i]), to_array(B[i])], axis=2), - ) +class TestSplitImagesCol: + def setUp(self): + check_numba_compatibility_cpu() + + @attr("sanitizer_skip") + def test_split_images_col(self): + pipe = numba_func_split_image_pipe( + batch_size=8, + num_threads=1, + device_id=0, + run_fn=split_images_col_sample, + setup_fn=setup_split_images_col, + out_types=[dali_types.UINT8 for i in range(3)], + in_types=[dali_types.UINT8], + outs_ndim=[2, 2, 2], + ins_ndim=[3], + device="cpu", + ) + for _ in range(3): + images_in, R, G, B = pipe.run() + for i in range(len(images_in)): + assert np.array_equal( + images_in.at(i), np.stack([R.at(i), G.at(i), B.at(i)], axis=2) + ) + + +class TestSplitImagesColGPU: + def setUp(self): + check_numba_compatibility_gpu() + + @attr("sanitizer_skip") + def test_split_images_col_gpu(self): + blocks = [32, 32, 1] + threads_per_block = [32, 8, 1] + pipe = numba_func_split_image_pipe( + batch_size=8, + num_threads=1, + device_id=0, + run_fn=split_images_col_sample_gpu, + setup_fn=setup_split_images_col, + out_types=[dali_types.UINT8 for i in range(3)], + in_types=[dali_types.UINT8], + outs_ndim=[2, 2, 2], + ins_ndim=[3], + device="gpu", + blocks=blocks, + threads_per_block=threads_per_block, + ) + for _ in range(3): + images_in, R, G, B = pipe.run() + for i in range(len(images_in)): + assert np.array_equal( + to_array(images_in[i]), + np.stack([to_array(R[i]), to_array(G[i]), to_array(B[i])], axis=2), + ) def multiple_ins_setup(outs, ins): @@ -891,54 +894,60 @@ def numba_multiple_ins_pipe( ) -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_multiple_ins(): - pipe = numba_multiple_ins_pipe( - shapes=[(10, 10)], - dtype=np.uint8, - batch_size=8, - num_threads=1, - device_id=0, - run_fn=multiple_ins_run, - setup_fn=multiple_ins_setup, - out_types=[dali_types.UINT8], - in_types=[dali_types.UINT8 for i in range(3)], - outs_ndim=[3], - ins_ndim=[2, 2, 2], - device="cpu", - ) - for _ in range(3): - outs = pipe.run() - out_arr = np.array(outs[0][0]) - assert np.array_equal(out_arr, np.zeros((10, 10, 3), dtype=np.uint8)) - - -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_gpu) -def test_multiple_ins_gpu(): - blocks = [32, 32, 1] - threads_per_block = [32, 8, 1] - pipe = numba_multiple_ins_pipe( - shapes=[(10, 10)], - dtype=np.uint8, - batch_size=8, - num_threads=1, - device_id=0, - run_fn=multiple_ins_run_gpu, - setup_fn=multiple_ins_setup, - out_types=[dali_types.UINT8], - in_types=[dali_types.UINT8 for i in range(3)], - outs_ndim=[3], - ins_ndim=[2, 2, 2], - device="gpu", - blocks=blocks, - threads_per_block=threads_per_block, - ) - for _ in range(3): - outs = pipe.run() - out_arr = to_array(outs[0][0]) - assert np.array_equal(out_arr, np.zeros((10, 10, 3), dtype=np.uint8)) +class TestMultipleIns: + def setUp(self): + check_numba_compatibility_cpu() + + 
@attr("sanitizer_skip") + def test_multiple_ins(self): + pipe = numba_multiple_ins_pipe( + shapes=[(10, 10)], + dtype=np.uint8, + batch_size=8, + num_threads=1, + device_id=0, + run_fn=multiple_ins_run, + setup_fn=multiple_ins_setup, + out_types=[dali_types.UINT8], + in_types=[dali_types.UINT8 for i in range(3)], + outs_ndim=[3], + ins_ndim=[2, 2, 2], + device="cpu", + ) + for _ in range(3): + outs = pipe.run() + out_arr = np.array(outs[0][0]) + assert np.array_equal(out_arr, np.zeros((10, 10, 3), dtype=np.uint8)) + + +class TestMultipleInsGPU: + def setUp(self): + check_numba_compatibility_gpu() + + @attr("sanitizer_skip") + def test_multiple_ins_gpu(self): + blocks = [32, 32, 1] + threads_per_block = [32, 8, 1] + pipe = numba_multiple_ins_pipe( + shapes=[(10, 10)], + dtype=np.uint8, + batch_size=8, + num_threads=1, + device_id=0, + run_fn=multiple_ins_run_gpu, + setup_fn=multiple_ins_setup, + out_types=[dali_types.UINT8], + in_types=[dali_types.UINT8 for i in range(3)], + outs_ndim=[3], + ins_ndim=[2, 2, 2], + device="gpu", + blocks=blocks, + threads_per_block=threads_per_block, + ) + for _ in range(3): + outs = pipe.run() + out_arr = to_array(outs[0][0]) + assert np.array_equal(out_arr, np.zeros((10, 10, 3), dtype=np.uint8)) def nonuniform_types_setup(outs, ins): @@ -1002,52 +1011,58 @@ def nonuniform_types_pipe( return images_in, out_img, out_shape -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_cpu) -def test_nonuniform_types_cpu(): - pipe = nonuniform_types_pipe( - batch_size=8, - num_threads=1, - device_id=0, - run_fn=nonuniform_types_run_cpu, - out_types=[dali_types.UINT8, dali_types.INT64], - in_types=[dali_types.UINT8], - outs_ndim=[3, 1], - ins_ndim=[3], - device="cpu", - ) - for _ in range(3): - images_in, images_out, img_shape = pipe.run() - for i in range(len(images_in)): - assert np.array_equal(255 - images_in.at(i), images_out.at(i)) - assert np.array_equal(images_out.at(i).shape, img_shape.at(i)) - - -@attr("sanitizer_skip") -@with_setup(check_numba_compatibility_gpu) -def test_nonuniform_types_gpu(): - blocks = [16, 16, 1] - threads_per_block = [32, 16, 1] - pipe = nonuniform_types_pipe( - batch_size=8, - num_threads=1, - device_id=0, - run_fn=nonuniform_types_run_gpu, - out_types=[dali_types.UINT8, dali_types.INT64], - in_types=[dali_types.UINT8], - outs_ndim=[3, 1], - ins_ndim=[3], - device="gpu", - blocks=blocks, - threads_per_block=threads_per_block, - ) - for _ in range(3): - images_in, images_out, img_shape = pipe.run() - images_in, images_out, img_shape = ( - images_in.as_cpu(), - images_out.as_cpu(), - img_shape.as_cpu(), +class TestNonuniformTypes: + def setUp(self): + check_numba_compatibility_cpu() + + @attr("sanitizer_skip") + def test_nonuniform_types_cpu(self): + pipe = nonuniform_types_pipe( + batch_size=8, + num_threads=1, + device_id=0, + run_fn=nonuniform_types_run_cpu, + out_types=[dali_types.UINT8, dali_types.INT64], + in_types=[dali_types.UINT8], + outs_ndim=[3, 1], + ins_ndim=[3], + device="cpu", ) - for i in range(len(images_in)): - assert np.array_equal(255 - images_in.at(i), images_out.at(i)) - assert np.array_equal(images_out.at(i).shape, img_shape.at(i)) + for _ in range(3): + images_in, images_out, img_shape = pipe.run() + for i in range(len(images_in)): + assert np.array_equal(255 - images_in.at(i), images_out.at(i)) + assert np.array_equal(images_out.at(i).shape, img_shape.at(i)) + + +class TestNonuniformTypesGPU: + def setUp(self): + check_numba_compatibility_gpu() + + @attr("sanitizer_skip") + def 
test_nonuniform_types_gpu(self): + blocks = [16, 16, 1] + threads_per_block = [32, 16, 1] + pipe = nonuniform_types_pipe( + batch_size=8, + num_threads=1, + device_id=0, + run_fn=nonuniform_types_run_gpu, + out_types=[dali_types.UINT8, dali_types.INT64], + in_types=[dali_types.UINT8], + outs_ndim=[3, 1], + ins_ndim=[3], + device="gpu", + blocks=blocks, + threads_per_block=threads_per_block, + ) + for _ in range(3): + images_in, images_out, img_shape = pipe.run() + images_in, images_out, img_shape = ( + images_in.as_cpu(), + images_out.as_cpu(), + img_shape.as_cpu(), + ) + for i in range(len(images_in)): + assert np.array_equal(255 - images_in.at(i), images_out.at(i)) + assert np.array_equal(images_out.at(i).shape, img_shape.at(i)) diff --git a/dali/test/python/test_RN50_data_fw_iterators.py b/dali/test/python/test_RN50_data_fw_iterators.py index b8fdd3b724f..3658c9f9026 100644 --- a/dali/test/python/test_RN50_data_fw_iterators.py +++ b/dali/test/python/test_RN50_data_fw_iterators.py @@ -231,12 +231,6 @@ def test_fw_iter(IteratorClass, args): break -def import_mxnet(): - from nvidia.dali.plugin.mxnet import DALIClassificationIterator as MXNetIterator - - return MXNetIterator - - def import_pytorch(): from nvidia.dali.plugin.pytorch import DALIClassificationIterator as PyTorchIterator @@ -275,7 +269,6 @@ def import_tf(): Iterators = { - "mxnet": [import_mxnet], "pytorch": [import_pytorch], "tf": [import_tf], "paddle": [import_paddle], diff --git a/dali/test/python/test_dali_tf_conditionals.py b/dali/test/python/test_dali_tf_conditionals.py index 6f8fa40fbcc..ea54d7f8ffe 100644 --- a/dali/test/python/test_dali_tf_conditionals.py +++ b/dali/test/python/test_dali_tf_conditionals.py @@ -18,42 +18,44 @@ import nvidia.dali.fn as fn import nvidia.dali.types as types import nvidia.dali.plugin.tf as dali_tf -from nose_utils import with_setup from test_utils_tensorflow import skip_inputs_for_incompatible_tf -@with_setup(skip_inputs_for_incompatible_tf) -def test_both_tf_and_dali_conditionals(): - @pipeline_def(enable_conditionals=True, batch_size=5, num_threads=4, device_id=0) - def dali_conditional_pipeline(): - iter_id = fn.external_source(source=lambda x: np.array(x.iteration), batch=False) - if iter_id & 1 == 0: - output = types.Constant(np.array(-1), device="cpu") - else: - output = types.Constant(np.array(1), device="cpu") - return output +class TestBothTFAndDALIConditionals: + def setUp(self): + skip_inputs_for_incompatible_tf() - with tf.device("/cpu:0"): - dali_dataset = dali_tf.experimental.DALIDatasetWithInputs( - pipeline=dali_conditional_pipeline(), - batch_size=5, - output_shapes=(5,), - output_dtypes=(tf.int32), - num_threads=4, - device_id=0, - ) + def test_both_tf_and_dali_conditionals(self): + @pipeline_def(enable_conditionals=True, batch_size=5, num_threads=4, device_id=0) + def dali_conditional_pipeline(): + iter_id = fn.external_source(source=lambda x: np.array(x.iteration), batch=False) + if iter_id & 1 == 0: + output = types.Constant(np.array(-1), device="cpu") + else: + output = types.Constant(np.array(1), device="cpu") + return output - @tf.function - def tf_function_with_conditionals(dali_dataset): - negative = tf.constant(0) - positive = tf.constant(0) - for input in dali_dataset: - if tf.reduce_sum(input) < 0: - negative = negative + 1 - else: - positive = positive + 1 - return negative, positive + with tf.device("/cpu:0"): + dali_dataset = dali_tf.experimental.DALIDatasetWithInputs( + pipeline=dali_conditional_pipeline(), + batch_size=5, + output_shapes=(5,), + 
output_dtypes=(tf.int32), + num_threads=4, + device_id=0, + ) - pos, neg = tf_function_with_conditionals(dali_dataset.take(5)) - assert pos == 3 - assert neg == 2 + @tf.function + def tf_function_with_conditionals(dali_dataset): + negative = tf.constant(0) + positive = tf.constant(0) + for input in dali_dataset: + if tf.reduce_sum(input) < 0: + negative = negative + 1 + else: + positive = positive + 1 + return negative, positive + + pos, neg = tf_function_with_conditionals(dali_dataset.take(5)) + assert pos == 3 + assert neg == 2 diff --git a/dali/test/python/test_dali_tf_dataset_eager.py b/dali/test/python/test_dali_tf_dataset_eager.py index ee3b15ec716..179aec54566 100644 --- a/dali/test/python/test_dali_tf_dataset_eager.py +++ b/dali/test/python/test_dali_tf_dataset_eager.py @@ -18,7 +18,8 @@ import nvidia.dali.plugin.tf as dali_tf from nvidia.dali.plugin.tf.experimental import Input from nvidia.dali import fn -from nose_utils import with_setup, raises +from nose2.tools import params +from nose_utils import raises from test_dali_tf_dataset_pipelines import ( FixedSampleIterator, RandomSampleIterator, @@ -81,14 +82,25 @@ def run_tf_dataset_with_constant_input(dev, shape, value, dtype, batch): run_tf_dataset_eager_mode(dev, get_pipeline_desc=get_pipeline_desc, to_dataset=to_dataset) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_constant_input(): +def _generate_tf_dataset_with_constant_input_test_cases(): + rng = random.Random(42) + cases = [] for dev in ["cpu", "gpu"]: for shape in [(7, 42), (64, 64, 3), (3, 40, 40, 4)]: for dtype in [np.uint8, np.int32, np.float32]: for batch in ["dataset", True, False, None]: - value = random.choice([42, 255]) - yield run_tf_dataset_with_constant_input, dev, shape, value, dtype, batch + value = rng.choice([42, 255]) + cases.append((dev, shape, value, dtype, batch)) + return cases + + +class TestTFDatasetWithInputs: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_generate_tf_dataset_with_constant_input_test_cases()) + def test_tf_dataset_with_constant_input(self, dev, shape, value, dtype, batch): + run_tf_dataset_with_constant_input(dev, shape, value, dtype, batch) def run_tf_dataset_with_random_input(dev, max_shape, dtype, batch="dataset"): @@ -101,13 +113,22 @@ def run_tf_dataset_with_random_input(dev, max_shape, dtype, batch="dataset"): run_tf_dataset_eager_mode(dev, get_pipeline_desc=get_pipeline_desc, to_dataset=to_dataset) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_random_input(): - for dev in ["cpu", "gpu"]: - for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: - for dtype in [np.uint8, np.int32, np.float32]: - for batch in ["dataset", False, True, None]: - yield run_tf_dataset_with_random_input, dev, max_shape, dtype, batch +_tf_dataset_with_random_input_test_cases = [ + (dev, max_shape, dtype, batch) + for dev in ["cpu", "gpu"] + for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)] + for dtype in [np.uint8, np.int32, np.float32] + for batch in ["dataset", False, True, None] +] + + +class TestTFDatasetWithRandomInput: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_tf_dataset_with_random_input_test_cases) + def test_tf_dataset_with_random_input(self, dev, max_shape, dtype, batch): + run_tf_dataset_with_random_input(dev, max_shape, dtype, batch) # Run with everything on GPU (External Source op as well) @@ -121,12 +142,21 @@ def run_tf_dataset_with_random_input_gpu(max_shape, dtype, batch): run_tf_dataset_eager_mode("gpu", 
get_pipeline_desc=get_pipeline_desc, to_dataset=to_dataset) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_random_input_gpu(): - for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: - for dtype in [np.uint8, np.int32, np.float32]: - for batch in ["dataset", False, True, None]: - yield run_tf_dataset_with_random_input_gpu, max_shape, dtype, batch +_tf_dataset_with_random_input_gpu_test_cases = [ + (max_shape, dtype, batch) + for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)] + for dtype in [np.uint8, np.int32, np.float32] + for batch in ["dataset", False, True, None] +] + + +class TestTFDatasetWithRandomInputGPU: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_tf_dataset_with_random_input_gpu_test_cases) + def test_tf_dataset_with_random_input_gpu(self, max_shape, dtype, batch): + run_tf_dataset_with_random_input_gpu(max_shape, dtype, batch) def run_tf_dataset_no_copy(max_shape, dtype, dataset_dev, es_dev, no_copy): @@ -140,15 +170,25 @@ def run_tf_dataset_no_copy(max_shape, dtype, dataset_dev, es_dev, no_copy): # Check if setting no_copy flags in all placement scenarios is ok as we override it internally -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_no_copy(): +def _generate_tf_dataset_with_no_copy_test_cases(): + cases = [] for max_shape in [(10, 20), (120, 120, 3)]: for dataset_dev in ["cpu", "gpu"]: for es_dev in ["cpu", "gpu"]: if dataset_dev == "cpu" and es_dev == "gpu": continue # GPU op in CPU dataset not supported for no_copy in [True, False, None]: - yield run_tf_dataset_no_copy, max_shape, np.uint8, dataset_dev, es_dev, no_copy + cases.append((max_shape, np.uint8, dataset_dev, es_dev, no_copy)) + return cases + + +class TestTFDatasetWithNoCopy: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_generate_tf_dataset_with_no_copy_test_cases()) + def test_tf_dataset_with_no_copy(self, max_shape, dtype, dataset_dev, es_dev, no_copy): + run_tf_dataset_no_copy(max_shape, dtype, dataset_dev, es_dev, no_copy) def run_tf_dataset_with_stop_iter(dev, max_shape, dtype, stop_samples): @@ -162,20 +202,22 @@ def run_tf_dataset_with_stop_iter(dev, max_shape, dtype, stop_samples): ) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_stop_iter(): - batch_size = 12 - for dev in ["cpu", "gpu"]: - for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: - for dtype in [np.uint8, np.int32, np.float32]: - for iters in [1, 2, 3, 4, 5]: - yield ( - run_tf_dataset_with_stop_iter, - dev, - max_shape, - dtype, - iters * batch_size - 3, - ) +class TestTFDatasetWithStopIter: + def setUp(self): + skip_inputs_for_incompatible_tf() + + def test_tf_dataset_with_stop_iter(self): + batch_size = 12 + for dev in ["cpu", "gpu"]: + for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: + for dtype in [np.uint8, np.int32, np.float32]: + for iters in [1, 2, 3, 4, 5]: + run_tf_dataset_with_stop_iter( + dev, + max_shape, + dtype, + iters * batch_size - 3, + ) def run_tf_dataset_multi_input(dev, start_values, input_names, batches): @@ -199,13 +241,23 @@ def run_tf_dataset_multi_input(dev, start_values, input_names, batches): input_names = [["input_{}".format(i) for i, _ in enumerate(vals)] for vals in start_values] -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_multi_input(): +def _generate_tf_dataset_multi_input_test_cases(): + cases = [] for dev in ["cpu", "gpu"]: for starts, names in zip(start_values, input_names): - yield run_tf_dataset_multi_input, dev, starts, names, 
["dataset" for _ in input_names] + cases.append((dev, starts, names, ["dataset" for _ in input_names])) for batches in list(itertools.product([True, False], repeat=len(input_names))): - yield run_tf_dataset_multi_input, dev, starts, names, batches + cases.append((dev, starts, names, batches)) + return cases + + +class TestTFDatasetMultiInput: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_generate_tf_dataset_multi_input_test_cases()) + def test_tf_dataset_multi_input(self, dev, starts, names, batches): + run_tf_dataset_multi_input(dev, starts, names, batches) @raises(tf.errors.InternalError, glob="TF device and DALI device mismatch") @@ -264,40 +316,47 @@ def check_tf_dataset_wrong_input_type(wrong_input_datasets): check_basic_dataset_build(wrong_input_datasets) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_wrong_input_type(): - input_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() - # wrong `input_datasets` type (no dictionary) - for wrong_input_dataset in ["a", input_dataset, [input_dataset]]: - yield check_tf_dataset_wrong_input_type, wrong_input_dataset - # wrong values in dictionary - for wrong_input_dataset in ["str", [input_dataset]]: - yield check_tf_dataset_wrong_input_type, { - "a": wrong_input_dataset, - "b": wrong_input_dataset, - } - # wrong keys in dictionary - for wrong_input_name in [42, ("a", "b")]: - yield check_tf_dataset_wrong_input_type, {wrong_input_name: input_dataset} - - -@raises( - ValueError, - glob="Found External Source nodes in the Pipeline, that were not assigned any inputs.", -) -@with_setup(skip_for_incompatible_tf) -def test_input_not_provided(): - input_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() - check_basic_dataset_build({"a": input_dataset}) - +class TestTFDatasetInputValidation: + def setUp(self): + skip_inputs_for_incompatible_tf() + + def test_tf_dataset_wrong_input_type(self): + input_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() + # wrong `input_datasets` type (no dictionary) + for wrong_input_dataset in ["a", input_dataset, [input_dataset]]: + check_tf_dataset_wrong_input_type(wrong_input_dataset) + # wrong values in dictionary + for wrong_input_dataset in ["str", [input_dataset]]: + check_tf_dataset_wrong_input_type( + { + "a": wrong_input_dataset, + "b": wrong_input_dataset, + } + ) + # wrong keys in dictionary + for wrong_input_name in [42, ("a", "b")]: + check_tf_dataset_wrong_input_type({wrong_input_name: input_dataset}) + + +class TestTFDatasetExternalSourceValidation: + def setUp(self): + skip_for_incompatible_tf() + + @raises( + ValueError, + glob="Found External Source nodes in the Pipeline, that were not assigned any inputs.", + ) + def test_input_not_provided(self): + input_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() + check_basic_dataset_build({"a": input_dataset}) -@raises( - ValueError, glob="Did not find an External Source placeholder node * in the provided pipeline" -) -@with_setup(skip_for_incompatible_tf) -def test_missing_es_node(): - input_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() - check_basic_dataset_build({"a": input_dataset, "b": input_dataset, "c": input_dataset}) + @raises( + ValueError, + glob="Did not find an External Source placeholder node * in the provided pipeline", + ) + def test_missing_es_node(self): + input_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() + check_basic_dataset_build({"a": input_dataset, "b": input_dataset, "c": 
input_dataset}) @pipeline_def(batch_size=10, num_threads=4, device_id=0) @@ -321,31 +380,32 @@ def check_single_es_pipeline(kwargs, input_datasets): return dali_dataset -@raises( - ValueError, glob="Did not find an External Source placeholder node * in the provided pipeline" -) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_es_with_source(): - in_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() - check_single_es_pipeline({"name": "a", "source": []}, {"a": in_dataset}) - +class TestTFDatasetESParameters: + def setUp(self): + skip_inputs_for_incompatible_tf() -@raises( - ValueError, - glob="The parameter ``num_outputs`` is only valid when using ``source`` to provide data.", -) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_es_num_outputs_provided(): - in_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() - check_single_es_pipeline({"name": "a", "num_outputs": 1}, {"a": in_dataset}) + @raises( + ValueError, + glob="Did not find an External Source placeholder node * in the provided pipeline", + ) + def test_tf_dataset_es_with_source(self): + in_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() + check_single_es_pipeline({"name": "a", "source": []}, {"a": in_dataset}) + @raises( + ValueError, + glob="The parameter ``num_outputs`` is only valid when using ``source`` to provide data.", + ) + def test_tf_dataset_es_num_outputs_provided(self): + in_dataset = tf.data.Dataset.from_tensors(np.full((2, 2), 42)).repeat() + check_single_es_pipeline({"name": "a", "num_outputs": 1}, {"a": in_dataset}) -@raises( - ValueError, glob="Found placeholder External Source node * in the Pipeline that was not named" -) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_disallowed_es(): - check_single_es_pipeline({}, {}) + @raises( + ValueError, + glob="Found placeholder External Source node * in the Pipeline that was not named", + ) + def test_tf_dataset_disallowed_es(self): + check_single_es_pipeline({}, {}) def check_layout(kwargs, input_datasets, layout): @@ -378,21 +438,25 @@ def run_tf_with_dali_external_source(dev, es_args, ed_dev, dtype, *_): ) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_with_dali_external_source(): - yield from gen_tf_with_dali_external_source(run_tf_with_dali_external_source) +class TestTFWithDALIExternalSource: + def setUp(self): + skip_inputs_for_incompatible_tf() + @params(*gen_tf_with_dali_external_source(run_tf_with_dali_external_source)) + def test_tf_with_dali_external_source( + self, test_run, dev, es_args, es_dev, dtype, iter_limit, dense + ): + test_run(dev, es_args, es_dev, dtype, iter_limit, dense) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_layouts(): - for shape, layout in [((2, 3), "XY"), ((10, 20, 3), "HWC"), ((4, 128, 64, 3), "FHWC")]: - in_dataset = tf.data.Dataset.from_tensors(np.full(shape, 42)).repeat() - # Captured from pipeline - yield check_layout, {"layout": layout, "name": "in"}, {"in": in_dataset}, layout - # Captured from pipeline - yield check_layout, {"layout": layout, "name": "in"}, {"in": Input(in_dataset)}, layout - # Set via experimental.Input, not specified in external source - yield check_layout, {"name": "in"}, {"in": Input(in_dataset, layout=layout)}, layout + def test_tf_dataset_layouts(self): + for shape, layout in [((2, 3), "XY"), ((10, 20, 3), "HWC"), ((4, 128, 64, 3), "FHWC")]: + in_dataset = tf.data.Dataset.from_tensors(np.full(shape, 42)).repeat() + # Captured from pipeline + 
check_layout({"layout": layout, "name": "in"}, {"in": in_dataset}, layout) + # Captured from pipeline + check_layout({"layout": layout, "name": "in"}, {"in": Input(in_dataset)}, layout) + # Set via experimental.Input, not specified in external source + check_layout({"name": "in"}, {"in": Input(in_dataset, layout=layout)}, layout) # Test if the TypeError is raised for unsupported arguments for regular DALIDataset @@ -427,6 +491,9 @@ def _test_tf_dataset_multigpu_manual_placement(): # This test should be private (name starts with _) as it is called separately in L1 -@with_setup(skip_for_incompatible_tf) -def _test_tf_dataset_multigpu_mirrored_strategy(): - run_tf_dataset_multigpu_eager_mirrored_strategy() +class TestTFDatasetMultiGPU: + def setUp(self): + skip_for_incompatible_tf() + + def _test_tf_dataset_multigpu_mirrored_strategy(self): + run_tf_dataset_multigpu_eager_mirrored_strategy() diff --git a/dali/test/python/test_dali_tf_dataset_graph.py b/dali/test/python/test_dali_tf_dataset_graph.py index 32702536a8f..d35a9800e69 100644 --- a/dali/test/python/test_dali_tf_dataset_graph.py +++ b/dali/test/python/test_dali_tf_dataset_graph.py @@ -16,7 +16,8 @@ import numpy as np import random as random import tensorflow as tf -from nose_utils import with_setup, raises +from nose2.tools import params, cartesian_params +from nose_utils import raises from test_dali_tf_dataset_pipelines import ( FixedSampleIterator, external_source_tester, @@ -63,14 +64,25 @@ def run_tf_dataset_with_constant_input(dev, shape, value, dtype, batch): ) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_constant_input(): +def _generate_tf_dataset_with_constant_input_test_cases(): + rng = random.Random(42) + cases = [] for dev in ["cpu", "gpu"]: for shape in [(7, 42), (64, 64, 3), (3, 40, 40, 4)]: for dtype in [np.uint8, np.int32, np.float32]: for batch in ["dataset", True, False, None]: - value = random.choice([42, 255]) - yield run_tf_dataset_with_constant_input, dev, shape, value, dtype, batch + value = rng.choice([42, 255]) + cases.append((dev, shape, value, dtype, batch)) + return cases + + +class TestTFDatasetWithConstantInput: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_generate_tf_dataset_with_constant_input_test_cases()) + def test_tf_dataset_with_constant_input(self, dev, shape, value, dtype, batch): + run_tf_dataset_with_constant_input(dev, shape, value, dtype, batch) def run_tf_dataset_with_random_input(dev, max_shape, dtype, batch): @@ -85,13 +97,18 @@ def run_tf_dataset_with_random_input(dev, max_shape, dtype, batch): ) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_random_input(): - for dev in ["cpu", "gpu"]: - for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: - for dtype in [np.uint8, np.int32, np.float32]: - for batch in ["dataset", True, False, None]: - yield run_tf_dataset_with_random_input, dev, max_shape, dtype, batch +class TestTFDatasetWithRandomInput: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @cartesian_params( + ["cpu", "gpu"], + [(10, 20), (120, 120, 3), (3, 40, 40, 4)], + [np.uint8, np.int32, np.float32], + ["dataset", True, False, None], + ) + def test_tf_dataset_with_random_input(self, dev, max_shape, dtype, batch): + run_tf_dataset_with_random_input(dev, max_shape, dtype, batch) # Run with everything on GPU (External Source op as well) @@ -107,12 +124,17 @@ def run_tf_dataset_with_random_input_gpu(max_shape, dtype, batch): ) -@with_setup(skip_inputs_for_incompatible_tf) -def 
test_tf_dataset_with_random_input_gpu(): - for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: - for dtype in [np.uint8, np.int32, np.float32]: - for batch in ["dataset", True, False, None]: - yield run_tf_dataset_with_random_input_gpu, max_shape, dtype, batch +class TestTFDatasetWithRandomInputGPU: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @cartesian_params( + [(10, 20), (120, 120, 3), (3, 40, 40, 4)], + [np.uint8, np.int32, np.float32], + ["dataset", True, False, None], + ) + def test_tf_dataset_with_random_input_gpu(self, max_shape, dtype, batch): + run_tf_dataset_with_random_input_gpu(max_shape, dtype, batch) def run_tf_dataset_no_copy(max_shape, dtype, dataset_dev, es_dev, no_copy): @@ -126,15 +148,25 @@ def run_tf_dataset_no_copy(max_shape, dtype, dataset_dev, es_dev, no_copy): # Check if setting no_copy flags in all placement scenarios is ok as we override it internally -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_no_copy(): +def _generate_tf_dataset_with_no_copy_test_cases(): + cases = [] for max_shape in [(10, 20), (120, 120, 3)]: for dataset_dev in ["cpu", "gpu"]: for es_dev in ["cpu", "gpu"]: if dataset_dev == "cpu" and es_dev == "gpu": continue # GPU op in CPU dataset not supported for no_copy in [True, False, None]: - yield run_tf_dataset_no_copy, max_shape, np.uint8, dataset_dev, es_dev, no_copy + cases.append((max_shape, np.uint8, dataset_dev, es_dev, no_copy)) + return cases + + +class TestTFDatasetWithNoCopy: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_generate_tf_dataset_with_no_copy_test_cases()) + def test_tf_dataset_with_no_copy(self, max_shape, dtype, dataset_dev, es_dev, no_copy): + run_tf_dataset_no_copy(max_shape, dtype, dataset_dev, es_dev, no_copy) def run_tf_dataset_with_stop_iter(dev, max_shape, dtype, stop_samples): @@ -150,20 +182,24 @@ def run_tf_dataset_with_stop_iter(dev, max_shape, dtype, stop_samples): ) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_with_stop_iter(): - batch_size = 12 - for dev in ["cpu", "gpu"]: - for max_shape in [(10, 20), (120, 120, 3), (3, 40, 40, 4)]: - for dtype in [np.uint8, np.int32, np.float32]: - for iters in [1, 2, 3, 4, 5]: - yield ( - run_tf_dataset_with_stop_iter, - dev, - max_shape, - dtype, - iters * batch_size - 3, - ) +class TestTFDatasetWithStopIter: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @cartesian_params( + ["cpu", "gpu"], + [(10, 20), (120, 120, 3), (3, 40, 40, 4)], + [np.uint8, np.int32, np.float32], + [1, 2, 3, 4, 5], + ) + def test_tf_dataset_with_stop_iter(self, dev, max_shape, dtype, iters): + batch_size = 12 + run_tf_dataset_with_stop_iter( + dev, + max_shape, + dtype, + iters * batch_size - 3, + ) def run_tf_dataset_multi_input(dev, start_values, input_names, batches): @@ -187,13 +223,23 @@ def run_tf_dataset_multi_input(dev, start_values, input_names, batches): input_names = [["input_{}".format(i) for i, _ in enumerate(vals)] for vals in start_values] -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_multi_input(): +def _generate_tf_dataset_multi_input_test_cases(): + cases = [] for dev in ["cpu", "gpu"]: for starts, names in zip(start_values, input_names): - yield run_tf_dataset_multi_input, dev, starts, names, ["dataset" for _ in input_names] + cases.append((dev, starts, names, ["dataset" for _ in input_names])) for batches in list(itertools.product([True, False], repeat=len(input_names))): - yield run_tf_dataset_multi_input, dev, starts, names, batches + 
cases.append((dev, starts, names, batches)) + return cases + + +class TestTFDatasetMultiInput: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*_generate_tf_dataset_multi_input_test_cases()) + def test_tf_dataset_multi_input(self, dev, starts, names, batches): + run_tf_dataset_multi_input(dev, starts, names, batches) def run_tf_with_dali_external_source(dev, es_args, ed_dev, dtype, *_): @@ -205,9 +251,15 @@ def run_tf_with_dali_external_source(dev, es_args, ed_dev, dtype, *_): ) -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_with_dali_external_source(): - yield from gen_tf_with_dali_external_source(run_tf_with_dali_external_source) +class TestTFWithDALIExternalSource: + def setUp(self): + skip_inputs_for_incompatible_tf() + + @params(*gen_tf_with_dali_external_source(run_tf_with_dali_external_source)) + def test_tf_with_dali_external_source( + self, test_run, dev, es_args, es_dev, dtype, iter_limit, dense + ): + test_run(dev, es_args, es_dev, dtype, iter_limit, dense) tf_dataset_wrong_placement_error_msg = ( diff --git a/dali/test/python/test_dali_tf_dataset_mnist_eager.py b/dali/test/python/test_dali_tf_dataset_mnist_eager.py index dd742c314de..e26600e2e69 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist_eager.py +++ b/dali/test/python/test_dali_tf_dataset_mnist_eager.py @@ -13,7 +13,7 @@ # limitations under the License. import tensorflow as tf -from nose_utils import with_setup, raises, SkipTest +from nose_utils import raises, SkipTest import test_dali_tf_dataset_mnist as mnist from test_utils_tensorflow import skip_for_incompatible_tf, available_gpus @@ -34,54 +34,60 @@ def test_keras_single_cpu(): mnist.run_keras_single_device("cpu", 0) -@with_setup(skip_for_incompatible_tf) -@raises(tf.errors.OpError, "TF device and DALI device mismatch") -def test_keras_wrong_placement_gpu(): - with tf.device("cpu:0"): - model = mnist.keras_model() - train_dataset = mnist.get_dataset("gpu", 0) +class TestKerasWrongPlacement: + def setUp(self): + skip_for_incompatible_tf() - model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) + @raises(tf.errors.OpError, "TF device and DALI device mismatch") + def test_keras_wrong_placement_gpu(self): + with tf.device("cpu:0"): + model = mnist.keras_model() + train_dataset = mnist.get_dataset("gpu", 0) + model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) -@with_setup(skip_for_incompatible_tf) -@raises(tf.errors.OpError, "TF device and DALI device mismatch") -def test_keras_wrong_placement_cpu(): - with tf.device("gpu:0"): - model = mnist.keras_model() - train_dataset = mnist.get_dataset("cpu", 0) + @raises(tf.errors.OpError, "TF device and DALI device mismatch") + def test_keras_wrong_placement_cpu(self): + with tf.device("gpu:0"): + model = mnist.keras_model() + train_dataset = mnist.get_dataset("cpu", 0) - model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) + model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) -@with_setup(skip_for_incompatible_tf) -def test_keras_multi_gpu_mirrored_strategy(): - # due to compatibility problems between the driver, cuda version and - # TensorFlow 2.12 test_keras_multi_gpu_mirrored_strategy doesn't work. 
- if Version(tf.__version__) >= Version("2.12.0"): - raise SkipTest("This test is not supported for TensorFlow 2.12") - strategy = tf.distribute.MirroredStrategy(devices=available_gpus()) +class TestKerasMultiGPUMirroredStrategy: + def setUp(self): + skip_for_incompatible_tf() - with strategy.scope(): - model = mnist.keras_model() + def test_keras_multi_gpu_mirrored_strategy(self): + # due to compatibility problems between the driver, cuda version and + # TensorFlow 2.12 test_keras_multi_gpu_mirrored_strategy doesn't work. + if Version(tf.__version__) >= Version("2.12.0"): + raise SkipTest("This test is not supported for TensorFlow 2.12") + strategy = tf.distribute.MirroredStrategy(devices=available_gpus()) - train_dataset = mnist.get_dataset_multi_gpu(strategy) + with strategy.scope(): + model = mnist.keras_model() - model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) + train_dataset = mnist.get_dataset_multi_gpu(strategy) + + model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) - assert model.evaluate(train_dataset, steps=mnist.ITERATIONS)[1] > mnist.TARGET + assert model.evaluate(train_dataset, steps=mnist.ITERATIONS)[1] > mnist.TARGET -@with_setup(mnist.clear_checkpoints, mnist.clear_checkpoints) -def test_estimators_single_gpu(): - mnist.run_estimators_single_device("gpu", 0) +class TestEstimators: + def setUp(self): + mnist.clear_checkpoints() + def tearDown(self): + mnist.clear_checkpoints() -@with_setup(mnist.clear_checkpoints, mnist.clear_checkpoints) -def test_estimators_single_other_gpu(): - mnist.run_estimators_single_device("gpu", 1) + def test_estimators_single_gpu(self): + mnist.run_estimators_single_device("gpu", 0) + def test_estimators_single_other_gpu(self): + mnist.run_estimators_single_device("gpu", 1) -@with_setup(mnist.clear_checkpoints, mnist.clear_checkpoints) -def test_estimators_single_cpu(): - mnist.run_estimators_single_device("cpu", 0) + def test_estimators_single_cpu(self): + mnist.run_estimators_single_device("cpu", 0) diff --git a/dali/test/python/test_dali_tf_dataset_mnist_graph.py b/dali/test/python/test_dali_tf_dataset_mnist_graph.py index 0a1aba5441c..d679b82f1af 100644 --- a/dali/test/python/test_dali_tf_dataset_mnist_graph.py +++ b/dali/test/python/test_dali_tf_dataset_mnist_graph.py @@ -14,32 +14,23 @@ import tensorflow as tf import tensorflow.compat.v1 as tf_v1 -from nose_utils import with_setup, SkipTest, raises +from nose2.tools import params +from nose_utils import SkipTest, raises import test_dali_tf_dataset_mnist as mnist from packaging.version import Version mnist.tf.compat.v1.disable_eager_execution() -@with_setup(tf.keras.backend.clear_session) -def test_keras_single_gpu(): - if Version(tf.__version__) >= Version("2.16"): - raise SkipTest("TF < 2.16 is required for this test") - mnist.run_keras_single_device("gpu", 0) - - -@with_setup(tf.keras.backend.clear_session) -def test_keras_single_other_gpu(): - if Version(tf.__version__) >= Version("2.16"): - raise SkipTest("TF < 2.16 is required for this test") - mnist.run_keras_single_device("gpu", 1) +class TestKeras: + def setUp(self): + tf.keras.backend.clear_session() - -@with_setup(tf.keras.backend.clear_session) -def test_keras_single_cpu(): - if Version(tf.__version__) >= Version("2.16"): - raise SkipTest("TF < 2.16 is required for this test") - mnist.run_keras_single_device("cpu", 0) + @params(("gpu", 0), ("gpu", 1), ("cpu", 0)) + def test_keras_single_device(self, device, device_id): + if Version(tf.__version__) >= Version("2.16"): + 
raise SkipTest("TF < 2.16 is required for this test") + mnist.run_keras_single_device(device, device_id) @raises(tf.errors.OpError, "TF device and DALI device mismatch. TF*: CPU, DALI*: GPU for output") @@ -64,19 +55,13 @@ def test_keras_wrong_placement_cpu(): model.fit(train_dataset, epochs=mnist.EPOCHS, steps_per_epoch=mnist.ITERATIONS) -@with_setup(tf.compat.v1.reset_default_graph) -def test_graph_single_gpu(): - mnist.run_graph_single_device("gpu", 0) - - -@with_setup(tf.compat.v1.reset_default_graph) -def test_graph_single_cpu(): - mnist.run_graph_single_device("cpu", 0) - +class TestGraph: + def setUp(self): + tf.compat.v1.reset_default_graph() -@with_setup(tf.compat.v1.reset_default_graph) -def test_graph_single_other_gpu(): - mnist.run_graph_single_device("gpu", 1) + @params(("gpu", 0), ("cpu", 0), ("gpu", 1)) + def test_graph_single_device(self, device, device_id): + mnist.run_graph_single_device(device, device_id) # This function is copied form: @@ -107,61 +92,64 @@ def average_gradients(tower_grads): return average_grads -@with_setup(tf_v1.reset_default_graph) -def test_graph_multi_gpu(): - iterator_initializers = [] +class TestGraphMultiGPU: + def setUp(self): + tf_v1.reset_default_graph() - with tf.device("/cpu:0"): - tower_grads = [] + def test_graph_multi_gpu(self): + iterator_initializers = [] - for i in range(mnist.num_available_gpus()): - with tf.device("/gpu:{}".format(i)): - daliset = mnist.get_dataset("gpu", i, i, mnist.num_available_gpus()) + with tf.device("/cpu:0"): + tower_grads = [] - iterator = tf_v1.data.make_initializable_iterator(daliset) - iterator_initializers.append(iterator.initializer) - images, labels = iterator.get_next() + for i in range(mnist.num_available_gpus()): + with tf.device("/gpu:{}".format(i)): + daliset = mnist.get_dataset("gpu", i, i, mnist.num_available_gpus()) - images = tf_v1.reshape( - images, [mnist.BATCH_SIZE, mnist.IMAGE_SIZE * mnist.IMAGE_SIZE] - ) - labels = tf_v1.reshape( - tf_v1.one_hot(labels, mnist.NUM_CLASSES), [mnist.BATCH_SIZE, mnist.NUM_CLASSES] - ) + iterator = tf_v1.data.make_initializable_iterator(daliset) + iterator_initializers.append(iterator.initializer) + images, labels = iterator.get_next() - logits_train = mnist.graph_model(images, reuse=(i != 0), is_training=True) - logits_test = mnist.graph_model(images, reuse=True, is_training=False) + images = tf_v1.reshape( + images, [mnist.BATCH_SIZE, mnist.IMAGE_SIZE * mnist.IMAGE_SIZE] + ) + labels = tf_v1.reshape( + tf_v1.one_hot(labels, mnist.NUM_CLASSES), + [mnist.BATCH_SIZE, mnist.NUM_CLASSES], + ) - loss_op = tf_v1.reduce_mean( - tf_v1.nn.softmax_cross_entropy_with_logits(logits=logits_train, labels=labels) - ) - optimizer = tf_v1.train.AdamOptimizer() - grads = optimizer.compute_gradients(loss_op) + logits_train = mnist.graph_model(images, reuse=(i != 0), is_training=True) + logits_test = mnist.graph_model(images, reuse=True, is_training=False) - if i == 0: - correct_pred = tf_v1.equal( - tf_v1.argmax(logits_test, 1), tf_v1.argmax(labels, 1) + loss_op = tf_v1.reduce_mean( + tf_v1.nn.softmax_cross_entropy_with_logits( + logits=logits_train, labels=labels + ) ) - accuracy = tf_v1.reduce_mean(tf_v1.cast(correct_pred, tf_v1.float32)) - - tower_grads.append(grads) + optimizer = tf_v1.train.AdamOptimizer() + grads = optimizer.compute_gradients(loss_op) - tower_grads = average_gradients(tower_grads) - train_step = optimizer.apply_gradients(tower_grads) + if i == 0: + correct_pred = tf_v1.equal( + tf_v1.argmax(logits_test, 1), tf_v1.argmax(labels, 1) + ) + accuracy = 
tf_v1.reduce_mean(tf_v1.cast(correct_pred, tf_v1.float32)) - mnist.train_graph(iterator_initializers, train_step, accuracy) + tower_grads.append(grads) + tower_grads = average_gradients(tower_grads) + train_step = optimizer.apply_gradients(tower_grads) -@with_setup(mnist.clear_checkpoints, mnist.clear_checkpoints) -def test_estimators_single_gpu(): - mnist.run_estimators_single_device("gpu", 0) + mnist.train_graph(iterator_initializers, train_step, accuracy) -@with_setup(mnist.clear_checkpoints, mnist.clear_checkpoints) -def test_estimators_single_other_gpu(): - mnist.run_estimators_single_device("gpu", 1) +class TestEstimators: + def setUp(self): + mnist.clear_checkpoints() + def tearDown(self): + mnist.clear_checkpoints() -@with_setup(mnist.clear_checkpoints, mnist.clear_checkpoints) -def test_estimators_single_cpu(): - mnist.run_estimators_single_device("cpu", 0) + @params(("gpu", 0), ("gpu", 1), ("cpu", 0)) + def test_estimators_single_device(self, device, device_id): + mnist.run_estimators_single_device(device, device_id) diff --git a/dali/test/python/test_dali_tf_exec2.py b/dali/test/python/test_dali_tf_exec2.py index 3c015cf3e50..17dc13bf372 100644 --- a/dali/test/python/test_dali_tf_exec2.py +++ b/dali/test/python/test_dali_tf_exec2.py @@ -19,9 +19,9 @@ import nvidia.dali.fn as fn import nvidia.dali.types as types import nvidia.dali.plugin.tf as dali_tf -from nose_utils import with_setup from test_utils_tensorflow import skip_inputs_for_incompatible_tf from test_utils import get_dali_extra_path +import unittest test_data_root = get_dali_extra_path() @@ -44,34 +44,55 @@ def dali_exec2_pipeline(): return output.cpu() -@with_setup(skip_inputs_for_incompatible_tf) -def test_tf_dataset_exec2(): - """Test that exec_dynamic is propagated to DALI pipeline from dali_tf.DALIDatasetWithInputs""" - # From Tensorflow's perspective, this is a CPU pipeline - with tf.device("/cpu:0"): - dali_dataset = dali_tf.experimental.DALIDatasetWithInputs( - pipeline=dali_exec2_pipeline(), - batch_size=5, - output_shapes=(5,), - output_dtypes=(tf.int32), - num_threads=4, - device_id=0, - ) - - @tf.function - def tf_function_with_conditionals(dali_dataset): - negative = tf.constant(0) - positive = tf.constant(0) - for input in dali_dataset: - if tf.reduce_sum(input) < 0: - negative = negative + 1 - else: - positive = positive + 1 - return negative, positive - - pos, neg = tf_function_with_conditionals(dali_dataset.take(5)) - assert pos == 3 - assert neg == 2 +class TestTFDatasetExec2(unittest.TestCase): + def setUp(self): + skip_inputs_for_incompatible_tf() + + def test_tf_dataset_exec2(self): + """Test that exec_dynamic is propagated to DALI pipeline + by dali_tf.DALIDatasetWithInputs""" + # From Tensorflow's perspective, this is a CPU pipeline + with tf.device("/cpu:0"): + dali_dataset = dali_tf.experimental.DALIDatasetWithInputs( + pipeline=dali_exec2_pipeline(), + batch_size=5, + output_shapes=(5,), + output_dtypes=(tf.int32), + num_threads=4, + device_id=0, + ) + + @tf.function + def tf_function_with_conditionals(dali_dataset): + negative = tf.constant(0) + positive = tf.constant(0) + for input in dali_dataset: + if tf.reduce_sum(input) < 0: + negative = negative + 1 + else: + positive = positive + 1 + return negative, positive + + pos, neg = tf_function_with_conditionals(dali_dataset.take(5)) + # Eager mode: integers, graph mode: tensors, need to fetch value if it's Tensor + if ( + tf.executing_eagerly() is False + or getattr(tf.compat.v1, "_eager_context", None) is not None + ): + # get concrete 
function and run in session for static graph mode + # fallback for session-based TF execution (e.g. when other test turned eager off) + try: + from tensorflow.compat.v1 import Session + except ImportError: + # Older TF versions don't have compat.v1 layer + from tensorflow import Session + + with Session() as sess: + pos_val, neg_val = sess.run([pos, neg]) + else: + pos_val, neg_val = pos, neg + assert pos_val == 3 + assert neg_val == 2 @pipeline_def(num_threads=4, exec_dynamic=True) diff --git a/dali/test/python/test_dltensor_operator.py b/dali/test/python/test_dltensor_operator.py index f96fdc11a5e..ec7cfe52767 100644 --- a/dali/test/python/test_dltensor_operator.py +++ b/dali/test/python/test_dltensor_operator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2019, 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -21,6 +21,7 @@ from nvidia.dali.pipeline import Pipeline from nvidia.dali import fn, pipeline_def from nvidia.dali.python_function_plugin import current_dali_stream +from nose_utils import attr test_data_root = os.environ["DALI_EXTRA_PATH"] images_dir = os.path.join(test_data_root, "db", "single", "jpeg") @@ -36,11 +37,6 @@ def setup_pytorch(): torch_stream = torch.cuda.Stream() -def setup_mxnet(): - global mxnd - from mxnet import ndarray as mxnd - - def setup_cupy(): global cupy global cupy_stream @@ -182,6 +178,7 @@ def pytorch_red_channel_op(in1, in2): return [t.narrow(2, 0, 1).squeeze() for t in in1], [t.narrow(2, 0, 1).squeeze() for t in in2] +@attr("pytorch") def test_pytorch(): setup_pytorch() for testcase in [simple_pytorch_op, pytorch_red_channel_op]: @@ -192,51 +189,6 @@ def test_pytorch(): yield from _gpu_permuted_extents_torch_suite() -def mxnet_adapter(fun, in1, in2): - tin1 = [mxnd.from_dlpack(dltensor) for dltensor in in1] - tin2 = [mxnd.from_dlpack(dltensor) for dltensor in in2] - tout1, tout2 = fun(tin1, tin2) - return [mxnd.to_dlpack_for_read(tout) for tout in tout1], [ - mxnd.to_dlpack_for_read(tout) for tout in tout2 - ] - - -def mxnet_wrapper(fun): - return lambda in1, in2: mxnet_adapter(fun, in1, in2) - - -def mxnet_compare(fun, pre1, pre2, post1, post2): - mxnet_pre1 = [mxnd.array(pre1.at(i)) for i in range(BATCH_SIZE)] - mxnet_pre2 = [mxnd.array(pre2.at(i)) for i in range(BATCH_SIZE)] - mxnet_post1, mxnet_post2 = fun(mxnet_pre1, mxnet_pre2) - for i in range(BATCH_SIZE): - assert numpy.array_equal(post1.at(i), mxnet_post1[i].asnumpy()) - assert numpy.array_equal(post2.at(i), mxnet_post2[i].asnumpy()) - - -def mxnet_case(fun, device): - setup_mxnet() - common_case(mxnet_wrapper(fun), device, partial(mxnet_compare, fun)) - - -def mxnet_flatten(in1, in2): - return [mxnd.flatten(t) for t in in1], [mxnd.flatten(t) for t in in2] - - -def mxnet_slice(in1, in2): - return [t[:, :, 1] for t in in1], [t[:, :, 2] for t in in2] - - -def mxnet_cast(in1, in2): - return [mxnd.cast(t, dtype="float32") for t in in1], [mxnd.cast(t, dtype="int64") for t in in2] - - -def test_mxnet(): - for testcase in [mxnet_flatten, mxnet_slice, mxnet_cast]: - for device in ["cpu", "gpu"]: - yield mxnet_case, testcase, device - - def cupy_adapter_sync(fun, in1, in2): with cupy_stream: tin1 = [cupy.fromDlpack(dltensor) for dltensor in in1] @@ -327,6 +279,7 @@ def cupy_kernel_gray_scale(in1, in2, stream=None): return out1, out2 +@attr("cupy") def test_cupy(): 
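+    # Note (descriptive comment, not part of the original change's wording): the
+    # @attr("cupy") marker added above lets test runs select or deselect this generator
+    # by attribute, e.g. where the optional cupy dependency is not installed; the cupy
+    # import itself is deferred to the setup_cupy() call on the next line.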
setup_cupy() print(cupy) @@ -335,6 +288,7 @@ def test_cupy(): yield from _cupy_flip_with_negative_strides_suite() +@attr("cupy") def test_cupy_kernel_gray_scale(): setup_cupy() cupy_case(cupy_kernel_gray_scale, synchronize=False) diff --git a/dali/test/python/test_external_source_impl_utils.py b/dali/test/python/test_external_source_impl_utils.py index 1617e306a6b..6048b36c703 100644 --- a/dali/test/python/test_external_source_impl_utils.py +++ b/dali/test/python/test_external_source_impl_utils.py @@ -96,21 +96,6 @@ def test_pytorch_containers(): yield from run_checks(samples_cpu, batches_cpu, disallowed_samples, []) -@attr("mxnet") -def test_mxnet_containers(): - import mxnet as mx - - samples_cpu = [ - (mx.nd.array(test_array), test_array), - ] - batches_cpu = [ - ([mx.nd.array(test_array)], [test_array]), - ([mx.nd.array(test_array)] * 4, [test_array] * 4), - ] - disallowed_samples = [mx.nd.array(test_array, ctx=mx.gpu(0))] - yield from run_checks(samples_cpu, batches_cpu, disallowed_samples, []) - - @attr("cupy") def test_cupy_containers(): import cupy as cp diff --git a/dali/test/python/test_external_source_parallel.py b/dali/test/python/test_external_source_parallel.py index f74346a1b9f..d20667463ce 100644 --- a/dali/test/python/test_external_source_parallel.py +++ b/dali/test/python/test_external_source_parallel.py @@ -16,7 +16,12 @@ import nvidia.dali as dali from nvidia.dali.types import SampleInfo, BatchInfo import test_external_source_parallel_utils as utils -from nose_utils import raises, with_setup +from test_pool_utils import setup_function, teardown_function, capture_processes +from nose2.tools import params, cartesian_params +from nose_utils import raises +import unittest +import functools +import nvidia.dali.backend as _b def no_arg_fun(): @@ -128,45 +133,108 @@ def test_wrong_source(): (generator_fun(), (TypeError, batch_required_msg.format("an iterable"))), ] for source, (error_type, error_msg) in disallowed_sources: - yield raises(error_type, error_msg)(check_source_build), source + raises(error_type, error_msg)(check_source_build)(source) # Test that we can launch several CPU-only pipelines by fork as we don't touch CUDA context. 
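+# Note (descriptive comment): forking is only safe while the CUDA driver is still
+# uninitialized in the parent process, so the tests below choose between "fork" and
+# "spawn" depending on whether the driver has already been initialized (see the
+# IsDriverInitialized check further down).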
-@with_setup(utils.setup_function, utils.teardown_function) -def test_parallel_fork_cpu_only(): - pipeline_pairs = 4 - batch_size = 10 - iters = 40 - callback = utils.ExtCallback((4, 5), iters * batch_size, np.int32) - parallel_pipes = [ - ( - utils.create_pipe( - callback, - "cpu", - batch_size, - py_num_workers=4, - py_start_method="fork", - parallel=True, - device_id=None, - ), - utils.create_pipe( - callback, - "cpu", +class TestParallelFork(unittest.TestCase): + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + # call it explicitly as it needs not GPU context in the process + def _test_parallel_fork_cpu_only(self): + pipeline_pairs = 4 + batch_size = 10 + iters = 40 + callback = utils.ExtCallback((4, 5), iters * batch_size, np.int32) + parallel_pipes = [ + ( + utils.create_pipe( + callback, + "cpu", + batch_size, + py_num_workers=4, + py_start_method="fork", + parallel=True, + device_id=None, + ), + utils.create_pipe( + callback, + "cpu", + batch_size, + py_num_workers=4, + py_start_method="fork", + parallel=True, + device_id=None, + ), + ) + for i in range(pipeline_pairs) + ] + for pipe0, pipe1 in parallel_pipes: + pipe0.build() + pipe1.build() + capture_processes(pipe0._py_pool) + capture_processes(pipe1._py_pool) + utils.compare_pipelines(pipe0, pipe1, batch_size, iters) + + def test_parallel_fork(self): + epoch_size = 250 + callback = utils.ExtCallback((4, 5), epoch_size, np.int32) + # if context is already initialized, use spawn to avoid fork wich will fail immediately + init_method = "fork" if not _b.IsDriverInitialized() else "spawn" + pipes = [ + ( + utils.create_pipe( + callback, + "cpu", + batch_size, + py_num_workers=num_workers, + py_start_method=init_method, + parallel=True, + ), + utils.create_pipe(callback, "cpu", batch_size, parallel=False), + dtype, batch_size, - py_num_workers=4, - py_start_method="fork", - parallel=True, - device_id=None, - ), + ) + for dtype in [np.float32, np.int16] + for num_workers in [1, 3, 4] + for batch_size in [1, 16, 150, 250] + ] + pipes.append( + ( + utils.create_pipe( + Iterable(32, (4, 5), dtype=np.int16), + "cpu", + 32, + py_num_workers=1, + py_start_method=init_method, + parallel=True, + batch=True, + ), + utils.create_pipe( + Iterable(32, (4, 5), dtype=np.int16), "cpu", 32, parallel=False, batch=True + ), + np.int16, + 32, + ) ) - for i in range(pipeline_pairs) - ] - for pipe0, pipe1 in parallel_pipes: - pipe0.build() - pipe1.build() - utils.capture_processes(pipe0._py_pool) - utils.capture_processes(pipe1._py_pool) - utils.compare_pipelines(pipe0, pipe1, batch_size, iters) + + for parallel_pipe, _, _, _ in pipes: + parallel_pipe.start_py_workers() + for parallel_pipe, pipe, dtype, batch_size in pipes: + utils.check_callback(parallel_pipe, pipe, epoch_size, batch_size, dtype) + parallel_pipe._py_pool.close() + # test that another pipeline with forking initialization fails + # as there is CUDA contexts already initialized + parallel_pipe = utils.create_pipe( + callback, "cpu", 16, py_num_workers=4, py_start_method="fork", parallel=True + ) + raises( + RuntimeError, "Cannot fork a process when the CUDA has been initialized in the process." 
+ )(utils.build_and_run_pipeline)(parallel_pipe, 1) @raises( @@ -189,117 +257,67 @@ def test_parallel_no_workers(): parallel_pipe.build() -@with_setup(utils.setup_function, utils.teardown_function) -def test_parallel_fork(): - epoch_size = 250 - callback = utils.ExtCallback((4, 5), epoch_size, np.int32) - pipes = [ - ( - utils.create_pipe( - callback, - "cpu", - batch_size, - py_num_workers=num_workers, - py_start_method="fork", - parallel=True, - ), - utils.create_pipe(callback, "cpu", batch_size, parallel=False), - dtype, - batch_size, - ) - for dtype in [np.float32, np.int16] - for num_workers in [1, 3, 4] - for batch_size in [1, 16, 150, 250] - ] - pipes.append( - ( - utils.create_pipe( - Iterable(32, (4, 5), dtype=np.int16), - "cpu", - 32, - py_num_workers=1, - py_start_method="fork", - parallel=True, - batch=True, - ), - utils.create_pipe( - Iterable(32, (4, 5), dtype=np.int16), "cpu", 32, parallel=False, batch=True - ), - np.int16, - 32, - ) - ) - for parallel_pipe, _, _, _ in pipes: - parallel_pipe.start_py_workers() - for parallel_pipe, pipe, dtype, batch_size in pipes: - yield utils.check_callback, parallel_pipe, pipe, epoch_size, batch_size, dtype - # explicitly call py_pool close - # as nose might still reference parallel_pipe from the yield above - parallel_pipe._py_pool.close() - # test that another pipeline with forking initialization fails - # as there is CUDA contexts already initialized - parallel_pipe = utils.create_pipe( - callback, "cpu", 16, py_num_workers=4, py_start_method="fork", parallel=True - ) - yield raises( - RuntimeError, "Cannot fork a process when the CUDA has been initialized in the process." - )(utils.build_and_run_pipeline), parallel_pipe, 1 - - -def test_dtypes(): - yield from utils.check_spawn_with_callback(utils.ExtCallback) +class TestSimpleCallbacks: + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -def test_random_data(): - yield from utils.check_spawn_with_callback( - utils.ExtCallback, shapes=[(100, 40, 3), (8, 64, 64, 3)], random_data=True - ) + def test_dtypes(self): + utils.check_spawn_with_callback(utils.ExtCallback) + def test_random_data(self): + utils.check_spawn_with_callback( + utils.ExtCallback, shapes=[(100, 40, 3), (8, 64, 64, 3)], random_data=True + ) -def test_randomly_shaped_data(): - yield from utils.check_spawn_with_callback( - utils.ExtCallback, - shapes=[(100, 40, 3), (8, 64, 64, 3)], - random_data=True, - random_shape=True, - ) + def test_randomly_shaped_data(self): + utils.check_spawn_with_callback( + utils.ExtCallback, + shapes=[(100, 40, 3), (8, 64, 64, 3)], + random_data=True, + random_shape=True, + ) + def test_num_outputs(self): + utils.check_spawn_with_callback( + utils.ExtCallbackMultipleOutputs, + utils.ExtCallbackMultipleOutputs, + num_outputs=2, + dtypes=[np.uint8, float], + ) -def test_num_outputs(): - yield from utils.check_spawn_with_callback( - utils.ExtCallbackMultipleOutputs, - utils.ExtCallbackMultipleOutputs, - num_outputs=2, - dtypes=[np.uint8, float], - ) + def test_tensor_cpu(self): + utils.check_spawn_with_callback(utils.ExtCallbackTensorCPU) -def test_tensor_cpu(): - yield from utils.check_spawn_with_callback(utils.ExtCallbackTensorCPU) +class TestExceptionPropagation: + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -@with_setup(utils.setup_function, utils.teardown_function) -def _test_exception_propagation(callback, batch_size, num_workers, expected): - pipe = utils.create_pipe( - callback, - "cpu", - batch_size, - 
py_num_workers=num_workers, - py_start_method="spawn", - parallel=True, + @cartesian_params( + [(StopIteration, StopIteration), (utils.CustomException, Exception)], + [1, 4], + [1, 15, 150], ) - raises(expected)(utils.build_and_run_pipeline)(pipe, None) - - -def test_exception_propagation(): - for raised, expected in [(StopIteration, StopIteration), (utils.CustomException, Exception)]: + def test_exception_propagation(self, exceptions, batch_size, num_workers): + raised, expected = exceptions callback = utils.ExtCallback((4, 4), 250, np.int32, exception_class=raised) - for num_workers in [1, 4]: - for batch_size in [1, 15, 150]: - yield _test_exception_propagation, callback, batch_size, num_workers, expected + pipe = utils.create_pipe( + callback, + "cpu", + batch_size, + py_num_workers=num_workers, + py_start_method="spawn", + parallel=True, + ) + raises(expected)(utils.build_and_run_pipeline)(pipe, None) -@with_setup(utils.setup_function, utils.teardown_function) def _test_stop_iteration_resume(callback, batch_size, layout, num_workers): pipe = utils.create_pipe( callback, @@ -313,34 +331,57 @@ def _test_stop_iteration_resume(callback, batch_size, layout, num_workers): utils.check_stop_iteration_resume(pipe, batch_size, layout) -def test_stop_iteration_resume(): - callback = utils.ExtCallback((4, 4), 250, "int32") - layout = "XY" - for num_workers in [1, 4]: - for batch_size in [1, 15, 150]: - yield _test_stop_iteration_resume, callback, batch_size, layout, num_workers +class TestStopIterationResume: + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -@with_setup(utils.setup_function, utils.teardown_function) -def _test_layout(callback, batch_size, layout, num_workers): - pipe = utils.create_pipe( - callback, - "cpu", - batch_size, - layout=layout, - py_num_workers=num_workers, - py_start_method="spawn", - parallel=True, + @cartesian_params( + [1, 4], + [1, 15, 150], ) - utils.check_layout(pipe, layout) + def test_stop_iteration_resume(self, num_workers, batch_size): + callback = utils.ExtCallback((4, 4), 250, "int32") + layout = "XY" + pipe = utils.create_pipe( + callback, + "cpu", + batch_size, + layout=layout, + py_num_workers=num_workers, + py_start_method="spawn", + parallel=True, + ) + utils.check_stop_iteration_resume(pipe, batch_size, layout) + +class TestLayout: + def setUp(self): + setup_function() -def test_layout(): - for layout, dims in zip(["X", "XY", "XYZ"], ((4,), (4, 4), (4, 4, 4))): + def tearDown(self): + teardown_function() + + @cartesian_params( + [((4,), "X"), ((4, 4), "XY"), ((4, 4, 4), "XYZ")], + [1, 4], + [1, 256, 600], + ) + def test_layout(self, inputs_description, batch_size, num_workers): + dims, layout = inputs_description callback = utils.ExtCallback(dims, 1024, "int32") - for num_workers in [1, 4]: - for batch_size in [1, 256, 600]: - yield _test_layout, callback, batch_size, layout, num_workers + pipe = utils.create_pipe( + callback, + "cpu", + batch_size, + layout=layout, + py_num_workers=num_workers, + py_start_method="spawn", + parallel=True, + ) + utils.check_layout(pipe, layout) class ext_cb: @@ -352,38 +393,6 @@ def __call__(self, sinfo): return np.full(self.shape, sinfo.idx_in_epoch, dtype=np.int32) -@with_setup(utils.setup_function, utils.teardown_function) -def _test_vs_non_parallel(batch_size, cb_parallel, cb_seq, batch, py_num_workers): - pipe = dali.Pipeline( - batch_size=batch_size, - device_id=None, - num_threads=5, - py_num_workers=py_num_workers, - py_start_method="spawn", - ) - with pipe: - ext_seq = 
dali.fn.external_source(cb_parallel, batch=batch, parallel=False) - ext_par = dali.fn.external_source(cb_seq, batch=batch, parallel=True) - pipe.set_outputs(ext_seq, ext_par) - pipe.build() - utils.capture_processes(pipe._py_pool) - for i in range(10): - seq, par = pipe.run() - for j in range(batch_size): - s = seq.at(j) - p = par.at(j) - assert np.array_equal(s, p) - - -def test_vs_non_parallel(): - for shape in [[], [10], [100, 100, 100]]: - for batch_size, cb_parallel, cb_seq, batch, py_num_workers in [ - (50, ext_cb("cb 1", shape), ext_cb("cb 2", shape), False, 14), - (50, Iterable(50, shape), Iterable(50, shape), True, 1), - ]: - yield _test_vs_non_parallel, batch_size, cb_parallel, cb_seq, batch, py_num_workers - - def generator_shape_empty(): count = 0 while True: @@ -402,54 +411,65 @@ def generator_shape_100x3(): yield [np.full([10, 10, 10], count + i) for i in range(50)] -def test_generator_vs_non_parallel(): - for cb in [generator_shape_empty, generator_shape_10, generator_shape_100x3]: - yield _test_vs_non_parallel, 50, cb, cb, True, 1 +class TestVsNonParallel: + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -@with_setup(utils.setup_function, utils.teardown_function) -def _test_cycle_raise(cb, is_gen_fun, batch_size, epoch_size, reader_queue_size): - pipe = utils.create_pipe( - cb, - "cpu", - batch_size=batch_size, - py_num_workers=1, - py_start_method="spawn", - parallel=True, - device_id=None, - batch=True, - num_threads=5, - cycle="raise", - reader_queue_depth=reader_queue_size, + @cartesian_params( + [[], [10], [100, 100, 100]], + [ + (50, functools.partial(ext_cb, "cb 1"), functools.partial(ext_cb, "cb 2"), False, 14), + (50, functools.partial(Iterable, 50), functools.partial(Iterable, 50), True, 1), + ], ) - pipe.build() - utils.capture_processes(pipe._py_pool) - if is_gen_fun: - refer_iter = cb() - else: - refer_iter = cb - for _ in range(3): - i = 0 - while True: - try: - (batch,) = pipe.run() - expected_batch = next(refer_iter) - assert len(batch) == len( - expected_batch - ), f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}" - for sample, expected_sample in zip(batch, expected_batch): - np.testing.assert_equal(sample, expected_sample) - i += 1 - except StopIteration: - pipe.reset() - if is_gen_fun: - refer_iter = cb() - else: - refer_iter = iter(cb) - assert ( - i == epoch_size - ), f"Number of iterations mismatch: expected {epoch_size}, got {i}" - break + def test_vs_non_parallel(self, shape, pipe_description): + batch_size, cb_parallel, cb_seq, batch, py_num_workers = pipe_description + cb_parallel = cb_parallel(shape) + cb_seq = cb_seq(shape) + pipe = dali.Pipeline( + batch_size=batch_size, + device_id=None, + num_threads=5, + py_num_workers=py_num_workers, + py_start_method="spawn", + ) + with pipe: + ext_seq = dali.fn.external_source(cb_parallel, batch=batch, parallel=False) + ext_par = dali.fn.external_source(cb_seq, batch=batch, parallel=True) + pipe.set_outputs(ext_seq, ext_par) + pipe.build() + capture_processes(pipe._py_pool) + for i in range(10): + seq, par = pipe.run() + for j in range(batch_size): + s = seq.at(j) + p = par.at(j) + assert np.array_equal(s, p) + + @params(generator_shape_empty, generator_shape_10, generator_shape_100x3) + def test_generator_vs_non_parallel(self, cb): + pipe = dali.Pipeline( + batch_size=50, + device_id=None, + num_threads=5, + py_num_workers=1, + py_start_method="spawn", + ) + with pipe: + ext_seq = dali.fn.external_source(cb, batch=True, parallel=False) + ext_par = 
dali.fn.external_source(cb, batch=True, parallel=True) + pipe.set_outputs(ext_seq, ext_par) + pipe.build() + capture_processes(pipe._py_pool) + for i in range(10): + seq, par = pipe.run() + for j in range(50): + s = seq.at(j) + p = par.at(j) + assert np.array_equal(s, p) def generator_epoch_size_1(): @@ -461,205 +481,203 @@ def generator_epoch_size_4(): yield [np.full((4, 5), j + i) for i in range(20)] -def test_cycle_raise(): - batch_size = 20 - for epoch_size, cb, is_gen_fun in [ - (1, Iterable(batch_size, (4, 5), epoch_size=1), False), - (4, Iterable(batch_size, (4, 5), epoch_size=4), False), - (1, generator_epoch_size_1, True), - (4, generator_epoch_size_4, True), - ]: - for reader_queue_size in (1, 2, 6): - yield _test_cycle_raise, cb, is_gen_fun, batch_size, epoch_size, reader_queue_size +class TestCycleRaise: + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -@with_setup(utils.setup_function, utils.teardown_function) -def _test_cycle_quiet(cb, is_gen_fun, batch_size, epoch_size, reader_queue_size): - pipe = utils.create_pipe( - cb, - "cpu", - batch_size=batch_size, - py_num_workers=1, - py_start_method="spawn", - parallel=True, - device_id=None, - batch=True, - num_threads=5, - cycle="quiet", - reader_queue_depth=reader_queue_size, + BATCH_SIZE = 20 + + @cartesian_params( + [ + (1, Iterable(BATCH_SIZE, (4, 5), epoch_size=1), False), + (4, Iterable(BATCH_SIZE, (4, 5), epoch_size=4), False), + (1, generator_epoch_size_1, True), + (4, generator_epoch_size_4, True), + ], + (1, 2, 6), ) - pipe.build() - utils.capture_processes(pipe._py_pool) - refer_iter = cb - for i in range(3 * epoch_size + 1): - if i % epoch_size == 0: - if is_gen_fun: - refer_iter = cb() - else: - refer_iter = iter(cb) - (batch,) = pipe.run() - expected_batch = next(refer_iter) - assert len(batch) == len( - expected_batch - ), f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}" - for sample, expected_sample in zip(batch, expected_batch): - np.testing.assert_equal(sample, expected_sample) - - -def test_cycle_quiet(): - batch_size = 20 - for epoch_size, cb, is_gen_fun in [ - (1, Iterable(batch_size, (4, 5), epoch_size=1), False), - (4, Iterable(batch_size, (4, 5), epoch_size=4), False), - (1, generator_epoch_size_1, True), - (4, generator_epoch_size_4, True), - ]: - for reader_queue_size in (1, 2, 6): - yield _test_cycle_quiet, cb, is_gen_fun, batch_size, epoch_size, reader_queue_size - - -@with_setup(utils.setup_function, utils.teardown_function) -def _test_cycle_quiet_non_resetable(iterable, reader_queue_size, batch_size, epoch_size): - pipe = utils.create_pipe( - iterable, - "cpu", - batch_size=batch_size, - py_num_workers=1, - py_start_method="spawn", - parallel=True, - device_id=None, - batch=True, - num_threads=5, - cycle="quiet", - reader_queue_depth=reader_queue_size, + def test_cycle_raise(self, case_description, reader_queue_size): + epoch_size, cb, is_gen_fun = case_description + pipe = utils.create_pipe( + cb, + "cpu", + batch_size=self.BATCH_SIZE, + py_num_workers=1, + py_start_method="spawn", + parallel=True, + device_id=None, + batch=True, + num_threads=5, + cycle="raise", + reader_queue_depth=reader_queue_size, + ) + pipe.build() + capture_processes(pipe._py_pool) + if is_gen_fun: + refer_iter = cb() + else: + refer_iter = cb + for _ in range(3): + i = 0 + while True: + try: + (batch,) = pipe.run() + expected_batch = next(refer_iter) + assert len(batch) == len( + expected_batch + ), f"Batch length mismatch: expected {len(expected_batch)}, got 
{len(batch)}" + for sample, expected_sample in zip(batch, expected_batch): + np.testing.assert_equal(sample, expected_sample) + i += 1 + except StopIteration: + pipe.reset() + if is_gen_fun: + refer_iter = cb() + else: + refer_iter = iter(cb) + assert ( + i == epoch_size + ), f"Number of iterations mismatch: expected {epoch_size}, got {i}" + break + + +class TestCycleQuiet: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + BATCH_SIZE = 20 + EPOCH_SIZE = 3 + + @cartesian_params( + [ + (1, Iterable(BATCH_SIZE, (4, 5), epoch_size=1), False), + (4, Iterable(BATCH_SIZE, (4, 5), epoch_size=4), False), + (1, generator_epoch_size_1, True), + (4, generator_epoch_size_4, True), + ], + (1, 2, 6), ) - pipe.build() - utils.capture_processes(pipe._py_pool) - for _ in range(epoch_size): - pipe.run() - try: - pipe.run() - except StopIteration: - pipe.reset() + def test_cycle_quiet(self, case_description, reader_queue_size): + epoch_size, cb, is_gen_fun = case_description + pipe = utils.create_pipe( + cb, + "cpu", + batch_size=self.BATCH_SIZE, + py_num_workers=1, + py_start_method="spawn", + parallel=True, + device_id=None, + batch=True, + num_threads=5, + cycle="quiet", + reader_queue_depth=reader_queue_size, + ) + pipe.build() + capture_processes(pipe._py_pool) + refer_iter = cb + for i in range(3 * epoch_size + 1): + if i % epoch_size == 0: + if is_gen_fun: + refer_iter = cb() + else: + refer_iter = iter(cb) + (batch,) = pipe.run() + expected_batch = next(refer_iter) + assert len(batch) == len( + expected_batch + ), f"Batch length mismatch: expected {len(expected_batch)}, got {len(batch)}" + for sample, expected_sample in zip(batch, expected_batch): + np.testing.assert_equal(sample, expected_sample) + + @params(1, 3, 6) + def test_cycle_quiet_non_resetable(self, reader_queue_size): + iterable = FaultyResetIterable(self.EPOCH_SIZE, (5, 4), epoch_size=self.EPOCH_SIZE) + pipe = utils.create_pipe( + iterable, + "cpu", + batch_size=self.EPOCH_SIZE, + py_num_workers=1, + py_start_method="spawn", + parallel=True, + device_id=None, + batch=True, + num_threads=5, + cycle="quiet", + reader_queue_depth=reader_queue_size, + ) + pipe.build() + capture_processes(pipe._py_pool) + for _ in range(self.EPOCH_SIZE): + pipe.run() try: pipe.run() except StopIteration: - pass + pipe.reset() + try: + pipe.run() + except StopIteration: + pass + else: + assert False, "Expected stop iteration" else: - assert False, "Expected stop iteration" - else: - assert False, "Expected stop iteration at the end of the epoch" + assert False, "Expected stop iteration at the end of the epoch" -def test_cycle_quiet_non_resetable(): - epoch_size = 3 - batch_size = 20 - iterable = FaultyResetIterable(batch_size, (5, 4), epoch_size=epoch_size) - for reader_queue_size in (1, 3, 6): - yield _test_cycle_quiet_non_resetable, iterable, reader_queue_size, batch_size, epoch_size +class TestCycleNoResetting(unittest.TestCase): + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -@with_setup(utils.setup_function, utils.teardown_function) -def _test_cycle_no_resetting(cb, batch_size, epoch_size, reader_queue_size): - pipe = utils.create_pipe( - cb, - "cpu", - batch_size=batch_size, - py_num_workers=1, - py_start_method="spawn", - parallel=True, - device_id=None, - batch=True, - num_threads=5, - cycle=None, - reader_queue_depth=reader_queue_size, - ) - pipe.build() - utils.capture_processes(pipe._py_pool) - for _ in range(epoch_size): - pipe.run() - try: - pipe.run() - except StopIteration: 
- pipe.reset() - else: - assert False, "Expected stop iteration" - pipe.run() - - -def test_cycle_no_resetting(): - batch_size = 20 - for epoch_size, cb in [ - (1, Iterable(batch_size, (4, 5), epoch_size=1)), - (4, Iterable(batch_size, (4, 5), epoch_size=4)), - (1, generator_epoch_size_1), - (4, generator_epoch_size_4), - ]: - for reader_queue_size in (1, 2, 6): - yield raises(StopIteration)( - _test_cycle_no_resetting - ), cb, batch_size, epoch_size, reader_queue_size - - -@with_setup(utils.setup_function, utils.teardown_function) -def _test_all_kinds_parallel( - sample_cb, batch_cb, iter_cb, batch_size, py_num_workers, reader_queue_sizes, num_iters -): - @dali.pipeline_def( - batch_size=batch_size, - num_threads=4, - device_id=None, - py_num_workers=py_num_workers, - py_start_method="spawn", - ) - def pipeline(): - queue_size_1, queue_size_2, queue_size_3 = reader_queue_sizes - sample_out = dali.fn.external_source( - source=sample_cb, parallel=True, batch=False, prefetch_queue_depth=queue_size_1 - ) - batch_out = dali.fn.external_source( - source=batch_cb, - parallel=True, - batch=True, - prefetch_queue_depth=queue_size_2, - batch_info=True, - ) - iter_out = dali.fn.external_source( - source=iter_cb, - parallel=True, - batch=True, - prefetch_queue_depth=queue_size_3, - cycle="raise", - ) - return (sample_out, batch_out, iter_out) + BATCH_SIZE = 20 - pipe = pipeline() - pipe.build() - utils.capture_processes(pipe._py_pool) - for _ in range(3): - i = 0 - while True: + @cartesian_params( + [ + (1, Iterable(BATCH_SIZE, (4, 5), epoch_size=1)), + (4, Iterable(BATCH_SIZE, (4, 5), epoch_size=4)), + (1, generator_epoch_size_1), + (4, generator_epoch_size_4), + ], + (1, 2, 6), + ) + def test_cycle_no_resetting(self, es_description, reader_queue_size): + with self.assertRaises(StopIteration): + epoch_size, cb = es_description + pipe = utils.create_pipe( + cb, + "cpu", + batch_size=self.BATCH_SIZE, + py_num_workers=1, + py_start_method="spawn", + parallel=True, + device_id=None, + batch=True, + num_threads=5, + cycle=None, + reader_queue_depth=reader_queue_size, + ) + pipe.build() + capture_processes(pipe._py_pool) + for _ in range(epoch_size): + pipe.run() try: - (sample_outs, batch_outs, iter_outs) = pipe.run() - assert len(sample_outs) == len( - batch_outs - ), f"Batch length mismatch: sample: {len(sample_outs)}, batch: {len(batch_outs)}" - assert len(batch_outs) == len( - iter_outs - ), f"Batch length mismatch: batch: {len(batch_outs)}, iter: {len(iter_outs)}" - for sample_out, batch_out, iter_out in zip(sample_outs, batch_outs, iter_outs): - np.testing.assert_equal(np.array(sample_out), np.array(batch_out)) - np.testing.assert_equal(np.array(batch_out), np.array(iter_out)) - i += 1 + pipe.run() except StopIteration: pipe.reset() - assert ( - i == num_iters - ), f"Number of iterations mismatch: expected {num_iters}, got {i}" - break + else: + assert False, "Expected stop iteration" + pipe.run() -def test_all_kinds_parallel(): +def _make_all_kinds_parallel_cases(): + cases = [] for batch_size in (1, 17): for num_iters in (1, 3, 31): for trailing in (0, 30): @@ -678,16 +696,92 @@ def test_all_kinds_parallel(): (1, 1, 3), ): for num_workers in (1, 7): - yield ( - _test_all_kinds_parallel, - sample_cb, - batch_cb, - iterator_cb, - batch_size, - num_workers, - reader_queue_sizes, - num_iters, + cases.append( + ( + sample_cb, + batch_cb, + iterator_cb, + batch_size, + num_workers, + reader_queue_sizes, + num_iters, + ) ) + return cases + + +class TestAllKindsParallel: + def setUp(self): + 
setup_function() + + def tearDown(self): + teardown_function() + + @params(*_make_all_kinds_parallel_cases()) + def test_all_kinds_parallel( + self, + sample_cb, + batch_cb, + iterator_cb, + batch_size, + num_workers, + reader_queue_sizes, + num_iters, + ): + @dali.pipeline_def( + batch_size=batch_size, + num_threads=4, + device_id=None, + py_num_workers=num_workers, + py_start_method="spawn", + ) + def pipeline(): + queue_size_1, queue_size_2, queue_size_3 = reader_queue_sizes + sample_out = dali.fn.external_source( + source=sample_cb, parallel=True, batch=False, prefetch_queue_depth=queue_size_1 + ) + batch_out = dali.fn.external_source( + source=batch_cb, + parallel=True, + batch=True, + prefetch_queue_depth=queue_size_2, + batch_info=True, + ) + iter_out = dali.fn.external_source( + source=iterator_cb, + parallel=True, + batch=True, + prefetch_queue_depth=queue_size_3, + cycle="raise", + ) + return (sample_out, batch_out, iter_out) + + pipe = pipeline() + pipe.build() + capture_processes(pipe._py_pool) + for _ in range(3): + i = 0 + while True: + try: + (sample_outs, batch_outs, iter_outs) = pipe.run() + assert len(sample_outs) == len( + batch_outs + ), f"Batch length mismatch: sample: {len(sample_outs)}, " + f"batch: {len(batch_outs)}" + assert len(batch_outs) == len(iter_outs), ( + f"Batch length mismatch: batch: {len(batch_outs)}, " + f"iter: {len(iter_outs)}" + ) + for sample_out, batch_out, iter_out in zip(sample_outs, batch_outs, iter_outs): + np.testing.assert_equal(np.array(sample_out), np.array(batch_out)) + np.testing.assert_equal(np.array(batch_out), np.array(iter_out)) + i += 1 + except StopIteration: + pipe.reset() + assert ( + i == num_iters + ), f"Number of iterations mismatch: expected {num_iters}, got {i}" + break def collect_iterations(pipe, num_iters): @@ -702,122 +796,123 @@ def collect_iterations(pipe, num_iters): return outs -@with_setup(utils.setup_function, utils.teardown_function) -def _test_cycle_multiple_iterators( - batch_size, iters_num, py_num_workers, reader_queue_sizes, cycle_policies, epoch_sizes -): - @dali.pipeline_def( - batch_size=batch_size, - num_threads=4, - device_id=None, - py_num_workers=py_num_workers, - py_start_method="spawn", - ) - def pipeline(sample_cb, iter_1, iter_2, parallel): - if parallel: - queue_size_0, queue_size_1, queue_size_2 = reader_queue_sizes - else: - queue_size_0, queue_size_1, queue_size_2 = None, None, None - cycle_1, cycle_2 = cycle_policies - sample_out = dali.fn.external_source( - source=sample_cb, parallel=parallel, batch=False, prefetch_queue_depth=queue_size_0 - ) - iter1_out = dali.fn.external_source( - source=iter_1, - parallel=parallel, - batch=True, - prefetch_queue_depth=queue_size_1, - cycle=cycle_1, - ) - iter2_out = dali.fn.external_source( - source=iter_2, - parallel=parallel, - batch=True, - prefetch_queue_depth=queue_size_2, - cycle=cycle_2, - ) - return (sample_out, iter1_out, iter2_out) - - shape = (2, 3) - sample_epoch_size, iter_1_epoch_size, iter_2_epoch_size = epoch_sizes - sample_cb = utils.ExtCallback((4, 5), sample_epoch_size * batch_size, np.int32) - iter_1 = Iterable(batch_size, shape, epoch_size=iter_1_epoch_size, dtype=np.int32) - iter_2 = Iterable(batch_size, shape, epoch_size=iter_2_epoch_size, dtype=np.int32) - pipe_parallel = pipeline(sample_cb, iter_1, iter_2, parallel=True) - pipe_seq = pipeline(sample_cb, iter_1, iter_2, parallel=False) - pipe_parallel.build() - utils.capture_processes(pipe_parallel._py_pool) - pipe_seq.build() - parallel_outs = collect_iterations(pipe_parallel, 
iters_num) - seq_outs = collect_iterations(pipe_seq, iters_num) - assert len(parallel_outs) == len(seq_outs) - for parallel_out, seq_out in zip(parallel_outs, seq_outs): - if parallel_out == StopIteration or seq_out == StopIteration: - assert parallel_out == seq_out - continue - assert len(parallel_out) == len(seq_out) == 3 - for batch_parallel, batch_seq in zip(parallel_out, seq_out): - assert len(batch_parallel) == len(batch_seq) == batch_size - for sample_parallel, sample_seq in zip(batch_parallel, batch_seq): - np.testing.assert_equal(np.array(sample_parallel), np.array(sample_seq)) - - -def test_cycle_multiple_iterators(): - batch_size = 50 - iters_num = 17 - num_workers = 4 - for prefetch_queue_depths in ((3, 1, 1), (1, 3, 1), (1, 1, 3), (1, 1, 1), (3, 3, 3)): - for cycle_policies in ( +class TestCycleMultipleIterators: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + @cartesian_params( + ((3, 1, 1), (1, 3, 1), (1, 1, 3), (1, 1, 1), (3, 3, 3)), + ( ("raise", "raise"), ("quiet", "raise"), ("raise", "quiet"), ("quiet", "quiet"), (True, True), - ): - for epoch_sizes in ((8, 4, 6), (8, 6, 4), (4, 6, 8), (1, 1, 1)): - yield ( - _test_cycle_multiple_iterators, - batch_size, - iters_num, - num_workers, - prefetch_queue_depths, - cycle_policies, - epoch_sizes, - ) + ), + ((8, 4, 6), (8, 6, 4), (4, 6, 8), (1, 1, 1)), + ) + def test_cycle_multiple_iterators(self, prefetch_queue_depths, cycle_policies, epoch_sizes): + batch_size = 50 + iters_num = 17 + num_workers = 4 + + @dali.pipeline_def( + batch_size=batch_size, + num_threads=4, + device_id=None, + py_num_workers=num_workers, + py_start_method="spawn", + ) + def pipeline(sample_cb, iter_1, iter_2, parallel): + if parallel: + queue_size_0, queue_size_1, queue_size_2 = prefetch_queue_depths + else: + queue_size_0, queue_size_1, queue_size_2 = None, None, None + cycle_1, cycle_2 = cycle_policies + sample_out = dali.fn.external_source( + source=sample_cb, parallel=parallel, batch=False, prefetch_queue_depth=queue_size_0 + ) + iter1_out = dali.fn.external_source( + source=iter_1, + parallel=parallel, + batch=True, + prefetch_queue_depth=queue_size_1, + cycle=cycle_1, + ) + iter2_out = dali.fn.external_source( + source=iter_2, + parallel=parallel, + batch=True, + prefetch_queue_depth=queue_size_2, + cycle=cycle_2, + ) + return (sample_out, iter1_out, iter2_out) + + shape = (2, 3) + sample_epoch_size, iter_1_epoch_size, iter_2_epoch_size = epoch_sizes + sample_cb = utils.ExtCallback((4, 5), sample_epoch_size * batch_size, np.int32) + iter_1 = Iterable(batch_size, shape, epoch_size=iter_1_epoch_size, dtype=np.int32) + iter_2 = Iterable(batch_size, shape, epoch_size=iter_2_epoch_size, dtype=np.int32) + pipe_parallel = pipeline(sample_cb, iter_1, iter_2, parallel=True) + pipe_seq = pipeline(sample_cb, iter_1, iter_2, parallel=False) + pipe_parallel.build() + capture_processes(pipe_parallel._py_pool) + pipe_seq.build() + parallel_outs = collect_iterations(pipe_parallel, iters_num) + seq_outs = collect_iterations(pipe_seq, iters_num) + assert len(parallel_outs) == len(seq_outs) + for parallel_out, seq_out in zip(parallel_outs, seq_outs): + if parallel_out == StopIteration or seq_out == StopIteration: + assert parallel_out == seq_out + continue + assert len(parallel_out) == len(seq_out) == 3 + for batch_parallel, batch_seq in zip(parallel_out, seq_out): + assert len(batch_parallel) == len(batch_seq) == batch_size + for sample_parallel, sample_seq in zip(batch_parallel, batch_seq): + 
np.testing.assert_equal(np.array(sample_parallel), np.array(sample_seq)) def ext_cb2(sinfo): return np.array([sinfo.idx_in_epoch, sinfo.idx_in_batch, sinfo.iteration], dtype=np.int32) -@with_setup(utils.setup_function, utils.teardown_function) -def test_discard(): - bs = 5 - pipe = dali.Pipeline( - batch_size=bs, device_id=None, num_threads=5, py_num_workers=4, py_start_method="spawn" - ) - with pipe: - ext1 = dali.fn.external_source([[np.float32(i) for i in range(bs)]] * 3, cycle="raise") - ext2 = dali.fn.external_source(ext_cb2, batch=False, parallel=True) - ext3 = dali.fn.external_source(ext_cb2, batch=False, parallel=False) - pipe.set_outputs(ext1, ext2, ext3) - pipe.build() - utils.capture_processes(pipe._py_pool) - sample_in_epoch = 0 - iteration = 0 - for i in range(10): - try: - e1, e2, e3 = pipe.run() - for i in range(bs): - assert e1.at(i) == i - assert np.array_equal(e2.at(i), np.array([sample_in_epoch, i, iteration])) - assert np.array_equal(e3.at(i), np.array([sample_in_epoch, i, iteration])) - sample_in_epoch += 1 - iteration += 1 - except StopIteration: - sample_in_epoch = 0 - iteration = 0 - pipe.reset() +class TestDiscard: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + def test_discard(self): + bs = 5 + pipe = dali.Pipeline( + batch_size=bs, device_id=None, num_threads=5, py_num_workers=4, py_start_method="spawn" + ) + with pipe: + ext1 = dali.fn.external_source([[np.float32(i) for i in range(bs)]] * 3, cycle="raise") + ext2 = dali.fn.external_source(ext_cb2, batch=False, parallel=True) + ext3 = dali.fn.external_source(ext_cb2, batch=False, parallel=False) + pipe.set_outputs(ext1, ext2, ext3) + pipe.build() + capture_processes(pipe._py_pool) + sample_in_epoch = 0 + iteration = 0 + for i in range(10): + try: + e1, e2, e3 = pipe.run() + for i in range(bs): + assert e1.at(i) == i + assert np.array_equal(e2.at(i), np.array([sample_in_epoch, i, iteration])) + assert np.array_equal(e3.at(i), np.array([sample_in_epoch, i, iteration])) + sample_in_epoch += 1 + iteration += 1 + except StopIteration: + sample_in_epoch = 0 + iteration = 0 + pipe.reset() class SampleCb: @@ -839,99 +934,112 @@ def __call__(self, sample_info): ) -@with_setup(utils.setup_function, utils.teardown_function) -def _test_epoch_idx( - batch_size, - epoch_size, - cb, - py_num_workers, - prefetch_queue_depth, - reader_queue_depth, - batch_mode, - batch_info, -): - num_epochs = 3 - pipe = utils.create_pipe( - cb, - "cpu", - batch_size=batch_size, - py_num_workers=py_num_workers, - py_start_method="spawn", - parallel=True, - device_id=0, - batch=batch_mode, - num_threads=1, - cycle=None, - batch_info=batch_info, - prefetch_queue_depth=prefetch_queue_depth, - reader_queue_depth=reader_queue_depth, - ) - pipe.build() - utils.capture_processes(pipe._py_pool) - for epoch_idx in range(num_epochs): - for iteration in range(epoch_size): - (batch,) = pipe.run() - assert len(batch) == batch_size - for sample_i, sample in enumerate(batch): - expected = np.array( - [ - iteration * batch_size + sample_i, - sample_i, - iteration, - epoch_idx if not batch_mode or batch_info else 0, - ] - ) - np.testing.assert_array_equal(sample, expected) - try: - pipe.run() - except StopIteration: - pipe.reset() - else: - assert False, "expected StopIteration" - - -def test_epoch_idx(): +def _generate_epoch_idx_test_cases(): num_workers = 4 prefetch_queue_depth = 2 + cases = [] for batch_size in (1, 50): for epoch_size in (1, 3, 7): for reader_queue_depth in (1, 5): sample_cb = SampleCb(batch_size, 
epoch_size) - yield ( - _test_epoch_idx, - batch_size, - epoch_size, - sample_cb, - num_workers, - prefetch_queue_depth, - reader_queue_depth, - False, - None, + cases.append( + ( + batch_size, + epoch_size, + sample_cb, + num_workers, + prefetch_queue_depth, + reader_queue_depth, + False, + None, + ) ) - batch_cb = SampleCallbackBatched(sample_cb, batch_size, True) - yield ( - _test_epoch_idx, - batch_size, - epoch_size, - batch_cb, - num_workers, - prefetch_queue_depth, - reader_queue_depth, - True, - True, + batch_cb_true = SampleCallbackBatched(sample_cb, batch_size, True) + cases.append( + ( + batch_size, + epoch_size, + batch_cb_true, + num_workers, + prefetch_queue_depth, + reader_queue_depth, + True, + True, + ) ) - batch_cb = SampleCallbackBatched(sample_cb, batch_size, False) - yield ( - _test_epoch_idx, - batch_size, - epoch_size, - batch_cb, - num_workers, - prefetch_queue_depth, - reader_queue_depth, - True, - False, + batch_cb_false = SampleCallbackBatched(sample_cb, batch_size, False) + cases.append( + ( + batch_size, + epoch_size, + batch_cb_false, + num_workers, + prefetch_queue_depth, + reader_queue_depth, + True, + False, + ) ) + return cases + + +class TestEpochIdx: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + @params(*_generate_epoch_idx_test_cases()) + def test_epoch_idx( + self, + batch_size, + epoch_size, + cb, + py_num_workers, + prefetch_queue_depth, + reader_queue_depth, + batch_mode, + batch_info, + ): + num_epochs = 3 + pipe = utils.create_pipe( + cb, + "cpu", + batch_size=batch_size, + py_num_workers=py_num_workers, + py_start_method="spawn", + parallel=True, + device_id=0, + batch=batch_mode, + num_threads=1, + cycle=None, + batch_info=batch_info, + prefetch_queue_depth=prefetch_queue_depth, + reader_queue_depth=reader_queue_depth, + ) + pipe.build() + capture_processes(pipe._py_pool) + for epoch_idx in range(num_epochs): + for iteration in range(epoch_size): + (batch,) = pipe.run() + assert len(batch) == batch_size + for sample_i, sample in enumerate(batch): + expected = np.array( + [ + iteration * batch_size + sample_i, + sample_i, + iteration, + epoch_idx if not batch_mode or batch_info else 0, + ] + ) + np.testing.assert_array_equal(sample, expected) + try: + pipe.run() + except StopIteration: + pipe.reset() + else: + assert False, "expected StopIteration" class PermutableSampleCb: @@ -956,66 +1064,61 @@ def __call__(self, sample_info): return np.array([self.perm[sample_info.idx_in_epoch]], dtype=np.int32) -@with_setup(utils.setup_function, utils.teardown_function) -def _test_permute_dataset( - batch_size, - epoch_size, - trailing_samples, - cb, - py_num_workers, - prefetch_queue_depth, - reader_queue_depth, -): - num_epochs = 3 - pipe = utils.create_pipe( - cb, - "cpu", - batch_size=batch_size, - py_num_workers=py_num_workers, - py_start_method="spawn", - parallel=True, - device_id=0, - batch=False, - num_threads=1, - cycle=None, - prefetch_queue_depth=prefetch_queue_depth, - reader_queue_depth=reader_queue_depth, - ) - pipe.build() - utils.capture_processes(pipe._py_pool) - for epoch_idx in range(num_epochs): - epoch_data = [False for _ in range(epoch_size * batch_size + trailing_samples)] - for _ in range(epoch_size): - (batch,) = pipe.run() - assert len(batch) == batch_size - for sample in batch: - epoch_data[np.array(sample)[0]] = True - assert ( - sum(epoch_data) == epoch_size * batch_size - ), "Epoch number {} did not contain some samples from data set".format(epoch_idx) - try: - pipe.run() - except 
StopIteration: - pipe.reset() - else: - assert False, "expected StopIteration" +class TestPermuteDataset: + def setUp(self): + setup_function() + def tearDown(self): + teardown_function() -def test_permute_dataset(): - for batch_size, trailing_samples in ((4, 0), (100, 0), (100, 99)): - for epoch_size in (3, 7): - cb = PermutableSampleCb(batch_size, epoch_size, trailing_samples=trailing_samples) - for reader_queue_depth in (1, 5): - yield ( - _test_permute_dataset, - batch_size, - epoch_size, - trailing_samples, - cb, - 4, - 1, - reader_queue_depth, - ) + @cartesian_params( + ((4, 0), (100, 0), (100, 99)), + (3, 7), + (1, 5), + ) + def test_permute_dataset( + self, + samples_data, + epoch_size, + reader_queue_depth, + ): + batch_size, trailing_samples = samples_data + num_epochs = 3 + py_num_workers = 4 + prefetch_queue_depth = 1 + cb = PermutableSampleCb(batch_size, epoch_size, trailing_samples=trailing_samples) + pipe = utils.create_pipe( + cb, + "cpu", + batch_size=batch_size, + py_num_workers=py_num_workers, + py_start_method="spawn", + parallel=True, + device_id=0, + batch=False, + num_threads=1, + cycle=None, + prefetch_queue_depth=prefetch_queue_depth, + reader_queue_depth=reader_queue_depth, + ) + pipe.build() + capture_processes(pipe._py_pool) + for epoch_idx in range(num_epochs): + epoch_data = [False for _ in range(epoch_size * batch_size + trailing_samples)] + for _ in range(epoch_size): + (batch,) = pipe.run() + assert len(batch) == batch_size + for sample in batch: + epoch_data[np.array(sample)[0]] = True + assert ( + sum(epoch_data) == epoch_size * batch_size + ), "Epoch number {} did not contain some samples from data set".format(epoch_idx) + try: + pipe.run() + except StopIteration: + pipe.reset() + else: + assert False, "expected StopIteration" class PerIterShapeSource: diff --git a/dali/test/python/test_external_source_parallel_custom_serialization.py b/dali/test/python/test_external_source_parallel_custom_serialization.py index 550213e8f9e..0fc5917f683 100644 --- a/dali/test/python/test_external_source_parallel_custom_serialization.py +++ b/dali/test/python/test_external_source_parallel_custom_serialization.py @@ -317,8 +317,9 @@ def _create_and_compare_simple_pipelines( _run_and_compare_outputs(batch_size, parallel_pipeline, serial_pipeline) -# It uses fork method to start so need to be run as the first test -def test_no_pickling_in_forking_mode(): +# Make it private and run it explicitly as it uses fork method to start +# so need to be run as the first test +def _test_no_pickling_in_forking_mode(): # modify callback name so that an attempt to pickle it in spawn mode would fail _simple_callback.__name__ = _simple_callback.__qualname__ = "simple_callback" _create_and_compare_simple_pipelines( diff --git a/dali/test/python/test_external_source_parallel_large_sample.py b/dali/test/python/test_external_source_parallel_large_sample.py index e88b4fabf86..602bebfcc09 100644 --- a/dali/test/python/test_external_source_parallel_large_sample.py +++ b/dali/test/python/test_external_source_parallel_large_sample.py @@ -13,54 +13,56 @@ # limitations under the License. 
import numpy as np -from nose_utils import with_setup +from nose2.tools import params from nvidia.dali import pipeline_def import nvidia.dali.fn as fn -from test_external_source_parallel_utils import setup_function, teardown_function, capture_processes +from test_pool_utils import setup_function, teardown_function, capture_processes def large_sample_cb(sample_info): return np.full((512, 1024, 1024), sample_info.idx_in_epoch, dtype=np.int32) -@with_setup(setup_function, teardown_function) -def _test_large_sample(start_method): - batch_size = 2 +class TestLargeSample: + def setUp(self): + setup_function() - @pipeline_def - def create_pipeline(): - large = fn.external_source( - large_sample_cb, batch=False, parallel=True, prefetch_queue_depth=1 - ) - # iteration over array in Python is too slow, so reduce the number of elements - # to iterate over - reduced = fn.reductions.sum(large, axes=(1, 2)) - return reduced + def tearDown(self): + teardown_function() - pipe = create_pipeline( - batch_size=batch_size, - py_num_workers=2, - py_start_method=start_method, - prefetch_queue_depth=1, - num_threads=2, - device_id=0, - ) - pipe.build() - capture_processes(pipe._py_pool) - for batch_idx in range(8): - (out,) = pipe.run() - for idx_in_batch in range(batch_size): - idx_in_epoch = batch_size * batch_idx + idx_in_batch - expected_val = idx_in_epoch * 1024 * 1024 - a = np.array(out[idx_in_batch]) - assert a.shape == (512,), "Expected shape (512,) but got {}".format(a.shape) - for val in a.flat: - assert val == expected_val, ( - f"Unexpected value in batch: got {val}, expected {expected_val}, " - f"for batch {batch_idx}, sample {idx_in_batch}" - ) + @params("fork", "spawn") + def test_large_sample(self, start_method): + batch_size = 2 + @pipeline_def + def create_pipeline(): + large = fn.external_source( + large_sample_cb, batch=False, parallel=True, prefetch_queue_depth=1 + ) + # iteration over array in Python is too slow, so reduce the number of elements + # to iterate over + reduced = fn.reductions.sum(large, axes=(1, 2)) + return reduced -def test_large_sample(): - for start_method in ("fork", "spawn"): - yield _test_large_sample, start_method + pipe = create_pipeline( + batch_size=batch_size, + py_num_workers=2, + py_start_method=start_method, + prefetch_queue_depth=1, + num_threads=2, + device_id=0, + ) + pipe.build() + capture_processes(pipe._py_pool) + for batch_idx in range(8): + (out,) = pipe.run() + for idx_in_batch in range(batch_size): + idx_in_epoch = batch_size * batch_idx + idx_in_batch + expected_val = idx_in_epoch * 1024 * 1024 + a = np.array(out[idx_in_batch]) + assert a.shape == (512,), "Expected shape (512,) but got {}".format(a.shape) + for val in a.flat: + assert val == expected_val, ( + f"Unexpected value in batch: got {val}, expected {expected_val}, " + f"for batch {batch_idx}, sample {idx_in_batch}" + ) diff --git a/dali/test/python/test_external_source_parallel_mxnet.py b/dali/test/python/test_external_source_parallel_mxnet.py deleted file mode 100644 index 7ea2abe965c..00000000000 --- a/dali/test/python/test_external_source_parallel_mxnet.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# it is enough to just import all functions from test_internals_operator_external_source -# nose will query for the methods available and will run them -# the test_internals_operator_external_source is 99% the same for cupy and numpy tests -# so it is better to store everything in one file and just call `use_cupy` -# to switch between the default numpy and cupy - -import mxnet as mx -from nose_utils import raises, with_setup - -from test_pool_utils import setup_function -from test_external_source_parallel_utils import ( - ExtCallback, - check_spawn_with_callback, - create_pipe, - build_and_run_pipeline, -) -import numpy as np - - -class ExtCallbackMX(ExtCallback): - def __call__(self, sample_info): - a = super().__call__(sample_info) - return mx.nd.array(a, dtype=a.dtype) - - -def test_mxnet(): - yield from check_spawn_with_callback(ExtCallbackMX) - - -class ExtCallbackMXCuda(ExtCallback): - def __call__(self, sample_info): - a = super().__call__(sample_info) - return mx.nd.array(a, dtype=a.dtype, ctx=mx.gpu(0)) - - -@raises( - Exception, - "Exception traceback received from worker thread*" - "TypeError: Unsupported callback return type. GPU tensors*not supported*" - "Got*MXNet GPU tensor.", -) -@with_setup(setup_function) -def test_mxnet_cuda(): - callback = ExtCallbackMXCuda((4, 5), 10, np.int32) - pipe = create_pipe(callback, "cpu", 5, py_num_workers=6, py_start_method="spawn", parallel=True) - build_and_run_pipeline(pipe) diff --git a/dali/test/python/test_external_source_parallel_utils.py b/dali/test/python/test_external_source_parallel_utils.py index 700937cdbee..b9823f49810 100644 --- a/dali/test/python/test_external_source_parallel_utils.py +++ b/dali/test/python/test_external_source_parallel_utils.py @@ -14,8 +14,7 @@ import numpy as np import nvidia.dali as dali -from nose_utils import with_setup -from test_pool_utils import capture_processes, teardown_function, setup_function +from test_pool_utils import capture_processes from test_utils import ( compare_pipelines, check_batch, @@ -136,7 +135,6 @@ def check_callback(parallel_pipe, pipe, epoch_size, batch_size, dtype=None): compare_pipelines(parallel_pipe, pipe, batch_size, iters_no) -@with_setup(setup_function, teardown_function) def _check_spawn_with_callback( callback, callback_ref, batch_size, num_outputs, layout, workers_num, epoch_size, dtype ): @@ -177,8 +175,7 @@ def check_spawn_with_callback( ) for workers_num in [1, 4]: for batch_size in [1, 16, 150]: - yield ( - _check_spawn_with_callback, + _check_spawn_with_callback( callback, callback_ref, batch_size, diff --git a/dali/test/python/test_functional_api.py b/dali/test/python/test_functional_api.py index e8708b35194..90b70f5d6d3 100644 --- a/dali/test/python/test_functional_api.py +++ b/dali/test/python/test_functional_api.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
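For reference, the test conversions earlier in this patch all follow one shape: a nose-style generator test that did `yield fn, args...` under `@with_setup` becomes either a direct call inside the helper (as in `check_spawn_with_callback` above) or a test class whose `setUp`/`tearDown` wrap the old module-level setup/teardown functions and whose methods are parametrized. A minimal sketch of the class-based variant, assuming only `nose2.tools.params`; the class name, test body, and the setup/teardown placeholders are illustrative, not code from this patch:

from nose2.tools import params


def setup_function():
    pass  # stand-in for the real per-test setup


def teardown_function():
    pass  # stand-in for the real per-test teardown


class TestExample:
    def setUp(self):
        setup_function()

    def tearDown(self):
        teardown_function()

    @params("fork", "spawn")  # one collected test case per start method
    def test_example(self, start_method):
        assert start_method in ("fork", "spawn")
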
@@ -20,7 +20,6 @@ from nose_utils import assert_raises, attr import sys import inspect -import nose def _test_fn_rotate(device): @@ -185,7 +184,7 @@ def _test_schema_name_for_module(module_name, base_name=""): # Check if we can reconstruct the name of the op from provided schema assert hasattr(member, "_schema_name") full_name = ops._op_name(member._schema_name) - nose.tools.eq_(base_name + "." + full_name, module_name + "." + member_name) + assert base_name + "." + full_name == module_name + "." + member_name elif inspect.ismodule(member) and (module_name + "." + member_name) in sys.modules.keys(): # Recurse on DALI submodule (filter out non-DALI reexported modules like `sys`) _test_schema_name_for_module(module_name + "." + member_name, base_name) diff --git a/dali/test/python/test_fw_iterators.py b/dali/test/python/test_fw_iterators.py index 13d8f8cbb39..5ae3484542c 100644 --- a/dali/test/python/test_fw_iterators.py +++ b/dali/test/python/test_fw_iterators.py @@ -107,260 +107,6 @@ def create_pipeline(creator, batch_size, num_gpus): return pipes, iters -@attr("mxnet") -def test_mxnet_iterator_model_fit(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - import mxnet as mx - - num_gpus = 1 - batch_size = 1 - - def create_test_pipeline(batch_size, num_threads, device_id, num_gpus, data_paths): - pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id) - with pipe: - _, labels = fn.readers.file( - file_root=data_paths, shard_id=device_id, num_shards=num_gpus, name="Reader" - ) - pipe.set_outputs(labels) - return pipe - - pipes, _ = create_pipeline( - lambda gpu: create_test_pipeline( - batch_size=batch_size, - num_threads=4, - device_id=gpu, - num_gpus=num_gpus, - data_paths=image_data_set, - ), - batch_size, - num_gpus, - ) - pipe = pipes[0] - - class MXNetIteratorWrapper(MXNetIterator): - def __init__(self, iter): - self.iter = iter - - def __getattr__(self, attr): - return getattr(self.iter, attr) - - def __next__(self): - ret = self.iter.__next__()[0] - return ret - - dali_train_iter = MXNetIterator( - pipe, [("labels", MXNetIterator.LABEL_TAG)], size=pipe.epoch_size("Reader") - ) - data = mx.symbol.Variable("labels") - - # create a dummy model - _ = mx.model.FeedForward.create( - data, X=MXNetIteratorWrapper(dali_train_iter), num_epoch=1, learning_rate=0.01 - ) - - -@attr("mxnet") -def test_mxnet_iterator_last_batch_no_pad_last_batch(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - num_gpus = 1 - batch_size = 100 - - pipes, data_size = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=True, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=False, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = MXNetIterator( - pipes, - [("ids", MXNetIterator.DATA_TAG)], - size=pipes[0].epoch_size("Reader"), - last_batch_policy=LastBatchPolicy.FILL, - ) - - img_ids_list, img_ids_list_set, mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x.data[0].squeeze(-1).asnumpy(), lambda x: x.pad, data_size - ) - - assert len(img_ids_list) > data_size - assert len(img_ids_list_set) == data_size - assert len(set(mirrored_data)) != 1 - - -@attr("mxnet") -def test_mxnet_iterator_empty_array(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - import mxnet as mx - - batch_size = 4 - size = 5 - - all_np_types = [ - np.bool_, - np.int_, - 
np.intc, - np.intp, - np.int8, - np.int16, - np.int32, - np.int64, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.float32, - np.float16, - np.short, - int, - np.longlong, - np.ushort, - np.ulonglong, - ] - np_types = [] - # store in np_types only types supported by MXNet - for t in all_np_types: - try: - mx.nd.zeros([2, 2, 2], ctx=None, dtype=t) - np_types.append(t) - except mx.base.MXNetError: - pass - - test_data_shape = [1, 3, 0, 4] - - def get_data(): - # create batch of [type_a, type_a, type_b, type_b, ...] - out = [[np.empty(test_data_shape, dtype=t)] * batch_size for t in np_types] - out = [val for pair in zip(out, out) for val in pair] - return out - - pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0) - outs = fn.external_source(source=get_data, num_outputs=len(np_types) * 2) - pipe.set_outputs(*outs) - - # create map of [(data, type_a), (label, type_a), ...] - data_map = [("data_{}".format(i), MXNetIterator.DATA_TAG) for i, t in enumerate(np_types)] - label_map = [("label_{}".format(i), MXNetIterator.LABEL_TAG) for i, t in enumerate(np_types)] - out_map = [val for pair in zip(data_map, label_map) for val in pair] - - iterator = MXNetIterator(pipe, output_map=out_map, size=size, dynamic_shape=True) - - for batch in iterator: - for d, t in zip(batch[0].data, np_types): - shape = d.asnumpy().shape - assert shape[0] == batch_size - assert np.array_equal(shape[1:], test_data_shape) - assert d.asnumpy().dtype == t - - -@attr("mxnet") -def test_mxnet_iterator_last_batch_pad_last_batch(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - num_gpus = 1 - batch_size = 100 - - pipes, data_size = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=True, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=True, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = MXNetIterator( - pipes, - [("ids", MXNetIterator.DATA_TAG)], - size=pipes[0].epoch_size("Reader"), - last_batch_policy=LastBatchPolicy.FILL, - ) - - img_ids_list, img_ids_list_set, mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x.data[0].squeeze(-1).asnumpy(), lambda x: x.pad, data_size - ) - - assert len(img_ids_list) > data_size - assert len(img_ids_list_set) == data_size - assert len(set(mirrored_data)) == 1 - - dali_train_iter.reset() - next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x.data[0].squeeze(-1).asnumpy(), lambda x: x.pad, data_size - ) - - assert len(next_img_ids_list) > data_size - assert len(next_img_ids_list_set) == data_size - assert len(set(next_mirrored_data)) == 1 - - -@attr("mxnet") -def test_mxnet_iterator_not_fill_last_batch_pad_last_batch(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - num_gpus = 1 - batch_size = 100 - - pipes, data_size = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=True, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=True, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = MXNetIterator( - pipes, - [("ids", MXNetIterator.DATA_TAG)], - size=pipes[0].epoch_size("Reader"), - last_batch_policy=LastBatchPolicy.PARTIAL, - ) - - img_ids_list, img_ids_list_set, mirrored_data, pad, remainder = gather_ids( - dali_train_iter, lambda x: 
x.data[0].squeeze(-1).asnumpy(), lambda x: x.pad, data_size - ) - - assert pad == remainder - assert len(img_ids_list) - pad == data_size - assert len(img_ids_list_set) == data_size - assert len(set(mirrored_data)) == 1 - - dali_train_iter.reset() - next_img_ids_list, next_img_ids_list_set, next_mirrored_data, pad, remainder = gather_ids( - dali_train_iter, lambda x: x.data[0].squeeze(-1).asnumpy(), lambda x: x.pad, data_size - ) - - assert pad == remainder - assert len(next_img_ids_list) - pad == data_size - assert len(next_img_ids_list_set) == data_size - assert len(set(next_mirrored_data)) == 1 - - def check_iterator_results( pad, pipes_number, @@ -449,456 +195,6 @@ def check_iterator_results( return (ids, sample_counter, per_gpu_counter, epoch_counter, rounded_shard_size) -@attr("mxnet") -def check_mxnet_iterator_pass_reader_name( - shards_num, - pipes_number, - batch_size, - stick_to_shard, - pad, - iters, - last_batch_policy, - auto_reset=False, -): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - pipes = [ - create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=id, - num_gpus=shards_num, - data_paths=data_sets[0], - random_shuffle=False, - stick_to_shard=stick_to_shard, - shuffle_after_epoch=False, - pad_last_batch=pad, - ) - for id in range(pipes_number) - ] - - data_set_size = pipes[0].reader_meta("Reader")["epoch_size"] - rounded_shard_size = math.ceil(math.ceil(data_set_size / shards_num) / batch_size) * batch_size - ids = [pipe.reader_meta("Reader")["shard_id"] for pipe in pipes] - per_gpu_counter = [0] * shards_num - epoch_counter = 0 - sample_counter = 0 - - if batch_size > data_set_size // shards_num and last_batch_policy == LastBatchPolicy.DROP: - assert_raises( - RuntimeError, - MXNetIterator, - pipes, - [("ids", MXNetIterator.DATA_TAG)], - reader_name="Reader", - last_batch_policy=last_batch_policy, - glob="It seems that there is no data in the pipeline*last_batch_policy*", - ) - return - else: - dali_train_iter = MXNetIterator( - pipes, - [("ids", MXNetIterator.DATA_TAG)], - reader_name="Reader", - last_batch_policy=last_batch_policy, - auto_reset=auto_reset, - ) - - for _ in range(iters): - out_set = [] - img_ids_list = [[] for _ in range(pipes_number)] - orig_length = length = len(dali_train_iter) - for it in iter(dali_train_iter): - for id in range(pipes_number): - tmp = it[id].data[0].squeeze(-1).asnumpy().copy() - if it[id].pad: - tmp = tmp[0 : -it[id].pad] - img_ids_list[id].append(tmp) - sample_counter += batch_size - length -= 1 - - assert length == 0, ( - f"The iterator has reported the length of {orig_length} " - f"but provided {orig_length - length} iterations." 
- ) - if not auto_reset: - dali_train_iter.reset() - for id in range(pipes_number): - img_ids_list[id] = np.concatenate(img_ids_list[id]) - out_set.append(set(img_ids_list[id])) - - ret = check_iterator_results( - pad, - pipes_number, - shards_num, - out_set, - last_batch_policy, - img_ids_list, - ids, - data_set_size, - sample_counter, - per_gpu_counter, - stick_to_shard, - epoch_counter, - rounded_shard_size, - ) - (ids, sample_counter, per_gpu_counter, epoch_counter, rounded_shard_size) = ret - - -@attr("mxnet") -def test_mxnet_iterator_pass_reader_name(): - for shards_num in [3, 5, 17]: - for batch_size in [3, 5, 7]: - for stick_to_shard in [False, True]: - for pad in [True, False]: - for last_batch_policy in [ - LastBatchPolicy.PARTIAL, - LastBatchPolicy.FILL, - LastBatchPolicy.DROP, - ]: - for iters in [1, 2, 3, 2 * shards_num]: - for pipes_number in [1, shards_num]: - yield ( - check_mxnet_iterator_pass_reader_name, - shards_num, - pipes_number, - batch_size, - stick_to_shard, - pad, - iters, - last_batch_policy, - False, - ) - - -@attr("mxnet") -def test_mxnet_iterator_pass_reader_name_autoreset(): - for auto_reset in [True, False]: - yield ( - check_mxnet_iterator_pass_reader_name, - 3, - 1, - 3, - False, - True, - 3, - LastBatchPolicy.DROP, - auto_reset, - ) - - -@attr("gluon") -def test_gluon_iterator_last_batch_no_pad_last_batch(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - num_gpus = 1 - batch_size = 100 - - pipes, data_size = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=True, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=False, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = GluonIterator( - pipes, size=pipes[0].epoch_size("Reader"), last_batch_policy=LastBatchPolicy.FILL - ) - - img_ids_list, img_ids_list_set, mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x[0].squeeze(-1).asnumpy(), lambda x: 0, data_size - ) - - assert len(img_ids_list) > data_size - assert len(img_ids_list_set) == data_size - assert len(set(mirrored_data)) != 1 - - -@attr("gluon") -def test_gluon_iterator_last_batch_pad_last_batch(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - num_gpus = 1 - batch_size = 100 - - pipes, data_size = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=True, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=True, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = GluonIterator( - pipes, size=pipes[0].epoch_size("Reader"), last_batch_policy=LastBatchPolicy.FILL - ) - - img_ids_list, img_ids_list_set, mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x[0].squeeze(-1).asnumpy(), lambda x: 0, data_size - ) - - assert len(img_ids_list) > data_size - assert len(img_ids_list_set) == data_size - assert len(set(mirrored_data)) == 1 - - dali_train_iter.reset() - next_img_ids_list, next_img_ids_list_set, next_mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x[0].squeeze(-1).asnumpy(), lambda x: 0, data_size - ) - - assert len(next_img_ids_list) > data_size - assert len(next_img_ids_list_set) == data_size - assert len(set(next_mirrored_data)) == 1 - - -@attr("gluon") -def test_gluon_iterator_not_fill_last_batch_pad_last_batch(): - from nvidia.dali.plugin.mxnet import 
DALIGluonIterator as GluonIterator - - num_gpus = 1 - batch_size = 100 - - pipes, data_size = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=False, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=True, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = GluonIterator( - pipes, size=pipes[0].epoch_size("Reader"), last_batch_policy=LastBatchPolicy.PARTIAL - ) - - img_ids_list, img_ids_list_set, mirrored_data, _, _ = gather_ids( - dali_train_iter, lambda x: x[0].squeeze(-1).asnumpy(), lambda x: 0, data_size - ) - - assert len(img_ids_list) == data_size - assert len(img_ids_list_set) == data_size - assert len(set(mirrored_data)) != 1 - - dali_train_iter.reset() - next_img_ids_list, next_img_ids_list_set, next_mirrored_data, pad, remainder = gather_ids( - dali_train_iter, lambda x: x[0].squeeze(-1).asnumpy(), lambda x: 0, data_size - ) - - assert len(next_img_ids_list) == data_size - assert len(next_img_ids_list_set) == data_size - assert len(set(next_mirrored_data)) != 1 - - -@attr("gluon") -def test_gluon_iterator_sparse_batch(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - from mxnet.ndarray.ndarray import NDArray - - num_gpus = 1 - batch_size = 16 - - pipes, _ = create_pipeline( - lambda gpu: create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=gpu, - num_gpus=num_gpus, - data_paths=data_sets[0], - random_shuffle=True, - stick_to_shard=False, - shuffle_after_epoch=False, - pad_last_batch=True, - return_labels=True, - ), - batch_size, - num_gpus, - ) - - dali_train_iter = GluonIterator( - pipes, - pipes[0].epoch_size("Reader"), - output_types=[GluonIterator.SPARSE_TAG, GluonIterator.DENSE_TAG], - last_batch_policy=LastBatchPolicy.FILL, - ) - - for it in dali_train_iter: - labels, ids = it[0] # gpu 0 - # labels should be a sparse batch: a list of per-sample NDArray's - # ids should be a dense batch: a single NDarray representing the batch - assert isinstance(labels, (tuple, list)) - assert len(labels) == batch_size - assert isinstance(labels[0], NDArray) - assert isinstance(ids, NDArray) - - -@attr("gluon") -def check_gluon_iterator_pass_reader_name( - shards_num, - pipes_number, - batch_size, - stick_to_shard, - pad, - iters, - last_batch_policy, - auto_reset=False, -): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - pipes = [ - create_coco_pipeline( - batch_size=batch_size, - num_threads=4, - shard_id=id, - num_gpus=shards_num, - data_paths=data_sets[0], - random_shuffle=False, - stick_to_shard=stick_to_shard, - shuffle_after_epoch=False, - pad_last_batch=pad, - ) - for id in range(pipes_number) - ] - - data_set_size = pipes[0].reader_meta("Reader")["epoch_size"] - rounded_shard_size = math.ceil(math.ceil(data_set_size / shards_num) / batch_size) * batch_size - ids = [pipe.reader_meta("Reader")["shard_id"] for pipe in pipes] - per_gpu_counter = [0] * shards_num - epoch_counter = 0 - sample_counter = 0 - - if batch_size > data_set_size // shards_num and last_batch_policy == LastBatchPolicy.DROP: - assert_raises( - RuntimeError, - GluonIterator, - pipes, - reader_name="Reader", - last_batch_policy=last_batch_policy, - glob="It seems that there is no data in the pipeline. 
This may happen " - "if `last_batch_policy` is set to PARTIAL and the requested " - "batch size is greater than the shard size.", - ) - return - else: - dali_train_iter = GluonIterator( - pipes, reader_name="Reader", last_batch_policy=last_batch_policy, auto_reset=auto_reset - ) - - for _ in range(iters): - out_set = [] - img_ids_list = [[] for _ in range(pipes_number)] - orig_length = length = len(dali_train_iter) - for it in iter(dali_train_iter): - for id in range(pipes_number): - if len(it[id][0]): - tmp = it[id][0].squeeze(-1).asnumpy().copy() - else: - tmp = np.empty([0]) - img_ids_list[id].append(tmp) - sample_counter += batch_size - length -= 1 - - assert length == 0, ( - f"The iterator has reported the length of {orig_length} " - f"but provided {orig_length - length} iterations." - ) - if not auto_reset: - dali_train_iter.reset() - for id in range(pipes_number): - assert ( - batch_size > data_set_size // shards_num - and last_batch_policy == LastBatchPolicy.DROP - ) or len(img_ids_list[id]) - if len(img_ids_list[id]): - img_ids_list[id] = np.concatenate(img_ids_list[id]) - out_set.append(set(img_ids_list[id])) - - if len(out_set) == 0 and last_batch_policy == LastBatchPolicy.DROP: - return - - ret = check_iterator_results( - pad, - pipes_number, - shards_num, - out_set, - last_batch_policy, - img_ids_list, - ids, - data_set_size, - sample_counter, - per_gpu_counter, - stick_to_shard, - epoch_counter, - rounded_shard_size, - ) - (ids, sample_counter, per_gpu_counter, epoch_counter, rounded_shard_size) = ret - - -@attr("gluon") -def test_gluon_iterator_pass_reader_name(): - for shards_num in [3, 5, 17]: - for batch_size in [3, 5, 7]: - for stick_to_shard in [False, True]: - for pad in [True, False]: - for last_batch_policy in [ - LastBatchPolicy.PARTIAL, - LastBatchPolicy.FILL, - LastBatchPolicy.DROP, - ]: - for iters in [1, 2, 3, 2 * shards_num]: - for pipes_number in [1, shards_num]: - yield ( - check_gluon_iterator_pass_reader_name, - shards_num, - pipes_number, - batch_size, - stick_to_shard, - pad, - iters, - last_batch_policy, - False, - ) - - -@attr("gluon") -def test_gluon_iterator_pass_reader_name_autoreset(): - for auto_reset in [True, False]: - yield ( - check_gluon_iterator_pass_reader_name, - 3, - 1, - 3, - False, - True, - 3, - LastBatchPolicy.DROP, - auto_reset, - ) - - @attr("pytorch") def test_pytorch_iterator_last_batch_no_pad_last_batch(): from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator @@ -1309,69 +605,6 @@ def test_ragged_iterator_sparse_list_batch(exec_dynamic): assert ids.is_sparse is False -@attr("mxnet") -def test_mxnet_iterator_feed_ndarray(): - from nvidia.dali.plugin.mxnet import feed_ndarray as feed_ndarray - import mxnet as mx - - num_gpus = 1 - batch_size = 100 - pipes, _ = create_pipeline( - lambda gpu: create_custom_pipeline( - batch_size=batch_size, - num_threads=4, - device_id=gpu, - num_gpus=num_gpus, - data_paths=image_data_set, - ), - batch_size, - num_gpus, - ) - for gpu_id in range(num_gpus): - pipe = pipes[gpu_id] - outs = pipe.run() - out_data = outs[0].as_tensor() - with mx.Context(mx.gpu(gpu_id)): - arr = mx.nd.zeros(out_data.shape(), dtype=np.float32) - mx.base._LIB.MXNDArrayWaitToWrite(arr.handle) - # Using DALI's internal stream - feed_ndarray(out_data, arr, cuda_stream=None) - np.testing.assert_equal(arr.asnumpy(), outs[0].as_cpu().as_array()) - - arr2 = mx.nd.zeros(out_data.shape(), dtype=np.float32) - mx.base._LIB.MXNDArrayWaitToWrite(arr2.handle) - feed_ndarray(out_data, arr2, cuda_stream=0) # Using 
default stream - np.testing.assert_equal(arr2.asnumpy(), outs[0].as_cpu().as_array()) - - -@attr("mxnet") -def check_mxnet_iterator_feed_ndarray_types(data_type): - from nvidia.dali.plugin.mxnet import feed_ndarray as feed_ndarray - import mxnet as mx - - shape = [3, 9] - if np.issubdtype(data_type, np.integer): - arr = np.random.randint( - np.iinfo(data_type).min, high=np.iinfo(data_type).max, size=shape, dtype=data_type - ) - elif data_type == np.bool_: - arr = np.random.randint(0, high=2, size=shape, dtype=data_type) - else: - arr = np.random.randn(*shape).astype(data_type) - tensor = TensorCPU(arr) - mnt = mx.nd.empty(shape, dtype=data_type) - feed_ndarray(tensor, mnt) - assert np.all(mnt.asnumpy() == arr) - - -@attr("mxnet") -def test_mxnet_iterator_feed_ndarray_types(): - # MXNet doesn't support int16 - types = [np.float32, np.float64, np.float16, np.uint8, np.int8, np.bool_, np.int32, np.int64] - for data_type in types: - yield check_mxnet_iterator_feed_ndarray_types, data_type - - @attr("paddle") def test_paddle_iterator_feed_ndarray(): from nvidia.dali.plugin.paddle import feed_ndarray as feed_ndarray @@ -2100,276 +1333,6 @@ def get_data(): assert counter == iter_limit * runs -# MXNet - - -@attr("mxnet") -def test_stop_iteration_mxnet(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - def fw_iter(pipe, size, auto_reset): - return MXNetIterator( - pipe, [("data", MXNetIterator.DATA_TAG)], size=size, auto_reset=auto_reset - ) - - iter_name = "MXNetIterator" - for ( - batch_size, - epochs, - iter_num, - total_iter_num, - auto_reset, - infinite, - ) in stop_iteration_case_generator(): - check_stop_iter( - fw_iter, iter_name, batch_size, epochs, iter_num, total_iter_num, auto_reset, infinite - ) - - -@attr("mxnet") -def test_stop_iteration_mxnet_fail_multi(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - def fw_iter(pipe, size, auto_reset): - return MXNetIterator( - pipe, [("data", MXNetIterator.DATA_TAG)], size=size, auto_reset=auto_reset - ) - - check_stop_iter_fail_multi(fw_iter) - - -@attr("mxnet") -def test_stop_iteration_mxnet_fail_single(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - def fw_iter(pipe, size, auto_reset): - return MXNetIterator( - pipe, [("data", MXNetIterator.DATA_TAG)], size=size, auto_reset=auto_reset - ) - - check_stop_iter_fail_single(fw_iter) - - -@attr("mxnet") -def test_mxnet_iterator_wrapper_first_iteration(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - check_iterator_wrapper_first_iteration( - MXNetIterator, [("data", MXNetIterator.DATA_TAG)], size=100 - ) - - -@attr("mxnet") -def test_mxnet_external_source_autoreset(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - check_external_source_autoreset( - MXNetIterator, [("data", MXNetIterator.DATA_TAG)], to_np=lambda x: x[0].data[0].asnumpy() - ) - - -@attr("mxnet") -def test_mxnet_external_source_do_not_prepare(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - check_external_source_autoreset( - MXNetIterator, - [("data", MXNetIterator.DATA_TAG)], - to_np=lambda x: x[0].data[0].asnumpy(), - prepare_first_batch=False, - ) - - -@attr("mxnet") -def check_mxnet_iterator_properties(prepare_ahead): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - def data_to_np(x): - return x.data[0].asnumpy() - - def label_to_np(x): - return x.label[0].asnumpy() - - max_batch_size = 4 - iter_limit = 
4 - runs = 3 - test_data_shape = [2, 3, 4] - test_label_shape = [2, 7, 5] - i = 0 - dataset = [ - [ - [ - np.random.randint(0, 255, size=test_data_shape, dtype=np.uint8) - for _ in range(max_batch_size) - ], - [ - np.random.randint(0, 255, size=test_label_shape, dtype=np.uint8) - for _ in range(max_batch_size) - ], - ] - for _ in range(iter_limit) - ] - - def get_data(): - nonlocal i - if i == iter_limit: - i = 0 - raise StopIteration - out = dataset[i] - i += 1 - return out - - pipe = Pipeline(batch_size=max_batch_size, num_threads=1, device_id=0) - with pipe: - outs = fn.external_source(source=get_data, num_outputs=2) - pipe.set_outputs(*outs) - - it = MXNetIterator( - [pipe], - [("data", MXNetIterator.DATA_TAG), ("label", MXNetIterator.LABEL_TAG)], - auto_reset=True, - prepare_first_batch=prepare_ahead, - ) - counter = 0 - assert getattr(it, "provide_data")[0].shape == tuple([max_batch_size] + test_data_shape) - assert getattr(it, "provide_label")[0].shape == tuple([max_batch_size] + test_label_shape) - for _ in range(runs): - for j, data in enumerate(it): - assert (data_to_np(data[0]) == np.stack(dataset[j][0])).all() - assert (label_to_np(data[0]) == np.stack(dataset[j][1])).all() - assert getattr(it, "provide_data")[0].shape == tuple([max_batch_size] + test_data_shape) - assert getattr(it, "provide_label")[0].shape == tuple( - [max_batch_size] + test_label_shape - ) - counter += 1 - assert counter == iter_limit * runs - - -@attr("mxnet") -def test_mxnet_iterator_properties(): - for prep in [True, False]: - yield check_mxnet_iterator_properties, prep - - -@attr("mxnet") -def test_mxnet_external_source_variable_size_pass(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - check_external_source_variable_size( - MXNetIterator, - [("data", MXNetIterator.DATA_TAG)], - to_np=lambda x: x.data[0].asnumpy(), - dynamic_shape=True, - ) - - -@attr("mxnet") -def test_mxnet_external_source_variable_size_fail(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - assert_raises( - AssertionError, - check_external_source_variable_size, - MXNetIterator, - [("data", MXNetIterator.DATA_TAG)], - to_np=lambda x: x.data[0].asnumpy(), - iter_size=5, - dynamic_shape=True, - ) - - -# Gluon - - -@attr("gluon") -@params(*stop_iteration_case_generator()) -def test_stop_iteration_gluon(batch_size, epochs, iter_num, total_iter_num, auto_reset, infinite): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - def fw_iter(pipe, size, auto_reset): - return GluonIterator( - pipe, size, output_types=[GluonIterator.DENSE_TAG], auto_reset=auto_reset - ) - - iter_name = "GluonIterator" - check_stop_iter( - fw_iter, iter_name, batch_size, epochs, iter_num, total_iter_num, auto_reset, infinite - ) - - -@attr("gluon") -def test_stop_iteration_gluon_fail_multi(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - def fw_iter(pipe, size, auto_reset): - return GluonIterator(pipe, size, auto_reset=auto_reset) - - check_stop_iter_fail_multi(fw_iter) - - -@attr("gluon") -def test_stop_iteration_gluon_fail_single(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - def fw_iter(pipe, size, auto_reset): - return GluonIterator(pipe, size=size, auto_reset=auto_reset) - - check_stop_iter_fail_single(fw_iter) - - -@attr("gluon") -def test_gluon_iterator_wrapper_first_iteration(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - check_iterator_wrapper_first_iteration( - 
GluonIterator, output_types=[GluonIterator.DENSE_TAG], size=100 - ) - - -@attr("gluon") -def test_gluon_external_source_autoreset(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - check_external_source_autoreset( - GluonIterator, output_types=[GluonIterator.DENSE_TAG], to_np=lambda x: x[0][0].asnumpy() - ) - - -@attr("gluon") -def test_gluon_external_source_do_not_prepare(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - check_external_source_autoreset( - GluonIterator, - output_types=[GluonIterator.DENSE_TAG], - to_np=lambda x: x[0][0].asnumpy(), - prepare_first_batch=False, - ) - - -@attr("gluon") -def test_gluon_external_source_variable_size_pass(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - check_external_source_variable_size( - GluonIterator, output_types=[GluonIterator.DENSE_TAG], to_np=lambda x: x[0].asnumpy() - ) - - -@attr("gluon") -def test_gluon_external_source_variable_size_fail(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - assert_raises( - AssertionError, - check_external_source_variable_size, - GluonIterator, - output_types=[GluonIterator.DENSE_TAG], - to_np=lambda x: x[0].asnumpy(), - iter_size=5, - ) - - # PyTorch @@ -2686,27 +1649,6 @@ def get_data(): assert counter == iter_limit * runs -@attr("mxnet") -def test_mxnet_prepare_first_batch(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - check_prepare_first_batch( - MXNetIterator, - [("data", MXNetIterator.DATA_TAG)], - to_np=lambda x: x.data[0].asnumpy(), - dynamic_shape=True, - ) - - -@attr("gluon") -def test_gluon_prepare_first_batch(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - check_prepare_first_batch( - GluonIterator, output_types=[GluonIterator.DENSE_TAG], to_np=lambda x: x[0].asnumpy() - ) - - @attr("pytorch") def test_pytorch_prepare_first_batch(): from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator @@ -2737,24 +1679,6 @@ def feed_ndarray_test_pipeline(): return np.array([1], dtype=float) -@attr("mxnet") -def test_mxnet_feed_ndarray(): - from nvidia.dali.plugin.mxnet import feed_ndarray - import mxnet - - pipe = feed_ndarray_test_pipeline(batch_size=1, num_threads=1, device_id=0) - out = pipe.run()[0] - mxnet_tensor = mxnet.nd.empty([1], None, np.int8) - assert_raises( - AssertionError, - feed_ndarray, - out, - mxnet_tensor, - glob="The element type of DALI Tensor/TensorList doesn't match " - "the element type of the target MXNet NDArray", - ) - - @attr("pytorch") def test_pytorch_feed_ndarray(): from nvidia.dali.plugin.pytorch import feed_ndarray @@ -2824,32 +1748,6 @@ def test_paddle_wrong_last_batch_policy_type(): ) -@attr("mxnet") -def test_mxnet_wrong_last_batch_policy_type(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - check_iterator_build_error( - ValueError, - MXNetIterator, - glob="Wrong type for `last_batch_policy`.", - output_map=[("data", MXNetIterator.DATA_TAG)], - last_batch_policy="FILL", - ) - - -@attr("gluon") -def test_gluon_wrong_last_batch_policy_type(): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - check_iterator_build_error( - ValueError, - GluonIterator, - glob="Wrong type for `last_batch_policy`.", - output_types=[GluonIterator.DENSE_TAG], - last_batch_policy="FILL", - ) - - @attr("jax") def test_jax_wrong_last_batch_policy_type(): from nvidia.dali.plugin.jax import DALIGenericIterator as JaxIterator 
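A note on the parametrization used in the remaining hunks of this file (e.g. `@params(*stop_iteration_case_generator())` above and `@params(*autoreset_iter_params())` below): `nose2.tools.params` turns each argument into a separate test case, and tuple arguments are unpacked into the test's positional parameters, which is what lets a single case generator feed tests with several arguments. A small sketch with a hypothetical case generator and test function:

from nose2.tools import params


def example_cases():
    # hypothetical stand-in for stop_iteration_case_generator() / autoreset_iter_params()
    for batch_size in (1, 4):
        for auto_reset in (True, False):
            yield batch_size, auto_reset


@params(*example_cases())
def test_example_case(batch_size, auto_reset):
    # each yielded tuple becomes one test case, unpacked into the arguments
    assert batch_size in (1, 4)
    assert isinstance(auto_reset, bool)
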
@@ -2921,47 +1819,6 @@ def autoreset_iter_params(): yield auto_reset_op, policy -@attr("mxnet") -@params(*autoreset_iter_params()) -def test_mxnet_autoreset_iter(auto_reset_op, policy): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - def fw_iterator(pipeline, reader_name, auto_reset, last_batch_policy): - return MXNetIterator( - pipeline, - [("data", MXNetIterator.DATA_TAG)], - reader_name=reader_name, - auto_reset=auto_reset, - last_batch_policy=last_batch_policy, - ) - - def extract_data(x): - data = x.data[0].asnumpy() - data = data[0 : -x.pad] - return data - - check_autoreset_iter(fw_iterator, extract_data, auto_reset_op, policy) - - -@attr("gluon") -@params(*autoreset_iter_params()) -def test_gluon_autoreset_iter(auto_reset_op, policy): - from nvidia.dali.plugin.mxnet import DALIGluonIterator as GluonIterator - - def fw_iterator(pipeline, reader_name, auto_reset, last_batch_policy): - return GluonIterator( - pipeline, - reader_name=reader_name, - auto_reset=auto_reset, - last_batch_policy=last_batch_policy, - ) - - def extract_data(x): - return x[0].asnumpy() - - check_autoreset_iter(fw_iterator, extract_data, auto_reset_op, policy) - - @attr("pytorch") @params(*autoreset_iter_params()) def test_pytorch_autoreset_iter(auto_reset_op, policy): diff --git a/dali/test/python/test_fw_iterators_detection.py b/dali/test/python/test_fw_iterators_detection.py index ce783812e1f..a7b71ab2b54 100644 --- a/dali/test/python/test_fw_iterators_detection.py +++ b/dali/test/python/test_fw_iterators_detection.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
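The detection-test hunks that follow replace regex-based test selection with attribute tags: the framework-specific tests get `@attr("pytorch")` or `@attr("paddle")`, the shared `test_api_fw_check*` helpers get `@nottest` (presumably so they are not collected directly), and the QA scripts select tests with `-A 'pytorch'` / `-A 'paddle'`. As a rough sketch of the mechanism, here is a simplified stand-in for `nose_utils.attr` (the real helper may differ); the attrib plugin matches against attributes set on the test function:

def attr(*names):
    # simplified stand-in for nose_utils.attr: tag the function with attributes
    # that nose2's attrib plugin can match against -A expressions
    def decorator(fn):
        for name in names:
            setattr(fn, name, True)
        return fn
    return decorator


@attr("pytorch")
def test_needs_pytorch():
    pass  # matched by -A 'pytorch'


@attr("paddle")
def test_needs_paddle():
    pass  # matched by -A 'paddle'
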
@@ -17,7 +17,7 @@ from nvidia.dali.pipeline import Pipeline from test_utils import get_dali_extra_path -from nose_utils import assert_raises +from nose_utils import assert_raises, attr, nottest DALI_EXTRA_PATH = get_dali_extra_path() EPOCH_SIZE = 32 @@ -54,26 +54,7 @@ def data_paths(): ############## -def test_mxnet_pipeline_dynamic_shape(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - root, annotations = data_paths() - pipeline = DetectionPipeline(BATCH_SIZE, 0, root, annotations) - train_loader = MXNetIterator( - [pipeline], - [ - ("data", MXNetIterator.DATA_TAG), - ("bboxes", MXNetIterator.LABEL_TAG), - ("label", MXNetIterator.LABEL_TAG), - ], - EPOCH_SIZE, - auto_reset=False, - dynamic_shape=True, - ) - for data in train_loader: - assert data is not None - - +@attr("pytorch") def test_pytorch_pipeline_dynamic_shape(): from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator @@ -86,6 +67,7 @@ def test_pytorch_pipeline_dynamic_shape(): assert data is not None +@attr("paddle") def test_paddle_pipeline_dynamic_shape(): from nvidia.dali.plugin.paddle import DALIGenericIterator as PaddleIterator @@ -98,31 +80,21 @@ def test_paddle_pipeline_dynamic_shape(): assert data is not None +@attr("pytorch") def test_api_fw_check1_pytorch(): from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator yield from test_api_fw_check1(PyTorchIterator, ["data", "bboxes", "label"]) -def test_api_fw_check1_mxnet(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - yield from test_api_fw_check1( - MXNetIterator, - [ - ("data", MXNetIterator.DATA_TAG), - ("bboxes", MXNetIterator.LABEL_TAG), - ("label", MXNetIterator.LABEL_TAG), - ], - ) - - +@attr("paddle") def test_api_fw_check1_paddle(): from nvidia.dali.plugin.paddle import DALIGenericIterator as PaddleIterator yield from test_api_fw_check1(PaddleIterator, ["data", "bboxes", "label"]) +@nottest def test_api_fw_check1(iter_type, data_definition): root, annotations = data_paths() pipe = DetectionPipeline(BATCH_SIZE, 0, root, annotations) @@ -159,31 +131,21 @@ def test_api_fw_check1(iter_type, data_definition): yield check, iter_type -def test_api_fw_check2_mxnet(): - from nvidia.dali.plugin.mxnet import DALIGenericIterator as MXNetIterator - - yield from test_api_fw_check2( - MXNetIterator, - [ - ("data", MXNetIterator.DATA_TAG), - ("bboxes", MXNetIterator.LABEL_TAG), - ("label", MXNetIterator.LABEL_TAG), - ], - ) - - +@attr("pytorch") def test_api_fw_check2_pytorch(): from nvidia.dali.plugin.pytorch import DALIGenericIterator as PyTorchIterator yield from test_api_fw_check2(PyTorchIterator, ["data", "bboxes", "label"]) +@attr("paddle") def test_api_fw_check2_paddle(): from nvidia.dali.plugin.paddle import DALIGenericIterator as PaddleIterator yield from test_api_fw_check2(PaddleIterator, ["data", "bboxes", "label"]) +@nottest def test_api_fw_check2(iter_type, data_definition): root, annotations = data_paths() diff --git a/dali/test/python/test_pool.py b/dali/test/python/test_pool.py index 6f37d98c72f..d9f3ac80c00 100644 --- a/dali/test/python/test_pool.py +++ b/dali/test/python/test_pool.py @@ -17,10 +17,10 @@ from contextlib import closing from nvidia.dali._utils.external_source_impl import get_callback_from_source from nvidia.dali.types import SampleInfo -from functools import wraps import numpy as np import os -from nose_utils import raises, with_setup +from nose2.tools import params +from nose_utils import raises from test_pool_utils import 
capture_processes, setup_function, teardown_function @@ -94,28 +94,20 @@ def assert_scheduled_num(context, num_tasks): start_methods = ["fork", "spawn"] -# Invoke the `fn` with all start methods. Call setup and teardown before and after the test. +# Invoke the `fn` with all start methods. Call setUp and tearDown before and after the test. # # We do this to not repeat the pattern of: # -# def check_something(start_method): -# ... +# class TestPoolOneCallback: +# def setUp(self): +# setup_function() # -# @with_setup(setup_function, teardown_function) -# def test_something(): -# for start_method in start_methods: -# yield check_something, start_method - - -def check_pool(fn): - @wraps(fn) - def wrapper(): - for start_method in start_methods: - setup_function() - yield fn, start_method - teardown_function() - - return wrapper +# def tearDown(self): +# teardown_function() +# +# @cartesian_params(*start_methods) +# def test_something(start_method): +# ... # ################################################################################################ # @@ -123,61 +115,68 @@ def wrapper(): # ################################################################################################ # -@check_pool -def test_pool_one_task(start_method): - groups = [MockGroup.from_callback(simple_callback)] - with create_pool( - groups, keep_alive_queue_size=1, num_workers=1, start_method=start_method - ) as pool: - pids = get_pids(pool) - pid = pids[0] - tasks = [(SampleInfo(0, 0, 0, 0),)] - work_batch = TaskArgs.make_sample(SampleRange(0, 1, 0, 0)) - pool.schedule_batch(context_i=0, work_batch=work_batch) - batch = pool.receive_batch(context_i=0) - for task, sample in zip(tasks, batch): - np.testing.assert_array_equal(answer(pid, *task), sample) - - -@check_pool -def test_pool_multi_task(start_method): - groups = [MockGroup.from_callback(simple_callback)] - with create_pool( - groups, keep_alive_queue_size=1, num_workers=1, start_method=start_method - ) as pool: - pids = get_pids(pool) - pid = pids[0] - tasks = [(SampleInfo(i, i, 0, 0),) for i in range(10)] - work_batch = TaskArgs.make_sample(SampleRange(0, 10, 0, 0)) - pool.schedule_batch(context_i=0, work_batch=work_batch) - batch = pool.receive_batch(context_i=0) - for task, sample in zip(tasks, batch): - np.testing.assert_array_equal(answer(pid, *task), sample) - - -# Test that we can safely hold as many results as the keep_alive_queue_size -@check_pool -def test_pool_no_overwrite_batch(start_method): - groups = [MockGroup.from_callback(simple_callback, prefetch_queue_depth=0)] - for depth in [1, 2, 4, 8]: +class TestPoolOneCallback: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + @params(*start_methods) + def test_pool_one_task(self, start_method): + groups = [MockGroup.from_callback(simple_callback)] with create_pool( - groups, keep_alive_queue_size=depth, num_workers=1, start_method=start_method + groups, keep_alive_queue_size=1, num_workers=1, start_method=start_method ) as pool: pids = get_pids(pool) pid = pids[0] - work_batches = [TaskArgs.make_sample(SampleRange(i, i + 1, i, 0)) for i in range(depth)] - task_list = [[(SampleInfo(i, 0, i, 0),)] for i in range(depth)] - for i, work_batch in enumerate(work_batches): - pool.schedule_batch(context_i=0, work_batch=work_batch) - assert_scheduled_num(pool.contexts[0], depth) - batches = [] - for i in range(depth): - batches.append(pool.receive_batch(context_i=0)) - assert_scheduled_num(pool.contexts[0], depth - 1 - i) - tasks_batches = zip(task_list, batches) - for 
tasks, batch in tasks_batches: - for task, sample in zip(tasks, batch): - np.testing.assert_array_equal(answer(pid, *task), sample) + tasks = [(SampleInfo(0, 0, 0, 0),)] + work_batch = TaskArgs.make_sample(SampleRange(0, 1, 0, 0)) + pool.schedule_batch(context_i=0, work_batch=work_batch) + batch = pool.receive_batch(context_i=0) + for task, sample in zip(tasks, batch): + np.testing.assert_array_equal(answer(pid, *task), sample) + + @params(*start_methods) + def test_pool_multi_task(self, start_method): + groups = [MockGroup.from_callback(simple_callback)] + with create_pool( + groups, keep_alive_queue_size=1, num_workers=1, start_method=start_method + ) as pool: + pids = get_pids(pool) + pid = pids[0] + tasks = [(SampleInfo(i, i, 0, 0),) for i in range(10)] + work_batch = TaskArgs.make_sample(SampleRange(0, 10, 0, 0)) + pool.schedule_batch(context_i=0, work_batch=work_batch) + batch = pool.receive_batch(context_i=0) + for task, sample in zip(tasks, batch): + np.testing.assert_array_equal(answer(pid, *task), sample) + + # Test that we can safely hold as many results as the keep_alive_queue_size + @params(*start_methods) + def test_pool_no_overwrite_batch(self, start_method): + groups = [MockGroup.from_callback(simple_callback, prefetch_queue_depth=0)] + for depth in [1, 2, 4, 8]: + with create_pool( + groups, keep_alive_queue_size=depth, num_workers=1, start_method=start_method + ) as pool: + pids = get_pids(pool) + pid = pids[0] + work_batches = [ + TaskArgs.make_sample(SampleRange(i, i + 1, i, 0)) for i in range(depth) + ] + task_list = [[(SampleInfo(i, 0, i, 0),)] for i in range(depth)] + for i, work_batch in enumerate(work_batches): + pool.schedule_batch(context_i=0, work_batch=work_batch) + assert_scheduled_num(pool.contexts[0], depth) + batches = [] + for i in range(depth): + batches.append(pool.receive_batch(context_i=0)) + assert_scheduled_num(pool.contexts[0], depth - 1 - i) + tasks_batches = zip(task_list, batches) + for tasks, batch in tasks_batches: + for task, sample in zip(tasks, batch): + np.testing.assert_array_equal(answer(pid, *task), sample) # ################################################################################################ # @@ -185,21 +184,28 @@ def test_pool_no_overwrite_batch(start_method): # ################################################################################################ # -@check_pool -def test_pool_work_split_multiple_tasks(start_method): - callbacks = [MockGroup.from_callback(simple_callback)] - with create_pool( - callbacks, keep_alive_queue_size=1, num_workers=2, start_method=start_method - ) as pool: - num_tasks = 16 - pids = get_pids(pool) - assert len(pids) == 2 - work_batch = TaskArgs.make_sample(SampleRange(0, num_tasks, 0, 0)) - tasks = [(SampleInfo(i, i, 0, 0),) for i in range(num_tasks)] - pool.schedule_batch(context_i=0, work_batch=work_batch) - batch = pool.receive_batch(context_i=0) - for task, sample in zip(tasks, batch): - np.testing.assert_array_equal(answer(-1, *task)[1:], sample[1:]) +class TestPoolMultipleWorkers: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + @params(*start_methods) + def test_pool_work_split_multiple_tasks(self, start_method): + callbacks = [MockGroup.from_callback(simple_callback)] + with create_pool( + callbacks, keep_alive_queue_size=1, num_workers=2, start_method=start_method + ) as pool: + num_tasks = 16 + pids = get_pids(pool) + assert len(pids) == 2 + work_batch = TaskArgs.make_sample(SampleRange(0, num_tasks, 0, 0)) + tasks = [(SampleInfo(i, i, 0, 
0),) for i in range(num_tasks)] + pool.schedule_batch(context_i=0, work_batch=work_batch) + batch = pool.receive_batch(context_i=0) + for task, sample in zip(tasks, batch): + np.testing.assert_array_equal(answer(-1, *task)[1:], sample[1:]) # ################################################################################################ # @@ -207,128 +213,125 @@ def test_pool_work_split_multiple_tasks(start_method): # ################################################################################################ # -@check_pool -def test_pool_iterator_dedicated_worker(start_method): - groups = [ - MockGroup.from_callback(simple_callback, prefetch_queue_depth=3), - MockGroup.from_callback(IteratorCb(), prefetch_queue_depth=3, batch=True), - ] - with create_pool( - groups, keep_alive_queue_size=1, num_workers=4, start_method=start_method - ) as pool: - pids = get_pids(pool) - assert len(pids) == 4 - tasks_list = [] - samples_count = 0 - for i in range(4): - tasks = [(SampleInfo(samples_count + j, j, i, 0),) for j in range(i + 1)] - tasks_list.append(tasks) - work_batch = TaskArgs.make_sample( - SampleRange(samples_count, samples_count + i + 1, i, 0) - ) - samples_count += len(tasks) +class TestPoolMultipleCallbacks: + def setUp(self): + setup_function() + + def tearDown(self): + teardown_function() + + @params(*start_methods) + def test_pool_iterator_dedicated_worker(self, start_method): + groups = [ + MockGroup.from_callback(simple_callback, prefetch_queue_depth=3), + MockGroup.from_callback(IteratorCb(), prefetch_queue_depth=3, batch=True), + ] + with create_pool( + groups, keep_alive_queue_size=1, num_workers=4, start_method=start_method + ) as pool: + pids = get_pids(pool) + assert len(pids) == 4 + tasks_list = [] + samples_count = 0 + for i in range(4): + tasks = [(SampleInfo(samples_count + j, j, i, 0),) for j in range(i + 1)] + tasks_list.append(tasks) + work_batch = TaskArgs.make_sample( + SampleRange(samples_count, samples_count + i + 1, i, 0) + ) + samples_count += len(tasks) + pool.schedule_batch(context_i=0, work_batch=work_batch) + pool.schedule_batch(context_i=1, work_batch=TaskArgs.make_batch((i,))) + assert pool.contexts[0].dedicated_worker_id is None + iter_worker_num = pool.contexts[1].dedicated_worker_id + iter_worker_pid = pool.pool._processes[iter_worker_num].pid + for i in range(4): + batch_0 = pool.receive_batch(context_i=0) + batch_1 = pool.receive_batch(context_i=1) + tasks = tasks_list[i] + assert len(batch_0) == len(tasks) + assert len(batch_1) == len(tasks) + for task, sample in zip(tasks, batch_0): + np.testing.assert_array_equal(answer(-1, *task)[1:], sample[1:]) + for sample in batch_1: + np.testing.assert_array_equal(np.array([iter_worker_pid, i + 1]), sample) + + @params(*start_methods) + def test_pool_many_ctxs(self, start_method): + callbacks = [simple_callback, another_callback] + groups = [MockGroup.from_callback(cb) for cb in callbacks] + with create_pool( + groups, keep_alive_queue_size=1, num_workers=1, start_method=start_method + ) as pool: + pids = get_pids(pool) + tasks = [(SampleInfo(0, 0, 0, 0),)] + work_batch = TaskArgs.make_sample(SampleRange(0, 1, 0, 0)) pool.schedule_batch(context_i=0, work_batch=work_batch) - pool.schedule_batch(context_i=1, work_batch=TaskArgs.make_batch((i,))) - assert pool.contexts[0].dedicated_worker_id is None - iter_worker_num = pool.contexts[1].dedicated_worker_id - iter_worker_pid = pool.pool._processes[iter_worker_num].pid - for i in range(4): + pool.schedule_batch(context_i=1, work_batch=work_batch) + batch_0 = 
pool.receive_batch(context_i=0)
+            batch_1 = pool.receive_batch(context_i=1)
+            for task, sample, pid in zip(tasks, batch_0, pids):
+                np.testing.assert_array_equal(answer(pid, *task), sample)
+            for task, sample, pid in zip(tasks, batch_1, pids):
+                np.testing.assert_array_equal(answer(pid, *task) + 100, sample)
+
+    @params(*start_methods)
+    def test_pool_context_sync(self, start_method):
+        callbacks = [simple_callback, another_callback]
+        groups = [MockGroup.from_callback(cb, prefetch_queue_depth=3) for cb in callbacks]
+        with create_pool(
+            groups, keep_alive_queue_size=1, num_workers=4, start_method=start_method
+        ) as pool:
+            capture_processes(pool)
+            for i in range(4):
+                work_batch = TaskArgs.make_sample(SampleRange(0, 10 * (i + 1), 0, 0))
+                pool.schedule_batch(context_i=0, work_batch=work_batch)
+                pool.schedule_batch(context_i=1, work_batch=work_batch)
+            assert_scheduled_num(pool.contexts[0], 4)
+            assert_scheduled_num(pool.contexts[1], 4)
+            # pool after a reset should discard all previously scheduled tasks
+            # (and sync workers to avoid race on writing to results buffer)
+            pool.reset()
+            tasks = [(SampleInfo(1000 + j, j, 0, 1),) for j in range(5)]
+            work_batch = TaskArgs.make_sample(SampleRange(1000, 1005, 0, 1))
+            pool.schedule_batch(context_i=0, work_batch=work_batch)
+            pool.schedule_batch(context_i=1, work_batch=work_batch)
+            assert_scheduled_num(pool.contexts[0], 1)
+            assert_scheduled_num(pool.contexts[1], 1)
             batch_0 = pool.receive_batch(context_i=0)
             batch_1 = pool.receive_batch(context_i=1)
-            tasks = tasks_list[i]
             assert len(batch_0) == len(tasks)
             assert len(batch_1) == len(tasks)
             for task, sample in zip(tasks, batch_0):
                 np.testing.assert_array_equal(answer(-1, *task)[1:], sample[1:])
-            for sample in batch_1:
-                np.testing.assert_array_equal(np.array([iter_worker_pid, i + 1]), sample)
-
-
-@check_pool
-def test_pool_many_ctxs(start_method):
-    callbacks = [simple_callback, another_callback]
-    groups = [MockGroup.from_callback(cb) for cb in callbacks]
-    with create_pool(
-        groups, keep_alive_queue_size=1, num_workers=1, start_method=start_method
-    ) as pool:
-        pids = get_pids(pool)
-        pid = pids[0]
-        tasks = [(SampleInfo(0, 0, 0, 0),)]
-        work_batch = TaskArgs.make_sample(SampleRange(0, 1, 0, 0))
-        pool.schedule_batch(context_i=0, work_batch=work_batch)
-        pool.schedule_batch(context_i=1, work_batch=work_batch)
-        batch_0 = pool.receive_batch(context_i=0)
-        batch_1 = pool.receive_batch(context_i=1)
-        for task, sample, pid in zip(tasks, batch_0, pids):
-            np.testing.assert_array_equal(answer(pid, *task), sample)
-        for task, sample, pid in zip(tasks, batch_1, pids):
-            np.testing.assert_array_equal(answer(pid, *task) + 100, sample)
-
-
-@check_pool
-def test_pool_context_sync(start_method):
-    callbacks = [simple_callback, another_callback]
-    groups = [MockGroup.from_callback(cb, prefetch_queue_depth=3) for cb in callbacks]
-    with create_pool(
-        groups, keep_alive_queue_size=1, num_workers=4, start_method=start_method
-    ) as pool:
-        capture_processes(pool)
-        for i in range(4):
-            tasks = [(SampleInfo(j, 0, 0, 0),) for j in range(10 * (i + 1))]
-            work_batch = TaskArgs.make_sample(SampleRange(0, 10 * (i + 1), 0, 0))
-            pool.schedule_batch(context_i=0, work_batch=work_batch)
-            pool.schedule_batch(context_i=1, work_batch=work_batch)
-        assert_scheduled_num(pool.contexts[0], 4)
-        assert_scheduled_num(pool.contexts[1], 4)
-        # pool after a reset should discard all previously scheduled tasks
-        # (and sync workers to avoid race on writing to results buffer)
-        pool.reset()
-        tasks = [(SampleInfo(1000 + j, j, 0, 1),) for j in range(5)]
-        work_batch = TaskArgs.make_sample(SampleRange(1000, 1005, 0, 1))
-        pool.schedule_batch(context_i=0, work_batch=work_batch)
-        pool.schedule_batch(context_i=1, work_batch=work_batch)
-        assert_scheduled_num(pool.contexts[0], 1)
-        assert_scheduled_num(pool.contexts[1], 1)
-        batch_0 = pool.receive_batch(context_i=0)
-        batch_1 = pool.receive_batch(context_i=1)
-        assert len(batch_0) == len(tasks)
-        assert len(batch_1) == len(tasks)
-        for task, sample in zip(tasks, batch_0):
-            np.testing.assert_array_equal(answer(-1, *task)[1:], sample[1:])
-        for task, sample in zip(tasks, batch_1):
-            np.testing.assert_array_equal(answer(-1, *task)[1:] + 100, sample[1:])
-
-
-@with_setup(setup_function, teardown_function)
-def _test_multiple_stateful_sources_single_worker(num_workers):
-    groups = [
-        MockGroup.from_callback(IteratorCb(), batch=True),
-        MockGroup.from_callback(IteratorCb(), batch=True),
-    ]
-    with create_pool(
-        groups, keep_alive_queue_size=1, num_workers=num_workers, start_method="spawn"
-    ) as pool:
-        pids = get_pids(pool)
-        assert len(pids) == min(num_workers, len(groups))
-        pool.schedule_batch(context_i=0, work_batch=TaskArgs.make_batch((0,)))
-        pool.schedule_batch(context_i=1, work_batch=TaskArgs.make_batch((0,)))
-        iter_worker_num_0 = pool.contexts[0].dedicated_worker_id
-        iter_worker_num_1 = pool.contexts[1].dedicated_worker_id
-        iter_worker_pid_0 = pool.pool._processes[iter_worker_num_0].pid
-        iter_worker_pid_1 = pool.pool._processes[iter_worker_num_1].pid
-        batch_0 = pool.receive_batch(context_i=0)
-        batch_1 = pool.receive_batch(context_i=1)
-        np.testing.assert_array_equal(np.array([iter_worker_pid_0, 1]), batch_0[0])
-        np.testing.assert_array_equal(np.array([iter_worker_pid_1, 1]), batch_1[0])
-        if num_workers == 1:
-            assert iter_worker_pid_0 == iter_worker_pid_1
-        else:
-            assert iter_worker_pid_0 != iter_worker_pid_1
-
-
-def test_multiple_stateful_sources_single_worker():
-    for num_workers in (1, 4):
-        yield _test_multiple_stateful_sources_single_worker, num_workers
+            for task, sample in zip(tasks, batch_1):
+                np.testing.assert_array_equal(answer(-1, *task)[1:] + 100, sample[1:])
+
+    @params(1, 4)
+    def test_multiple_stateful_sources_single_worker(self, num_workers):
+        groups = [
+            MockGroup.from_callback(IteratorCb(), batch=True),
+            MockGroup.from_callback(IteratorCb(), batch=True),
+        ]
+        with create_pool(
+            groups, keep_alive_queue_size=1, num_workers=num_workers, start_method="spawn"
+        ) as pool:
+            pids = get_pids(pool)
+            assert len(pids) == min(num_workers, len(groups))
+            pool.schedule_batch(context_i=0, work_batch=TaskArgs.make_batch((0,)))
+            pool.schedule_batch(context_i=1, work_batch=TaskArgs.make_batch((0,)))
+            iter_worker_num_0 = pool.contexts[0].dedicated_worker_id
+            iter_worker_num_1 = pool.contexts[1].dedicated_worker_id
+            iter_worker_pid_0 = pool.pool._processes[iter_worker_num_0].pid
+            iter_worker_pid_1 = pool.pool._processes[iter_worker_num_1].pid
+            batch_0 = pool.receive_batch(context_i=0)
+            batch_1 = pool.receive_batch(context_i=1)
+            np.testing.assert_array_equal(np.array([iter_worker_pid_0, 1]), batch_0[0])
+            np.testing.assert_array_equal(np.array([iter_worker_pid_1, 1]), batch_1[0])
+            if num_workers == 1:
+                assert iter_worker_pid_0 == iter_worker_pid_1
+            else:
+                assert iter_worker_pid_0 != iter_worker_pid_1


 # ################################################################################################ #
@@ -340,18 +343,24 @@ def invalid_callback(i):
     return "42"


-@raises(
-    Exception,
-    glob="Unsupported callback return type. Expected NumPy array, PyTorch or "
-    "MXNet cpu tensors, DALI TensorCPU, or list or tuple of them representing sample. Got",
-)
-@with_setup(setup_function, teardown_function)
-def test_pool_invalid_return():
-    callbacks = [MockGroup.from_callback(invalid_callback)]
-    with create_pool(
-        callbacks, keep_alive_queue_size=1, num_workers=1, start_method="spawn"
-    ) as pool:
-        _ = get_pids(pool)
-        work_batch = TaskArgs.make_sample(SampleRange(0, 1, 0, 0))
-        pool.schedule_batch(context_i=0, work_batch=work_batch)
-        pool.receive_batch(context_i=0)
+class TestPoolInvalidReturn:
+    def setUp(self):
+        setup_function()
+
+    def tearDown(self):
+        teardown_function()
+
+    @raises(
+        Exception,
+        glob="Unsupported callback return type. Expected NumPy array, PyTorch or "
+        "MXNet cpu tensors, DALI TensorCPU, or list or tuple of them representing sample. Got",
+    )
+    def test_pool_invalid_return(self):
+        callbacks = [MockGroup.from_callback(invalid_callback)]
+        with create_pool(
+            callbacks, keep_alive_queue_size=1, num_workers=1, start_method="spawn"
+        ) as pool:
+            _ = get_pids(pool)
+            work_batch = TaskArgs.make_sample(SampleRange(0, 1, 0, 0))
+            pool.schedule_batch(context_i=0, work_batch=work_batch)
+            pool.receive_batch(context_i=0)
diff --git a/dali/test/python/test_pytorch_operator.py b/dali/test/python/test_pytorch_operator.py
index e1d7b7eb6e5..6e3c7f8427a 100644
--- a/dali/test/python/test_pytorch_operator.py
+++ b/dali/test/python/test_pytorch_operator.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
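The test_pool.py hunks above illustrate the migration pattern used throughout this change: nose-style yield generators and @with_setup functions become methods on a plain test class, parameterized with nose2's stock params decorator. A minimal, hypothetical sketch of that pattern (the class and test names below are invented for illustration and are not part of this patch):

from nose2.tools import params


class TestWorkerCount:
    # nose2 generates one test case per argument tuple passed to @params,
    # mirroring what the old yield-based generator produced.
    @params(1, 4)
    def test_num_workers(self, num_workers):
        assert num_workers in (1, 4)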
@@ -21,6 +21,7 @@ from nvidia.dali.pipeline import Pipeline

 from test_utils import get_dali_extra_path
+from nose_utils import attr

 test_data_root = get_dali_extra_path()
 images_dir = os.path.join(test_data_root, "db", "single", "jpeg")
@@ -113,6 +114,7 @@ def check_pytorch_operator(device):
     assert numpy.allclose(res2, exp2_t.numpy())


+@attr("pytorch")
 def test_pytorch_operator():
     for device in {"cpu", "gpu"}:
         yield check_pytorch_operator, device
@@ -136,6 +138,7 @@ def check_pytorch_operator_batch_processing(device):
     assert numpy.allclose(res2, exp2[i].numpy())


+@attr("pytorch")
 def test_pytorch_operator_batch_processing():
     for device in {"cpu", "gpu"}:
         yield check_pytorch_operator_batch_processing, device
diff --git a/dali/test/python/unittest.cfg b/dali/test/python/unittest.cfg
index 14aca7ebfff..d480344c6c0 100644
--- a/dali/test/python/unittest.cfg
+++ b/dali/test/python/unittest.cfg
@@ -1,5 +1,6 @@
 [unittest]
-plugins = nose2.plugins.attrib
+plugins = nose2_attrib_generators
+          nose2.plugins.attrib
           nose2.plugins.collect
           nose2.plugins.printhooks
diff --git a/dali/test/python/unittest_failure.cfg b/dali/test/python/unittest_failure.cfg
index 2a4c17eeed3..df8d2c528f1 100644
--- a/dali/test/python/unittest_failure.cfg
+++ b/dali/test/python/unittest_failure.cfg
@@ -1,5 +1,6 @@
 [unittest]
-plugins = nose2.plugins.attrib
+plugins = nose2_attrib_generators
+          nose2.plugins.attrib
           nose2.plugins.collect
           nose2.plugins.printhooks
diff --git a/dali/test/python/unittest_slow.cfg b/dali/test/python/unittest_slow.cfg
index 05fcca59ec6..476556cc7aa 100644
--- a/dali/test/python/unittest_slow.cfg
+++ b/dali/test/python/unittest_slow.cfg
@@ -1,5 +1,6 @@
 [unittest]
-plugins = nose2.plugins.attrib
+plugins = nose2_attrib_generators
+          nose2.plugins.attrib
           nose2.plugins.collect
           nose2.plugins.printhooks
diff --git a/qa/TL0_FW_iterators/test_paddle.sh b/qa/TL0_FW_iterators/test_paddle.sh
index 8be13feede1..473f40a0d7f 100755
--- a/qa/TL0_FW_iterators/test_paddle.sh
+++ b/qa/TL0_FW_iterators/test_paddle.sh
@@ -18,7 +18,7 @@ test_body() {
            python test_RN50_data_fw_iterators.py --framework ${fw} --gpus ${NUM_GPUS} -b 13 \
                --workers 3 --prefetch 2 -i 2 --epochs 2 --fp16
        done
-       ${python_invoke_test} -m '(?:^|[\b_\./-])[Tt]est.*paddle*' test_fw_iterators_detection.py
+       ${python_new_invoke_test} -A 'paddle' test_fw_iterators_detection
    fi
    ${python_new_invoke_test} -A 'paddle' test_fw_iterators
 }
diff --git a/qa/TL0_FW_iterators/test_pytorch.sh b/qa/TL0_FW_iterators/test_pytorch.sh
index 6fb9e241f60..9100ebf23ca 100755
--- a/qa/TL0_FW_iterators/test_pytorch.sh
+++ b/qa/TL0_FW_iterators/test_pytorch.sh
@@ -19,7 +19,7 @@ test_body() {
                --workers 3 --prefetch 2 -i 2 --epochs 2 --fp16
        done
    fi
-   ${python_invoke_test} -m '(?:^|[\b_\./-])[Tt]est.*pytorch*' test_fw_iterators_detection.py
+   ${python_new_invoke_test} -A 'pytorch' test_fw_iterators_detection
    ${python_new_invoke_test} -A 'pytorch' test_fw_iterators
 }
diff --git a/qa/TL0_cpu_only/test_nofw.sh b/qa/TL0_cpu_only/test_nofw.sh
index a7bc1051b1f..ad4ba417c89 100755
--- a/qa/TL0_cpu_only/test_nofw.sh
+++ b/qa/TL0_cpu_only/test_nofw.sh
@@ -33,9 +33,9 @@ test_body() {
        "$FULLPATH" --gtest_filter="*CpuOnly*:*CApi*/0.*-*0.UseCopyKernel:*ForceNoCopyFail:*daliOutputCopySamples"
    done
    if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
-       ${python_invoke_test} --attr '!pytorch' test_dali_cpu_only.py
+       ${python_new_invoke_test} -A '!pytorch' test_dali_cpu_only
    else
-       ${python_invoke_test} --attr '!pytorch,!numba' test_dali_cpu_only.py
+       ${python_new_invoke_test} -A '!pytorch,!numba' test_dali_cpu_only
    fi
 }
diff --git a/qa/TL0_cpu_only/test_pytorch.sh b/qa/TL0_cpu_only/test_pytorch.sh
index 7fea55ceac6..adff0a6944e 100755
--- a/qa/TL0_cpu_only/test_pytorch.sh
+++ b/qa/TL0_cpu_only/test_pytorch.sh
@@ -8,7 +8,7 @@ test_body() {
    # CPU only test, remove CUDA from the search path just in case
    export LD_LIBRARY_PATH=""
    export PATH=${PATH/cuda/}
-   ${python_invoke_test} --attr 'pytorch' test_dali_cpu_only.py
+   ${python_new_invoke_test} -A 'pytorch' test_dali_cpu_only
 }

 pushd ../..
diff --git a/qa/TL0_cpu_only/test_tf.sh b/qa/TL0_cpu_only/test_tf.sh
index 75c7721ac0d..a0abd099d92 100755
--- a/qa/TL0_cpu_only/test_tf.sh
+++ b/qa/TL0_cpu_only/test_tf.sh
@@ -11,8 +11,8 @@ test_body() {
        # CPU only test, remove CUDA from the search path just in case
        export LD_LIBRARY_PATH=""
        export PATH=${PATH/cuda/}
-       ${python_invoke_test} test_dali_tf_plugin_cpu_only.py
-       ${python_invoke_test} test_dali_tf_plugin_cpu_only_dataset.py
+       ${python_new_invoke_test} test_dali_tf_plugin_cpu_only
+       ${python_new_invoke_test} test_dali_tf_plugin_cpu_only_dataset
    fi
 }
diff --git a/qa/TL0_jupyter/test_nofw.sh b/qa/TL0_jupyter/test_nofw.sh
index bc4685cf507..7afe37c1af9 100755
--- a/qa/TL0_jupyter/test_nofw.sh
+++ b/qa/TL0_jupyter/test_nofw.sh
@@ -18,7 +18,7 @@ test_body() {
    # test all jupyters except one related to a particular FW,
    # and one requiring a dedicated HW (multiGPU, GDS and OF)
    # optical flow requires TU102 architecture whilst this test can be run on any GPU
-   exclude_files="multigpu\|mxnet\|tensorflow\|pytorch\|paddle\|jax\|external_input.ipynb\|numpy_reader.ipynb\|webdataset-externalsource.ipynb\|optical_flow\|python_operator\|#"
+   exclude_files="multigpu\|tensorflow\|pytorch\|paddle\|jax\|external_input.ipynb\|numpy_reader.ipynb\|webdataset-externalsource.ipynb\|optical_flow\|python_operator\|#"

    find * -name "*.ipynb" | sed "/${exclude_files}/d" | xargs -i jupyter nbconvert \
                    --to notebook --inplace --execute \
diff --git a/qa/TL0_multigpu/test_body.sh b/qa/TL0_multigpu/test_body.sh
index def1003325c..ba913e4eb20 100644
--- a/qa/TL0_multigpu/test_body.sh
+++ b/qa/TL0_multigpu/test_body.sh
@@ -7,7 +7,7 @@ test_py_with_framework() {

 test_py() {
    ${python_new_invoke_test} -s decoder -A 'multi_gpu'
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba,multi_gpu' -s experimental_mode
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,multi_gpu' -s experimental_mode
 }

 test_gtest() {
@@ -37,12 +37,12 @@ test_gtest() {
 }

 test_cupy() {
-   ${python_invoke_test} --attr 'multigpu' test_external_source_cupy.py
+   ${python_new_invoke_test} -A 'multigpu' test_external_source_cupy
 }

 test_pytorch() {
-   ${python_invoke_test} --attr 'multigpu' test_external_source_pytorch_gpu.py
+   ${python_new_invoke_test} -A 'multigpu' test_external_source_pytorch_gpu

    ${python_new_invoke_test} -A 'pytorch,multi_gpu' -s experimental_mode
 }
diff --git a/qa/TL0_plugin_manager/test.sh b/qa/TL0_plugin_manager/test.sh
index 49b9c426fde..57bfe75af06 100755
--- a/qa/TL0_plugin_manager/test.sh
+++ b/qa/TL0_plugin_manager/test.sh
@@ -12,7 +12,7 @@ if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
 fi

 test_body() {
-   ${python_invoke_test} test_plugin_manager.py
+   ${python_new_invoke_test} test_plugin_manager
 }

 pushd ../..
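The scripts above replace nose's --attr option with nose2's -A selector, which matches attributes set directly on the test callable; the attr helper imported from nose_utils in test_pytorch_operator.py is what puts those attributes in place. A rough, simplified stand-in for that helper (illustrative only, not the project's implementation):

# Hypothetical stand-in: tagging just sets a truthy attribute on the test,
# which nose2's attrib plugin later matches against the -A expression.
def attr(*names):
    def decorate(fn):
        for name in names:
            setattr(fn, name, True)
        return fn
    return decorate


@attr("pytorch")
def test_torch_only():
    pass


# `-A 'pytorch'` keeps tests whose "pytorch" attribute is truthy;
# `-A '!pytorch'` keeps tests where it is missing or falsy.
assert getattr(test_torch_only, "pytorch", False)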
diff --git a/qa/TL0_python-self-test-core/test_body.sh b/qa/TL0_python-self-test-core/test_body.sh
index dd71576860c..0e4a2c30ab4 100644
--- a/qa/TL0_python-self-test-core/test_body.sh
+++ b/qa/TL0_python-self-test-core/test_body.sh
@@ -7,7 +7,7 @@ test_different_numpy_versions() {
    for test_script in $(ls test_pipeline.py \
                            test_pipeline_decorator.py \
                            test_pipeline_segmentation.py); do
-       ${python_invoke_test} ${test_script}
+       ${python_new_invoke_test} ${test_script%.py}
    done
 }

@@ -23,9 +23,9 @@ test_py_with_framework() {
                            test_functional_api.py \
                            test_external_source_impl_utils.py); do
        if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
-           ${python_invoke_test} --attr "!slow,!pytorch,!mxnet,!cupy" ${test_script}
+           ${python_new_invoke_test} -A "!slow,!pytorch,!cupy" ${test_script%.py}
        else
-           ${python_invoke_test} --attr "!slow,!pytorch,!mxnet,!cupy,!numba" ${test_script}
+           ${python_new_invoke_test} -A "!slow,!pytorch,!cupy,!numba" ${test_script%.py}
        fi
    done

@@ -35,7 +35,7 @@ test_py_with_framework() {
        ${python_new_invoke_test} -A "!slow,!pytorch,!mxnet,!cupy,!numba" test_dali_variable_batch_size
    fi

-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy' test_backend_impl
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy' test_backend_impl

    if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
        ${python_new_invoke_test} -A 'numba' -s type_annotations
@@ -74,7 +74,7 @@ test_experimental_mode_torch() {
 }

 test_pytorch() {
-   ${python_invoke_test} --attr '!slow,pytorch' test_dali_variable_batch_size.py
+   ${python_new_invoke_test} -A '!slow,pytorch' test_dali_variable_batch_size
    test_experimental_mode_torch
    if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
        ${python_new_invoke_test} -A 'pytorch' -s type_annotations
@@ -87,18 +87,18 @@ test_pytorch() {

 test_checkpointing() {
    if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
-       ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' checkpointing.test_dali_checkpointing
-       ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' checkpointing.test_dali_stateless_operators
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' checkpointing.test_dali_checkpointing
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' checkpointing.test_dali_stateless_operators
    else
-       ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba,!sanitizer_skip' checkpointing.test_dali_checkpointing
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,!sanitizer_skip' checkpointing.test_dali_checkpointing
        # External source tests are slow and Python-side mostly, but let's run just one of them
-       ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' checkpointing.test_dali_checkpointing.test_external_source_checkpointing:1
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' checkpointing.test_dali_checkpointing.test_external_source_checkpointing.1
    fi
 }

 test_experimental_mode() {
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' -s experimental_mode
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' -s experimental_mode
 }
diff --git a/qa/TL0_python-self-test-readers-decoders/test_body.sh b/qa/TL0_python-self-test-readers-decoders/test_body.sh
index f36881f8b03..5af20221c18 100644
--- a/qa/TL0_python-self-test-readers-decoders/test_body.sh
+++ b/qa/TL0_python-self-test-readers-decoders/test_body.sh
@@ -14,8 +14,13 @@ test_py_with_framework() {
                            test_pool.py test_external_source_parallel.py test_external_source_parallel_shared_batch.py \
                            test_external_source_parallel_large_sample.py \
                            | sed "/$FILTER_PATTERN/d"); do
-       ${python_invoke_test} --attr '!slow,!pytorch,!mxnet,!cupy,!numba' ${test_script}
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' ${test_script%.py}
    done
+   # run this test explicitly as it needs no GPU context in the process
+   if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' test_external_source_parallel.TestParallelFork._test_parallel_fork_cpu_only
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' test_external_source_parallel_custom_serialization._test_no_pickling_in_forking_mode
+   fi

    if [ -n "$DALI_ENABLE_SANITIZERS" ]; then
diff --git a/qa/TL0_python-self-test_tegra/test_body.sh b/qa/TL0_python-self-test_tegra/test_body.sh
index 0b7500e1ff2..2dfe1ed8af0 100644
--- a/qa/TL0_python-self-test_tegra/test_body.sh
+++ b/qa/TL0_python-self-test_tegra/test_body.sh
@@ -19,19 +19,15 @@ test_py_with_framework() {
        for exclude in "${EXCLUDE_PACKAGES[@]}"; do
            grep -qiE ${exclude} ${test_script} && status=$((status+1))
        done
-       # if nose2 is used isnide the test use it
-       if grep -qiE "nose2" ${test_script}; then
-           PYTHON_TEST_CMD=${python_new_invoke_test}
-           test_script=${test_script/.py/}
-       else
-           PYTHON_TEST_CMD=${python_invoke_test}
-       fi
+       # All tests now use nose2 - strip .py extension
+       test_script=${test_script%.py}
        # execute only when no matches are found
        if [ ${status} -eq 0 ]; then
-           ${PYTHON_TEST_CMD} --attr '!slow,!pytorch,!mxnet,!cupy,!numba,!scipy' ${test_script}
+           ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,!scipy' ${test_script}
        fi
    done
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' test_external_source_parallel.TestParallelFork._test_parallel_fork_cpu_only

    XAVIER_OPERATOR_1_TESTS=""
    for test_script in $(ls operator_1/test_*.py); do
@@ -75,9 +71,9 @@ test_py_with_framework() {
        fi
    done

-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba,!scipy' -s operator_1 $XAVIER_OPERATOR_1_TESTS
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba,!scipy' -s operator_2 $XAVIER_OPERATOR_2_TESTS
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba,!scipy' -s reader $XAVIER_READER_TESTS
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,!scipy' -s operator_1 $XAVIER_OPERATOR_1_TESTS
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,!scipy' -s operator_2 $XAVIER_OPERATOR_2_TESTS
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,!scipy' -s reader $XAVIER_READER_TESTS
 }

 test_py() {
diff --git a/qa/TL0_python_self_test_frameworks/test_cupy.sh b/qa/TL0_python_self_test_frameworks/test_cupy.sh
index 9d818fba357..fea0ba52859 100755
--- a/qa/TL0_python_self_test_frameworks/test_cupy.sh
+++ b/qa/TL0_python_self_test_frameworks/test_cupy.sh
@@ -4,12 +4,12 @@ pip_packages='${python_test_runner_package} numpy cupy pycuda'
 target_dir=./dali/test/python

 test_body() {
-   ${python_invoke_test} -m '(?:^|[\b_\./-])[Tt]est.*cupy' test_dltensor_operator.py
-   ${python_invoke_test} test_gpu_python_function_operator.py
-   ${python_invoke_test} test_backend_impl_gpu.py
-   ${python_invoke_test} test_external_source_cupy.py
-   ${python_invoke_test} --attr 'cupy' test_external_source_impl_utils.py
-   ${python_invoke_test} --attr 'cupy' test_pipeline_debug.py
+   ${python_new_invoke_test} -A 'cupy' test_dltensor_operator
+   ${python_new_invoke_test} test_gpu_python_function_operator
+   ${python_new_invoke_test} test_backend_impl_gpu
+   ${python_new_invoke_test} test_external_source_cupy
+   ${python_new_invoke_test} -A 'cupy' test_external_source_impl_utils
+   ${python_new_invoke_test} -A 'cupy' test_pipeline_debug
    ${python_new_invoke_test} -A '!slow,cupy' checkpointing.test_dali_checkpointing
    ${python_new_invoke_test} -A '!slow,cupy' checkpointing.test_dali_stateless_operators
 }
diff --git a/qa/TL0_python_self_test_frameworks/test_pytorch.sh b/qa/TL0_python_self_test_frameworks/test_pytorch.sh
index 24c8368925e..95b42abcdd3 100755
--- a/qa/TL0_python_self_test_frameworks/test_pytorch.sh
+++ b/qa/TL0_python_self_test_frameworks/test_pytorch.sh
@@ -4,19 +4,19 @@ pip_packages='${python_test_runner_package} numpy librosa torch psutil torchvisi
 target_dir=./dali/test/python

 test_body() {
-   ${python_invoke_test} -m '(?:^|[\b_\./-])[Tt]est.*pytorch' test_pytorch_operator.py
-   ${python_invoke_test} -m '(?:^|[\b_\./-])[Tt]est.*pytorch' test_dltensor_operator.py
-   ${python_invoke_test} test_torch_pipeline_rnnt.py
-   ${python_invoke_test} test_external_source_pytorch_cpu.py
-   ${python_invoke_test} test_external_source_pytorch_gpu.py
-   ${python_invoke_test} test_external_source_pytorch_dlpack.py
-   ${python_invoke_test} test_external_source_parallel_pytorch.py
-   ${python_invoke_test} test_backend_impl_torch_dlpack.py
-   ${python_invoke_test} test_dali_fork_torch.py
-   ${python_invoke_test} test_copy_to_external_torch.py
-   ${python_invoke_test} --attr 'pytorch' test_external_source_impl_utils.py
-   ${python_invoke_test} --attr 'pytorch' test_pipeline_debug.py
-   ${python_invoke_test} --attr 'pytorch' test_functional_api.py
+   ${python_new_invoke_test} -A 'pytorch' test_pytorch_operator
+   ${python_new_invoke_test} -A 'pytorch' test_dltensor_operator
+   ${python_new_invoke_test} test_torch_pipeline_rnnt
+   ${python_new_invoke_test} test_external_source_pytorch_cpu
+   ${python_new_invoke_test} test_external_source_pytorch_gpu
+   ${python_new_invoke_test} test_external_source_pytorch_dlpack
+   ${python_new_invoke_test} test_external_source_parallel_pytorch
+   ${python_new_invoke_test} test_backend_impl_torch_dlpack
+   ${python_new_invoke_test} test_dali_fork_torch
+   ${python_new_invoke_test} test_copy_to_external_torch
+   ${python_new_invoke_test} -A 'pytorch' test_external_source_impl_utils
+   ${python_new_invoke_test} -A 'pytorch' test_pipeline_debug
+   ${python_new_invoke_test} -A 'pytorch' test_functional_api
    ${python_new_invoke_test} -s . test_dali_proxy
 }
diff --git a/qa/TL0_self_test_Ampere/test.sh b/qa/TL0_self_test_Ampere/test.sh
index 1e5b9acdbbb..25ba0acd219 100644
--- a/qa/TL0_self_test_Ampere/test.sh
+++ b/qa/TL0_self_test_Ampere/test.sh
@@ -35,7 +35,7 @@ test_body() {
    # test Optical Flow
    ${python_new_invoke_test} -s operator_1 test_optical_flow
    ${python_new_invoke_test} -s checkpointing test_dali_stateless_operators.test_optical_flow_stateless
-   ${python_invoke_test} test_dali_variable_batch_size.py:test_optical_flow
+   ${python_new_invoke_test} test_dali_variable_batch_size.test_optical_flow
 }

 pushd ../..
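Many invocations above combine several attributes, e.g. -A '!slow,!pytorch,!cupy,!numba'. In nose2's attrib plugin a comma-separated group is effectively a conjunction and a leading ! negates a single attribute (repeating -A gives alternatives); the sketch below only models that matching behaviour and is not the plugin's code:

def matches(test_attrs, expression):
    # Every comma-separated clause must hold for the test to be selected.
    for clause in expression.split(","):
        clause = clause.strip()
        if not clause:
            continue
        if clause.startswith("!"):
            if test_attrs.get(clause[1:], False):
                return False
        elif not test_attrs.get(clause, False):
            return False
    return True


assert matches({"pytorch": True}, "!slow,pytorch")
assert not matches({"slow": True, "pytorch": True}, "!slow,pytorch")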
diff --git a/qa/TL0_tensorflow_plugin/test.sh b/qa/TL0_tensorflow_plugin/test.sh
index 5cfa62aac89..1e2e080cba1 100755
--- a/qa/TL0_tensorflow_plugin/test.sh
+++ b/qa/TL0_tensorflow_plugin/test.sh
@@ -19,11 +19,11 @@ test_body() {
    pip uninstall -y `pip list | grep nvidia-dali-tf-plugin | cut -d " " -f1` || true

    # No plugin installed, should fail
-   ${python_invoke_test} test_dali_tf_plugin.py:TestDaliTfPluginLoadFail
+   ${python_new_invoke_test} test_dali_tf_plugin.TestDaliTfPluginLoadFail

    # Installing "current" dali tf (built against installed TF)
    pip install ../../../nvidia_dali_tf_plugin*.tar.gz --no-build-isolation
-   ${python_invoke_test} test_dali_tf_plugin.py:TestDaliTfPluginLoadOk
+   ${python_new_invoke_test} test_dali_tf_plugin.TestDaliTfPluginLoadOk

    # Installing "current" dali tf (built against installed TF) - force rebuild without DALI using internal stubs
    # and then install DALI again
@@ -31,23 +31,23 @@ test_body() {
    pip uninstall -y `pip list | grep nvidia-dali | cut -d " " -f1` || true
    DALI_TF_ALWAYS_BUILD=1 pip install --no-deps ../../../nvidia_dali_tf_plugin*.tar.gz --no-build-isolation
    pip install ../../../nvidia_dali_*.whl
-   ${python_invoke_test} test_dali_tf_plugin.py:TestDaliTfPluginLoadOk
+   ${python_new_invoke_test} test_dali_tf_plugin.TestDaliTfPluginLoadOk

    # DALI TF run
-   ${python_invoke_test} test_dali_tf_plugin_run.py
+   ${python_new_invoke_test} test_dali_tf_plugin_run

    # DALI TF DATASET run
-   ${python_invoke_test} test_dali_tf_dataset.py
-   ${python_invoke_test} test_dali_tf_conditionals.py
+   ${python_new_invoke_test} test_dali_tf_dataset
+   ${python_new_invoke_test} test_dali_tf_conditionals
    ${python_new_invoke_test} checkpointing.test_dali_checkpointing_tf_plugin
    if [ -z "$DALI_ENABLE_SANITIZERS" ]; then
-       ${python_invoke_test} test_dali_tf_dataset_shape.py
-       ${python_invoke_test} test_dali_tf_dataset_eager.py
-       ${python_invoke_test} test_dali_tf_dataset_graph.py
+       ${python_new_invoke_test} test_dali_tf_dataset_shape
+       ${python_new_invoke_test} test_dali_tf_dataset_eager
+       ${python_new_invoke_test} test_dali_tf_dataset_graph
    fi
    # DALI TF + dynamic executor
-   ${python_invoke_test} test_dali_tf_exec2.py
+   ${python_new_invoke_test} test_dali_tf_exec2
 }

 pushd ../..
diff --git a/qa/TL0_tensorflow_plugin_conda/test.sh b/qa/TL0_tensorflow_plugin_conda/test.sh
index a683786d110..2e11d50c5a5 100755
--- a/qa/TL0_tensorflow_plugin_conda/test.sh
+++ b/qa/TL0_tensorflow_plugin_conda/test.sh
@@ -10,15 +10,15 @@ prolog=(enable_conda)
 epilog=(disable_conda)

 test_body() {
-   ${python_invoke_test} test_dali_tf_plugin.py:TestDaliTfPluginLoadOk
+   ${python_new_invoke_test} test_dali_tf_plugin.TestDaliTfPluginLoadOk

    # DALI TF run
-   ${python_invoke_test} test_dali_tf_plugin_run.py
+   ${python_new_invoke_test} test_dali_tf_plugin_run

    # DALI TF DATASET run
-   ${python_invoke_test} test_dali_tf_dataset.py
+   ${python_new_invoke_test} test_dali_tf_dataset

-   ${python_invoke_test} test_dali_tf_dataset_shape.py
+   ${python_new_invoke_test} test_dali_tf_dataset_shape
 }

 pushd ../..
diff --git a/qa/TL0_video_plugin/test.sh b/qa/TL0_video_plugin/test.sh
index 8b16451f26b..1562aa40081 100755
--- a/qa/TL0_video_plugin/test.sh
+++ b/qa/TL0_video_plugin/test.sh
@@ -28,7 +28,7 @@ test_body() {
    pip install -v ../../../nvidia_dali_video*.tar.gz --no-build-isolation

    # Check that the plugin can be loaded
-   ${python_invoke_test} test_dali_video_plugin.py:TestDaliVideoPluginLoadOk
+   ${python_new_invoke_test} test_dali_video_plugin.TestDaliVideoPluginLoadOk
    ${python_new_invoke_test} -s . test_dali_video_plugin_decoder
 }
diff --git a/qa/TL0_videoreader_test/test.sh b/qa/TL0_videoreader_test/test.sh
index 3969cb31ef5..71d8e733374 100755
--- a/qa/TL0_videoreader_test/test.sh
+++ b/qa/TL0_videoreader_test/test.sh
@@ -47,11 +47,13 @@ test_body() {
    python video_label_example.py

    echo $(pwd)
-   ${python_invoke_test} ../../../../dali/test/python/test_video_pipeline.py
-   ${python_invoke_test} ../../../../dali/test/python/test_video_reader_resize.py
+   pushd ../../../../dali/test/python/

-   cd ../../../../dali/test/python/
+   ${python_new_invoke_test} test_video_pipeline
+   ${python_new_invoke_test} test_video_reader_resize
    ${python_new_invoke_test} test_video_reader
+
+   popd
 }

 pushd ../..
diff --git a/qa/TL1_jupyter_conda/test_nofw.sh b/qa/TL1_jupyter_conda/test_nofw.sh
index c53d79b2838..7612339825c 100755
--- a/qa/TL1_jupyter_conda/test_nofw.sh
+++ b/qa/TL1_jupyter_conda/test_nofw.sh
@@ -24,7 +24,7 @@ test_body() {
    # test all jupyters except one related to a particular FW,
    # and one requiring a dedicated HW (multiGPU, GDS and OF)
    # optical flow requires TU102 architecture whilst this test can be run on any GPU
-   exclude_files="multigpu\|mxnet\|tensorflow\|pytorch\|paddle\|jax\|external_input.ipynb\|numpy_reader.ipynb\|webdataset-externalsource.ipynb\|optical_flow\|python_operator\|#"
+   exclude_files="multigpu\|tensorflow\|pytorch\|paddle\|jax\|external_input.ipynb\|numpy_reader.ipynb\|webdataset-externalsource.ipynb\|optical_flow\|python_operator\|#"

    find * -name "*.ipynb" | sed "/${exclude_files}/d" | xargs -i jupyter nbconvert \
                    --to notebook --inplace --execute \
diff --git a/qa/TL1_naive_histogram/test.sh b/qa/TL1_naive_histogram/test.sh
index 727155a97b9..1d230506185 100755
--- a/qa/TL1_naive_histogram/test.sh
+++ b/qa/TL1_naive_histogram/test.sh
@@ -12,7 +12,7 @@ do_once() {
 test_body() {
    pushd $(pwd)/docs/examples/custom_operations/custom_operator/naive_histogram
    (mkdir build && cd build && cmake .. && make -j"$(grep ^processor /proc/cpuinfo | wc -l)")
-   ${python_invoke_test} test_naive_histogram.py
+   ${python_new_invoke_test} test_naive_histogram
    popd
 }
diff --git a/qa/TL1_python-self-test-slow/test.sh b/qa/TL1_python-self-test-slow/test.sh
index 5fcc7129a30..5b0def1e846 100755
--- a/qa/TL1_python-self-test-slow/test.sh
+++ b/qa/TL1_python-self-test-slow/test.sh
@@ -7,7 +7,7 @@ test_body() {
    for test_script in $(ls test_pipeline.py test_pipeline_debug.py test_pipeline_debug_resnet50.py \
                            test_pipeline_decorator.py test_pipeline_multichannel.py test_pipeline_segmentation.py \
                            test_functional_api.py); do
-       ${python_invoke_test} --attr 'slow' ${test_script}
+       ${python_new_invoke_test} -A 'slow' ${test_script%.py}
    done

    ${python_new_invoke_test} -A "slow" test_backend_impl
diff --git a/qa/TL1_python-self-test_conda/test_body.sh b/qa/TL1_python-self-test_conda/test_body.sh
index 495d55fc32e..e926bce7b89 100644
--- a/qa/TL1_python-self-test_conda/test_body.sh
+++ b/qa/TL1_python-self-test_conda/test_body.sh
@@ -2,15 +2,15 @@
 test_py_with_framework() {
    for test_script in $(ls test_pipeline*.py test_external_source_dali.py test_external_source_numpy.py test_external_source_parallel_garbage_collection_order.py test_functional_api.py); do
-       ${python_invoke_test} --attr '!slow,!pytorch,!mxnet,!cupy,!numba' ${test_script}
+       ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' ${test_script%.py}
    done

-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy' test_backend_impl
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy' test_backend_impl

-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' -s operator_1
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' -s operator_2
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba' -s reader
-   ${python_new_invoke_test} -A '!slow,!pytorch,!mxnet,!cupy,!numba,!jpeg_scans_limit' -s decoder
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' -s operator_1
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' -s operator_2
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba' -s reader
+   ${python_new_invoke_test} -A '!slow,!pytorch,!cupy,!numba,!jpeg_scans_limit' -s decoder
 }

 test_jpeg_scan_limit() {
diff --git a/qa/TL1_tensorflow_dataset/test_impl.sh b/qa/TL1_tensorflow_dataset/test_impl.sh
index 0b01d50ed9f..c8d3c90f652 100755
--- a/qa/TL1_tensorflow_dataset/test_impl.sh
+++ b/qa/TL1_tensorflow_dataset/test_impl.sh
@@ -13,13 +13,13 @@ test_body() {
    is_compatible=$(python -c 'import nvidia.dali.plugin.tf as dali_tf; print(dali_tf.dataset_compatible_tensorflow())')
    if [ $is_compatible = 'True' ]; then
        # DALI TF DATASET run
-       ${python_invoke_test} test_dali_tf_dataset_graph.py:_test_tf_dataset_other_gpu
-       ${python_invoke_test} test_dali_tf_dataset_graph.py:_test_tf_dataset_multigpu_manual_placement
-       ${python_invoke_test} test_dali_tf_dataset_eager.py:_test_tf_dataset_other_gpu
-       ${python_invoke_test} test_dali_tf_dataset_eager.py:_test_tf_dataset_multigpu_manual_placement
-       ${python_invoke_test} test_dali_tf_dataset_eager.py:_test_tf_dataset_multigpu_mirrored_strategy
-       ${python_invoke_test} test_dali_tf_dataset_mnist_eager.py
-       ${python_invoke_test} test_dali_tf_dataset_mnist_graph.py
+       ${python_new_invoke_test} test_dali_tf_dataset_graph._test_tf_dataset_other_gpu
+       ${python_new_invoke_test} test_dali_tf_dataset_graph._test_tf_dataset_multigpu_manual_placement
+       ${python_new_invoke_test} test_dali_tf_dataset_eager._test_tf_dataset_other_gpu
+       ${python_new_invoke_test} test_dali_tf_dataset_eager._test_tf_dataset_multigpu_manual_placement
+       ${python_new_invoke_test} test_dali_tf_dataset_eager._test_tf_dataset_multigpu_mirrored_strategy
+       ${python_new_invoke_test} test_dali_tf_dataset_mnist_eager
+       ${python_new_invoke_test} test_dali_tf_dataset_mnist_graph

        # DALI TF Notebooks run
        pushd ../../../docs/examples/frameworks/tensorflow/
diff --git a/qa/TL1_tensorflow_plugin/test.sh b/qa/TL1_tensorflow_plugin/test.sh
index 60cdeba6636..dbab1909960 100755
--- a/qa/TL1_tensorflow_plugin/test.sh
+++ b/qa/TL1_tensorflow_plugin/test.sh
@@ -9,16 +9,16 @@ test_body() {

    # No plugin installed, should fail
-   ${python_invoke_test} test_dali_tf_plugin.py:TestDaliTfPluginLoadFail
+   ${python_new_invoke_test} test_dali_tf_plugin.TestDaliTfPluginLoadFail

    # Remove the old and installing "current" dali tf (built against installed TF)
    pip uninstall -y `pip list | grep nvidia-dali-tf-plugin | cut -d " " -f1` || true
    pip install --upgrade ../../../nvidia_dali_tf_plugin*.tar.gz --no-build-isolation

-   ${python_invoke_test} test_dali_tf_plugin.py:TestDaliTfPluginLoadOk
+   ${python_new_invoke_test} test_dali_tf_plugin.TestDaliTfPluginLoadOk

    # DALI TF run
-   ${python_invoke_test} test_dali_tf_plugin_run.py
+   ${python_new_invoke_test} test_dali_tf_plugin_run
 }

 pushd ../..
diff --git a/qa/nose_wrapper/__main__.py b/qa/nose_wrapper/__main__.py
deleted file mode 100644
index 1d5eb1977f0..00000000000
--- a/qa/nose_wrapper/__main__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import sys
-
-# before running the test we add dali/test/python to the python path
-import nose_utils  # noqa:F401 - for Python 3.10
-from nose.core import run_exit
-import inspect
-
-if sys.version_info >= (3, 11):
-
-    def legacy_getargspec(fun):
-        args, varargs, varkw, defaults, *_ = inspect.getfullargspec(fun)
-        return (args, varargs, varkw, defaults)
-
-    inspect.getargspec = legacy_getargspec
-
-if sys.argv[0].endswith("__main__.py"):
-    sys.argv[0] = "%s -m nose_wrapper" % sys.executable
-
-run_exit()
diff --git a/qa/test_template_impl.sh b/qa/test_template_impl.sh
index 5229870f9cd..4047c44e6e2 100755
--- a/qa/test_template_impl.sh
+++ b/qa/test_template_impl.sh
@@ -17,20 +17,13 @@ source $topdir/qa/setup_test_common.sh
 # Set runner for python tests
 export PYTHONPATH=${PYTHONPATH}:$topdir/qa:$topdir/dali/test/python

-python_test_runner_package="nose nose2 nose-timer nose2-test-timer"
-# use DALI nose wrapper to patch nose to support Python 3.10
-python_test_runner="python -m nose_wrapper"
-python_test_args="--verbose --with-timer --timer-top-n 20 -s"
-python_invoke_test="${python_test_runner} ${python_test_args}"
-
-# New framework for Python Tests
-# During the transition we run both
-# When all tests are ported old will be removed
+python_test_runner_package="nose2 nose2-test-timer"
+# Python test runner (nose2)
 python_new_test_runner="python -m nose2"
 python_new_test_args="--verbose --plugin=nose2_test_timer.plugin --with-timer --timer-color --timer-top-n 20"
 python_new_invoke_test="${python_new_test_runner} ${python_new_test_args}"

-# Set proper CUDA version for packages, like MXNet, requiring it
+# Set proper CUDA version for packages requiring it
 pip_packages=$(eval "echo \"${pip_packages}\"" | sed "s/##CUDA_VERSION##/${CUDA_VERSION}/")
 last_config_index=$($topdir/qa/setup_packages.py -n -u $pip_packages --cuda ${CUDA_VERSION})