From 6b718077ddba0a61b3ec7084e64e1c186fcbd4bd Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Fri, 24 Jan 2025 14:05:56 +0200 Subject: [PATCH 01/13] Use global command queues Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_pipeline.hpp | 15 +++- .../src/backend/src/zero_pipeline.cpp | 88 ++++++++++++++++--- .../include/intel_npu/common/igraph.hpp | 7 +- .../intel_npu/src/common/src/igraph.cpp | 34 +++---- .../include/ze_graph_ext_wrappers.hpp | 4 +- .../src/compiler_adapter/src/driver_graph.cpp | 17 +--- .../src/compiler_adapter/src/plugin_graph.cpp | 17 +--- .../src/ze_graph_ext_wrappers.cpp | 9 +- .../intel_npu/utils/zero/zero_utils.hpp | 72 +++++++++++++++ .../intel_npu/utils/zero/zero_wrappers.hpp | 32 ++++++- .../src/utils/src/zero/zero_wrappers.cpp | 66 ++++++++++++-- 11 files changed, 271 insertions(+), 90 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 6baabc55b435ce..ae5783d281f3d5 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -27,7 +27,7 @@ struct Pipeline { Pipeline(const Pipeline&) = delete; Pipeline& operator=(const Pipeline&) = delete; - virtual ~Pipeline() = default; + ~Pipeline(); void push(); void pull(); @@ -40,6 +40,9 @@ struct Pipeline { void closeCommandListIndex(size_t command_list_index); protected: + void getCommandQueue(); + + std::shared_ptr _init_structs; std::shared_ptr _graph; const Config _config; const uint32_t _id; @@ -54,14 +57,22 @@ struct Pipeline { */ size_t _number_of_command_lists; + CommandQueueFactory _command_queue_factory; std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; std::shared_ptr _event_pool; std::vector> _events; - bool sync_output_with_fences_ = true; + bool _sync_output_with_fences = true; std::shared_ptr _npu_profiling; Logger _logger; + + uint32_t _group_ordinal; + bool _fences_are_created = false; + std::mutex _mutex; + bool _turbo = false; + ze_command_queue_priority_t _ze_queue_priority; + std::optional _ze_workload_type = std::nullopt; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 9f55897193aeeb..7ce0d636d255a3 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -26,7 +26,8 @@ Pipeline::Pipeline(const Config& config, const std::vector>>& input_tensors, const std::vector>& output_tensors, uint32_t group_ordinal) - : _graph(graph), + : _init_structs(init_structs), + _graph(graph), _config(config), _id(_graph->get_unique_id()), _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), @@ -35,7 +36,8 @@ Pipeline::Pipeline(const Config& config, init_structs->getContext(), _number_of_command_lists ? 
static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), - _logger("Pipeline", _config.get()) { + _logger("Pipeline", _config.get()), + _group_ordinal(group_ordinal) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); _logger.debug("Pipeline - initialize started"); @@ -43,9 +45,15 @@ Pipeline::Pipeline(const Config& config, profiling_query.create(profiling_pool._handle); } + if (_config.has()) { + _turbo = _config.get(); + } + + _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); + _command_lists.reserve(_number_of_command_lists); _events.reserve(_number_of_command_lists); - _fences.reserve(_number_of_command_lists); + _fences.resize(_number_of_command_lists); _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( @@ -53,7 +61,6 @@ Pipeline::Pipeline(const Config& config, group_ordinal, init_structs->getMutableCommandListVersion() ? true : false)); _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); - _fences.emplace_back(std::make_unique(*_graph->get_command_queue())); } for (size_t i = 0; i < _number_of_command_lists; i++) { @@ -138,7 +145,7 @@ Pipeline::Pipeline(const Config& config, } // appendBarrier used in L0 as well - if (!sync_output_with_fences_) { + if (!_sync_output_with_fences) { _command_lists.at(i)->appendBarrier(); _events.at(i)->AppendSignalEvent(*_command_lists.at(i)); } @@ -147,9 +154,54 @@ Pipeline::Pipeline(const Config& config, _logger.debug("Pipeline - initialize completed"); } +void Pipeline::getCommandQueue() { + _logger.debug("Pipeline - getCommandQueue() started"); + std::lock_guard lock(_mutex); + + _command_queue = _command_queue_factory.getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + + if (_ze_workload_type != _graph->get_ze_workload_type()) { + if (_ze_workload_type.has_value()) { + // fences created for the old command queue shall be destroyed and make new ones + if (_sync_output_with_fences) { + _logger.debug("Pipeline - getCommandQueue() - destroy old fences"); + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i].reset(); + } + } + + _logger.debug("Pipeline - getCommandQueue() - free command queue"); + _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + + _fences_are_created = false; + } + + _ze_workload_type = _graph->get_ze_workload_type(); + } + + if (!_fences_are_created) { + if (_sync_output_with_fences) { + _logger.debug("Pipeline - getCommandQueue() - create new fences"); + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i] = std::make_unique(*_command_queue); + } + } + + _fences_are_created = true; + } + + _logger.debug("Pipeline - getCommandQueue() completed"); +} + void Pipeline::push() { _logger.debug("Pipeline - push() started"); + getCommandQueue(); + if (_config.get()) { if (_id) { auto previousIndex = _graph->get_last_submitted_id(); @@ -164,10 +216,10 @@ void Pipeline::push() { for (size_t i = 0; i < _command_lists.size(); ++i) { OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); - if (sync_output_with_fences_) { - _graph->get_command_queue()->executeCommandList(*_command_lists.at(i), *_fences.at(i)); + if (_sync_output_with_fences) { + _command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { - 
_graph->get_command_queue()->executeCommandList(*_command_lists.at(i)); + _command_queue->executeCommandList(*_command_lists.at(i)); } } @@ -179,7 +231,7 @@ void Pipeline::pull() { OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); for (size_t i = 0; i < _command_lists.size(); ++i) { - if (sync_output_with_fences_) { + if (_sync_output_with_fences) { _fences.at(i)->hostSynchronize(); } else { _events.at(i)->hostSynchronize(); @@ -194,17 +246,17 @@ void Pipeline::pull() { }; void Pipeline::reset() const { - _logger.debug("Pipeline - rest() started"); + _logger.debug("Pipeline - reset() started"); for (size_t i = 0; i < _command_lists.size(); ++i) { - if (sync_output_with_fences_) { + if (_sync_output_with_fences) { _fences.at(i)->reset(); } else { _events.at(i)->reset(); } } - _logger.debug("Pipeline - rest() completed"); + _logger.debug("Pipeline - reset() completed"); }; void Pipeline::updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size) { @@ -257,4 +309,16 @@ void Pipeline::closeCommandListIndex(size_t command_list_index) { _command_lists.at(command_list_index)->close(); }; +Pipeline::~Pipeline() { + // fences shall be destroyed before the command queue is destroyed + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i].reset(); + } + } + + _command_queue.reset(); + _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index ec4d7091ac6345..8b4124bcfc6154 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -42,9 +42,8 @@ class IGraph : public std::enable_shared_from_this { const std::vector& get_input_descriptors() const; const std::vector& get_output_descriptors() const; - const std::shared_ptr& get_command_queue() const; - void set_workload_type(const ov::WorkloadType workloadType) const; + void set_workload_type(const ov::WorkloadType workloadType); std::mutex& get_mutex(); @@ -56,6 +55,7 @@ class IGraph : public std::enable_shared_from_this { uint32_t get_last_submitted_id() const; const std::optional get_batch_size() const; + const std::optional get_ze_workload_type() const; protected: /** @@ -83,7 +83,6 @@ class IGraph : public std::enable_shared_from_this { std::vector _input_descriptors; std::vector _output_descriptors; - std::shared_ptr _command_queue; std::vector> _last_submitted_event; // Used to protect zero pipeline creation in the graph. 
The pipeline should be created only once per graph when the @@ -101,6 +100,8 @@ class IGraph : public std::enable_shared_from_this { */ std::optional _batch_size = std::nullopt; + std::optional _ze_workload_type = std::nullopt; + Logger _logger; }; diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index f641813e44c0e7..ce54b53ea20432 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -21,7 +21,11 @@ IGraph::IGraph(ze_graph_handle_t handle, : _handle(handle), _metadata(std::move(metadata)), _blobPtr(std::move(blobPtr)), - _logger("IGraph", config.get()) {} + _logger("IGraph", config.get()) { + if (config.has()) { + set_workload_type(config.get()); + } +} const NetworkMetadata& IGraph::get_metadata() const { return _metadata; @@ -43,28 +47,8 @@ const std::vector& IGraph::get_output_descriptors() const { return _output_descriptors; } -const std::shared_ptr& IGraph::get_command_queue() const { - return _command_queue; -} - -void IGraph::set_workload_type(const ov::WorkloadType workloadType) const { - if (_command_queue == nullptr) { - return; - } - - ze_command_queue_workload_type_t zeWorkloadType; - switch (workloadType) { - case ov::WorkloadType::DEFAULT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; - break; - case ov::WorkloadType::EFFICIENT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; - break; - default: - OPENVINO_THROW("Unknown value for WorkloadType!"); - } - - _command_queue->setWorkloadType(zeWorkloadType); +void IGraph::set_workload_type(const ov::WorkloadType workloadType) { + _ze_workload_type = zeroUtils::toZeQueueWorkloadType(workloadType); } std::mutex& IGraph::get_mutex() { @@ -153,4 +137,8 @@ const std::optional IGraph::get_batch_size() const { return _batch_size; } +const std::optional IGraph::get_ze_workload_type() const { + return _ze_workload_type; +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index df538521d856f1..a9290d296822c7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -48,7 +48,7 @@ class ZeGraphExtWrappers { void setGraphArgumentValue(ze_graph_handle_t graphHandle, uint32_t argi_, const void* argv) const; - void initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const; + void initializeGraph(ze_graph_handle_t graphHandle) const; private: std::unordered_set getQueryResultFromSupportedLayers( @@ -60,7 +60,7 @@ class ZeGraphExtWrappers { std::vector& inputs, std::vector& outputs) const; - void initialize_graph_through_command_list(ze_graph_handle_t graphHandle, const Config& config) const; + void initialize_graph_through_command_list(ze_graph_handle_t graphHandle) const; std::shared_ptr _zeroInitStruct; uint32_t _graphExtVersion; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 48ae84a6c841ea..97f77ca644dc08 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -103,23 +103,8 @@ void DriverGraph::initialize(const Config& config) { deviceProperties.stype = 
ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", zeDeviceGetProperties(_zeroInitStruct->getDevice(), &deviceProperties)); - auto groupOrdinal = zeroUtils::findGroupOrdinal(_zeroInitStruct->getDevice(), deviceProperties); - bool turbo = false; - if (config.has()) { - turbo = config.get(); - } - - _command_queue = std::make_shared(_zeroInitStruct, - zeroUtils::toZeQueuePriority(config.get()), - groupOrdinal, - turbo); - - if (config.has()) { - set_workload_type(config.get()); - } - - _zeGraphExt->initializeGraph(_handle, config); + _zeGraphExt->initializeGraph(_handle); _logger.debug("Graph initialize finish"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index 726a1196b7c88b..3c491fd81fb1c8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -103,23 +103,8 @@ void PluginGraph::initialize(const Config& config) { deviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", zeDeviceGetProperties(_zeroInitStruct->getDevice(), &deviceProperties)); - auto groupOrdinal = zeroUtils::findGroupOrdinal(_zeroInitStruct->getDevice(), deviceProperties); - bool turbo = false; - if (config.has()) { - turbo = config.get(); - } - - _command_queue = std::make_shared(_zeroInitStruct, - zeroUtils::toZeQueuePriority(config.get()), - groupOrdinal, - turbo); - - if (config.has()) { - set_workload_type(config.get()); - } - - _zeGraphExt->initializeGraph(_handle, config); + _zeGraphExt->initializeGraph(_handle); if (config.get() != ov::intel_npu::BatchMode::COMPILER) { _batch_size = get_batch_size(_metadata); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index d5e793d4fff9fe..0a13cc075be601 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -160,10 +160,10 @@ void ZeGraphExtWrappers::setGraphArgumentValue(ze_graph_handle_t graphHandle, ui THROW_ON_FAIL_FOR_LEVELZERO_EXT("zeGraphSetArgumentValue", result, _zeroInitStruct->getGraphDdiTable()); } -void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const { +void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle) const { if (_zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8) { _logger.debug("Use initialize_graph_through_command_list for ext version smaller than 1.8"); - initialize_graph_through_command_list(graphHandle, config); + initialize_graph_through_command_list(graphHandle); } else { _logger.debug("Initialize graph based on graph properties for ext version larger than 1.8"); ze_graph_properties_2_t properties = {}; @@ -177,13 +177,12 @@ void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle, const Co } if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) { - initialize_graph_through_command_list(graphHandle, config); + initialize_graph_through_command_list(graphHandle); } } } -void ZeGraphExtWrappers::initialize_graph_through_command_list(ze_graph_handle_t graphHandle, - const Config& config) const { +void ZeGraphExtWrappers::initialize_graph_through_command_list(ze_graph_handle_t graphHandle) const { ze_device_properties_t 
deviceProperties = {}; deviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index e68eb0200a09ce..8af0dcd2e1d9a3 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -8,6 +8,8 @@ #include #include +#include + #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_api.hpp" #include "intel_npu/utils/zero/zero_result.hpp" @@ -15,6 +17,29 @@ namespace intel_npu { +enum priority { + NORMAL, + LOW, + HIGH, + + PRIORITY_COUNT +}; + +enum turbo { + DISABLED, + ENABLED, + + TURBO_COUNT +}; + +enum workload { + NOT_SET, + DEFAULT, + EFFICIENT, + + WORKLOAD_COUNT +}; + struct ArgumentDescriptor { ze_graph_argument_properties_3_t info; uint32_t idx; @@ -50,6 +75,42 @@ namespace zeroUtils { ze_result_to_description(result)); \ } +static inline priority toPriorityEnum(const ze_command_queue_priority_t& val) { + switch (val) { + case ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW: + return priority::LOW; + case ZE_COMMAND_QUEUE_PRIORITY_NORMAL: + return priority::NORMAL; + case ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH: + return priority::HIGH; + default: + OPENVINO_THROW("Incorrect queue priority."); + } +} + +static inline turbo toTurboEnum(bool val) { + if (val) { + return turbo::ENABLED; + } + + return turbo::DISABLED; +} + +static inline workload toWorkloadEnum(const std::optional& val) { + if (!val.has_value()) { + return workload::NOT_SET; + } + + switch (*val) { + case ZE_WORKLOAD_TYPE_DEFAULT: + return workload::DEFAULT; + case ZE_WORKLOAD_TYPE_BACKGROUND: + return workload::EFFICIENT; + default: + OPENVINO_THROW("Incorrect workload type."); + } +} + static inline ze_command_queue_priority_t toZeQueuePriority(const ov::hint::Priority& val) { switch (val) { case ov::hint::Priority::LOW: @@ -63,6 +124,17 @@ static inline ze_command_queue_priority_t toZeQueuePriority(const ov::hint::Prio } } +static inline ze_command_queue_workload_type_t toZeQueueWorkloadType(const ov::WorkloadType& val) { + switch (val) { + case ov::WorkloadType::DEFAULT: + return ZE_WORKLOAD_TYPE_DEFAULT; + case ov::WorkloadType::EFFICIENT: + return ZE_WORKLOAD_TYPE_BACKGROUND; + default: + OPENVINO_THROW("Unknown value for WorkloadType."); + } +} + static inline std::size_t precisionToSize(const ze_graph_argument_precision_t val) { switch (val) { case ZE_GRAPH_ARGUMENT_PRECISION_INT4: diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index c2041d678b0c42..a515f14c40b882 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -118,7 +118,7 @@ class Fence { class CommandQueue { public: CommandQueue() = delete; - CommandQueue(const std::shared_ptr& initStructs, + CommandQueue(const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const uint32_t& group_ordinal, bool turbo = false); @@ -129,18 +129,44 @@ class CommandQueue { void executeCommandList(CommandList& command_list) const; void executeCommandList(CommandList& command_list, Fence& fence) const; - void 
setWorkloadType(ze_command_queue_workload_type_t workloadType) const; + void setWorkloadType(ze_command_queue_workload_type_t workload_type) const; ~CommandQueue(); inline ze_command_queue_handle_t handle() const { return _handle; } private: - std::shared_ptr _initStructs; + std::shared_ptr _init_structs; Logger _log; ze_command_queue_handle_t _handle = nullptr; }; +static std::array, workload::WORKLOAD_COUNT>, turbo::TURBO_COUNT>, + priority::PRIORITY_COUNT> + _gloabal_command_queues; + +class CommandQueueFactory { +public: + CommandQueueFactory(); + CommandQueueFactory(const CommandQueueFactory& other) = delete; + CommandQueueFactory(CommandQueueFactory&& other) = delete; + void operator=(const CommandQueueFactory&) = delete; + void operator=(CommandQueueFactory&&) = delete; + + std::shared_ptr& getCommandQueue(const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + const uint32_t& group_ordinal, + bool turbo); + + void freeCommandQueue(const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + bool turbo); + +private: + Logger _log; +}; + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 4868d6326c5fe4..ac998975bcb96c 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -22,6 +22,8 @@ EventPool::~EventPool() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeEventPoolDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } Event::Event(const std::shared_ptr& event_pool, uint32_t event_index) @@ -53,6 +55,8 @@ Event::~Event() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeEventDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } CommandList::CommandList(const std::shared_ptr& initStructs, @@ -107,6 +111,8 @@ CommandList::~CommandList() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeCommandListDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_value) const { ze_mutable_graph_argument_exp_desc_t desc = { @@ -128,17 +134,17 @@ void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_v zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t)); } -CommandQueue::CommandQueue(const std::shared_ptr& initStructs, +CommandQueue::CommandQueue(const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const uint32_t& group_ordinal, bool turbo) - : _initStructs(initStructs), + : _init_structs(init_structs), _log("CommandQueue", Logger::global().level()) { ze_command_queue_desc_t queue_desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority}; if (turbo) { - if (_initStructs->getCommandQueueDdiTable().version()) { + if (_init_structs->getCommandQueueDdiTable().version()) { ze_command_queue_desc_npu_ext_t turbo_cfg = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC_NPU_EXT, nullptr, turbo}; queue_desc.pNext = &turbo_cfg; } else { @@ -148,7 +154,7 @@ CommandQueue::CommandQueue(const std::shared_ptr& initStr THROW_ON_FAIL_FOR_LEVELZERO( "zeCommandQueueCreate", - zeCommandQueueCreate(_initStructs->getContext(), _initStructs->getDevice(), &queue_desc, &_handle)); + zeCommandQueueCreate(_init_structs->getContext(), _init_structs->getDevice(), &queue_desc, &_handle)); } void 
CommandQueue::executeCommandList(CommandList& command_list) const { THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", @@ -159,10 +165,11 @@ void CommandQueue::executeCommandList(CommandList& command_list, Fence& fence) c zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle())); } -void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workloadType) const { - if (_initStructs->getCommandQueueDdiTable().version()) { - THROW_ON_FAIL_FOR_LEVELZERO("zeSetWorkloadType", - _initStructs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workloadType)); +void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workload_type) const { + if (_init_structs->getCommandQueueDdiTable().version()) { + THROW_ON_FAIL_FOR_LEVELZERO( + "zeSetWorkloadType", + _init_structs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workload_type)); } else { OPENVINO_THROW("The WorkloadType property is not supported by the current Driver Version!"); } @@ -173,6 +180,8 @@ CommandQueue::~CommandQueue() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeCommandQueueDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } Fence::Fence(const CommandQueue& command_queue) : _log("Fence", Logger::global().level()) { @@ -190,6 +199,47 @@ Fence::~Fence() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeFenceDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; +} + +CommandQueueFactory::CommandQueueFactory() : _log("CommandQueue", Logger::global().level()) {} +std::shared_ptr& CommandQueueFactory::getCommandQueue( + const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + const uint32_t& group_ordinal, + bool turbo) { + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] == nullptr) { + _log.debug("Create new command queue"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] = + std::make_shared(init_structs, priority, group_ordinal, turbo); + + if (zeroUtils::toWorkloadEnum(workloadType) != workload::NOT_SET) { + _log.debug("Set workload type"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + ->setWorkloadType(*workloadType); + } + } + + return _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)]; +} +void CommandQueueFactory::freeCommandQueue(const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + bool turbo) { + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + .use_count() == 1) { + _log.debug("Destroy command queue"); + + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + .reset(); + } } } // namespace intel_npu From 629f02f89ed65e45698eb7a3430cbf32638d5c51 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 11:26:33 +0200 Subject: [PATCH 02/13] Adding test case Signed-off-by: Bogdan Pereanu --- .../internal/overload/compile_and_infer.hpp | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp 
b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index e44329c5de56c8..c4388978b730e0 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -206,6 +206,47 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeUpdateAfterCompilation } } +TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeUpdateAfterCompilationWithMultipleInfers) { + if (isCommandQueueExtSupported()) { + OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); + + auto secondCompiledModel = core->compile_model(function, target_device, configuration); + + ov::InferRequest req1, req2, req3; + OV_ASSERT_NO_THROW(req1 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req3 = secondCompiledModel.create_infer_request()); + bool isCalled = false; + OV_ASSERT_NO_THROW(req1.set_callback([&](std::exception_ptr exception_ptr) { + ASSERT_EQ(exception_ptr, nullptr); + isCalled = true; + })); + OV_ASSERT_NO_THROW(req1.start_async()); + OV_ASSERT_NO_THROW(req1.wait()); + ASSERT_TRUE(isCalled); + + OV_ASSERT_NO_THROW(req3.infer()); + + ov::AnyMap modelConfiguration; + modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; + OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + ASSERT_EQ(execNet.get_property(workload_type.name()).as(), WorkloadType::DEFAULT); + OV_ASSERT_NO_THROW(req2 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req2.infer()); + + modelConfiguration[workload_type.name()] = WorkloadType::EFFICIENT; + OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + ASSERT_EQ(execNet.get_property(workload_type.name()).as(), WorkloadType::EFFICIENT); + isCalled = false; + OV_ASSERT_NO_THROW(req2.set_callback([&](std::exception_ptr exception_ptr) { + ASSERT_EQ(exception_ptr, nullptr); + isCalled = true; + })); + OV_ASSERT_NO_THROW(req2.start_async()); + OV_ASSERT_NO_THROW(req2.wait()); + ASSERT_TRUE(isCalled); + } +} + using OVCompileAndInferRequestTurbo = OVCompileAndInferRequest; TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { From 458de270db8e85196b06090c6e4f994e0f53aa1d Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 14:47:42 +0200 Subject: [PATCH 03/13] Destroy pipeline if it was created but workload type is not supported Signed-off-by: Bogdan Pereanu --- .../src/backend/src/zero_pipeline.cpp | 14 ++++++++------ .../intel_npu/utils/zero/zero_wrappers.hpp | 11 ++++++----- .../src/utils/src/zero/zero_wrappers.cpp | 18 +++++++++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 7ce0d636d255a3..6f6c64f1b89605 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -311,14 +311,16 @@ void Pipeline::closeCommandListIndex(size_t command_list_index) { Pipeline::~Pipeline() { // fences shall be destroyed before the command queue is destroyed - if (_sync_output_with_fences) { - for (size_t i = 0; i < _number_of_command_lists; i++) { - _fences[i].reset(); + if (_command_queue) { + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i].reset(); + } } - } - _command_queue.reset(); - _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + _command_queue.reset(); + 
_command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + } } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index a515f14c40b882..b5a7a92faf37cd 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -155,11 +155,12 @@ class CommandQueueFactory { void operator=(const CommandQueueFactory&) = delete; void operator=(CommandQueueFactory&&) = delete; - std::shared_ptr& getCommandQueue(const std::shared_ptr& init_structs, - const ze_command_queue_priority_t& priority, - const std::optional& workloadType, - const uint32_t& group_ordinal, - bool turbo); + const std::shared_ptr& getCommandQueue( + const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + const uint32_t& group_ordinal, + bool turbo); void freeCommandQueue(const ze_command_queue_priority_t& priority, const std::optional& workloadType, diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index ac998975bcb96c..acaf9aad037233 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -204,7 +204,7 @@ Fence::~Fence() { } CommandQueueFactory::CommandQueueFactory() : _log("CommandQueue", Logger::global().level()) {} -std::shared_ptr& CommandQueueFactory::getCommandQueue( +const std::shared_ptr& CommandQueueFactory::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const std::optional& workloadType, @@ -218,10 +218,18 @@ std::shared_ptr& CommandQueueFactory::getCommandQueue( std::make_shared(init_structs, priority, group_ordinal, turbo); if (zeroUtils::toWorkloadEnum(workloadType) != workload::NOT_SET) { - _log.debug("Set workload type"); - _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] - ->setWorkloadType(*workloadType); + try { + _log.debug("Set workload type"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + ->setWorkloadType(*workloadType); + } catch (const std::exception& ex) { + _log.debug("Destroy pipeline if workload type is not supported!"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + .reset(); + OPENVINO_THROW(ex.what()); + } } } From f7c0aa2c80c51e2c788350dc14b469b29ee0d277 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 14:48:31 +0200 Subject: [PATCH 04/13] Update tests, command queue is created and set at the first infer Signed-off-by: Bogdan Pereanu --- .../internal/overload/compile_and_infer.hpp | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index c4388978b730e0..e3775ab13385bc 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -100,11 
+100,12 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) { return property == workload_type.name(); }); + OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); + + ov::InferRequest req; + if (isCommandQueueExtSupported()) { ASSERT_TRUE(workloadTypeSupported); - ov::InferRequest req; - OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); - const auto properties = execNet.get_property(supported_properties.name()).as>(); ASSERT_TRUE(std::any_of(properties.begin(), properties.end(), [](const PropertyName& property) { return property == workload_type.name(); @@ -120,8 +121,9 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); ASSERT_FALSE(workloadTypeSupported); - OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration), + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "WorkloadType property is not supported by the current Driver Version!"); } @@ -137,10 +139,11 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) { return property == workload_type.name(); }); + ov::InferRequest req; + if (isCommandQueueExtSupported()) { ASSERT_TRUE(workloadTypeSupported); OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); - ov::InferRequest req; OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); bool is_called = false; OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) { @@ -151,8 +154,10 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); ASSERT_FALSE(workloadTypeSupported); - OV_EXPECT_THROW_HAS_SUBSTRING(execNet.set_property(modelConfiguration), + OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "WorkloadType property is not supported by the current Driver Version!"); } @@ -164,9 +169,9 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) { ov::AnyMap modelConfiguration; modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + ov::InferRequest req; if (isCommandQueueExtSupported()) { - ov::InferRequest req; OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); bool is_called = false; OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) { @@ -177,7 +182,8 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { - OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(), + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "WorkloadType property is not supported by the current Driver Version!"); } @@ -258,12 +264,13 @@ TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { return property == intel_npu::turbo.name(); }); + ov::InferRequest req; + if (isCommandQueueExtSupported()) { ASSERT_TRUE(isTurboSupported); OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); auto turbosetting_compiled_model = execNet.get_property(intel_npu::turbo.name()); OV_ASSERT_NO_THROW(turbosetting_compiled_model = true); - ov::InferRequest req; OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); 
bool is_called = false; OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) { @@ -274,17 +281,9 @@ TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { - auto cr_ex = configuration.find(intel_npu::defer_weights_load.name()); - if (cr_ex->second.as() == false) { - OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration), - ov::Exception, - "Turbo is not supported by the current driver"); - } else { - OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); - OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(), - ov::Exception, - "Turbo is not supported by the current driver"); - } + OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "Turbo is not supported by the current driver"); } } From 619016b465a79fe2eee192c5f8ef24c2f706d65e Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 15:21:42 +0200 Subject: [PATCH 05/13] Update the names of the variables, methods, classes Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_pipeline.hpp | 3 +- .../src/backend/src/zero_pipeline.cpp | 36 +++++++------- .../include/intel_npu/common/igraph.hpp | 2 +- .../intel_npu/utils/zero/zero_wrappers.hpp | 20 ++++---- .../src/utils/src/zero/zero_wrappers.cpp | 48 +++++++++---------- 5 files changed, 54 insertions(+), 55 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index ae5783d281f3d5..85a190891a1532 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -57,7 +57,7 @@ struct Pipeline { */ size_t _number_of_command_lists; - CommandQueueFactory _command_queue_factory; + CommandQueueManager _command_queue_manager; std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; @@ -68,7 +68,6 @@ struct Pipeline { Logger _logger; uint32_t _group_ordinal; - bool _fences_are_created = false; std::mutex _mutex; bool _turbo = false; ze_command_queue_priority_t _ze_queue_priority; diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 6f6c64f1b89605..0c79bc089f59be 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -32,8 +32,8 @@ Pipeline::Pipeline(const Config& config, _id(_graph->get_unique_id()), _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), _event_pool{ - std::make_shared(init_structs->getDevice(), - init_structs->getContext(), + std::make_shared(_init_structs->getDevice(), + _init_structs->getContext(), _number_of_command_lists ? static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), _logger("Pipeline", _config.get()), @@ -57,9 +57,9 @@ Pipeline::Pipeline(const Config& config, _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( - std::make_unique(init_structs, + std::make_unique(_init_structs, group_ordinal, - init_structs->getMutableCommandListVersion() ? true : false)); + _init_structs->getMutableCommandListVersion() ? 
true : false)); _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); } @@ -158,7 +158,7 @@ void Pipeline::getCommandQueue() { _logger.debug("Pipeline - getCommandQueue() started"); std::lock_guard lock(_mutex); - _command_queue = _command_queue_factory.getCommandQueue(_init_structs, + _command_queue = _command_queue_manager.getCommandQueue(_init_structs, _ze_queue_priority, _graph->get_ze_workload_type(), _group_ordinal, @@ -170,28 +170,26 @@ void Pipeline::getCommandQueue() { if (_sync_output_with_fences) { _logger.debug("Pipeline - getCommandQueue() - destroy old fences"); for (size_t i = 0; i < _number_of_command_lists; i++) { - _fences[i].reset(); + if (_fences[i] != nullptr) { + _fences[i].reset(); + } } } _logger.debug("Pipeline - getCommandQueue() - free command queue"); - _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); - - _fences_are_created = false; + _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } _ze_workload_type = _graph->get_ze_workload_type(); } - if (!_fences_are_created) { - if (_sync_output_with_fences) { - _logger.debug("Pipeline - getCommandQueue() - create new fences"); - for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_sync_output_with_fences) { + _logger.debug("Pipeline - getCommandQueue() - create new fences"); + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] == nullptr) { _fences[i] = std::make_unique(*_command_queue); } } - - _fences_are_created = true; } _logger.debug("Pipeline - getCommandQueue() completed"); @@ -310,16 +308,18 @@ void Pipeline::closeCommandListIndex(size_t command_list_index) { }; Pipeline::~Pipeline() { - // fences shall be destroyed before the command queue is destroyed if (_command_queue) { if (_sync_output_with_fences) { + // fences shall be destroyed before the command queue is destroyed for (size_t i = 0; i < _number_of_command_lists; i++) { - _fences[i].reset(); + if (_fences[i] != nullptr) { + _fences[i].reset(); + } } } _command_queue.reset(); - _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } } diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 8b4124bcfc6154..efb5b6b8978cfc 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -44,6 +44,7 @@ class IGraph : public std::enable_shared_from_this { const std::vector& get_output_descriptors() const; void set_workload_type(const ov::WorkloadType workloadType); + const std::optional get_ze_workload_type() const; std::mutex& get_mutex(); @@ -55,7 +56,6 @@ class IGraph : public std::enable_shared_from_this { uint32_t get_last_submitted_id() const; const std::optional get_batch_size() const; - const std::optional get_ze_workload_type() const; protected: /** diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index b5a7a92faf37cd..327ab6ce553809 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -61,7 +61,7 @@ class CommandList { public: friend class CommandQueue; 
CommandList() = delete; - CommandList(const std::shared_ptr& initStructs, + CommandList(const std::shared_ptr& init_structs, const uint32_t& group_ordinal, bool mtci_is_supported = false); CommandList(const CommandList&) = delete; @@ -85,7 +85,7 @@ class CommandList { } private: - std::shared_ptr _initStructs; + std::shared_ptr _init_structs; Logger _log; @@ -147,23 +147,23 @@ static std::array, workload: priority::PRIORITY_COUNT> _gloabal_command_queues; -class CommandQueueFactory { +class CommandQueueManager { public: - CommandQueueFactory(); - CommandQueueFactory(const CommandQueueFactory& other) = delete; - CommandQueueFactory(CommandQueueFactory&& other) = delete; - void operator=(const CommandQueueFactory&) = delete; - void operator=(CommandQueueFactory&&) = delete; + CommandQueueManager(); + CommandQueueManager(const CommandQueueManager& other) = delete; + CommandQueueManager(CommandQueueManager&& other) = delete; + void operator=(const CommandQueueManager&) = delete; + void operator=(CommandQueueManager&&) = delete; const std::shared_ptr& getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, - const std::optional& workloadType, + const std::optional& workload_type, const uint32_t& group_ordinal, bool turbo); void freeCommandQueue(const ze_command_queue_priority_t& priority, - const std::optional& workloadType, + const std::optional& workload_type, bool turbo); private: diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index acaf9aad037233..79e6d3da85d015 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -59,16 +59,16 @@ Event::~Event() { _handle = nullptr; } -CommandList::CommandList(const std::shared_ptr& initStructs, +CommandList::CommandList(const std::shared_ptr& init_structs, const uint32_t& group_ordinal, bool mtci_is_supported) - : _initStructs(initStructs), + : _init_structs(init_structs), _log("CommandList", Logger::global().level()) { ze_mutable_command_list_exp_desc_t mutable_desc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC, nullptr, 0}; ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, &mutable_desc, group_ordinal, 0}; THROW_ON_FAIL_FOR_LEVELZERO( "zeCommandListCreate", - zeCommandListCreate(_initStructs->getContext(), _initStructs->getDevice(), &desc, &_handle)); + zeCommandListCreate(_init_structs->getContext(), _init_structs->getDevice(), &desc, &_handle)); if (mtci_is_supported) { ze_mutable_command_id_exp_desc_t mutableCmdIdDesc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC, @@ -87,14 +87,14 @@ void CommandList::appendMemoryCopy(void* dst, const void* src, const std::size_t } void CommandList::appendGraphInitialize(const ze_graph_handle_t& graph_handle) const { ze_result_t result = - _initStructs->getGraphDdiTable().pfnAppendGraphInitialize(_handle, graph_handle, nullptr, 0, nullptr); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphInitialize", result, _initStructs->getGraphDdiTable()); + _init_structs->getGraphDdiTable().pfnAppendGraphInitialize(_handle, graph_handle, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphInitialize", result, _init_structs->getGraphDdiTable()); } void CommandList::appendGraphExecute(const ze_graph_handle_t& graph_handle, const ze_graph_profiling_query_handle_t& profiling_query_handle) const { - ze_result_t result = _initStructs->getGraphDdiTable() + ze_result_t result = 
_init_structs->getGraphDdiTable() .pfnAppendGraphExecute(_handle, graph_handle, profiling_query_handle, nullptr, 0, nullptr); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphExecute", result, _initStructs->getGraphDdiTable()); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphExecute", result, _init_structs->getGraphDdiTable()); } void CommandList::appendNpuTimestamp(uint64_t* timestamp_buff) const { THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWriteGlobalTimestamp", @@ -116,9 +116,9 @@ CommandList::~CommandList() { } void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_value) const { ze_mutable_graph_argument_exp_desc_t desc = { - (ZE_MAJOR_VERSION(_initStructs->getZeDrvApiVersion()) > 1 || - (ZE_MAJOR_VERSION(_initStructs->getZeDrvApiVersion()) == 1 && - ZE_MINOR_VERSION(_initStructs->getZeDrvApiVersion()) >= 11)) + (ZE_MAJOR_VERSION(_init_structs->getZeDrvApiVersion()) > 1 || + (ZE_MAJOR_VERSION(_init_structs->getZeDrvApiVersion()) == 1 && + ZE_MINOR_VERSION(_init_structs->getZeDrvApiVersion()) >= 11)) ? ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC : static_cast(ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC_DEPRECATED), nullptr, @@ -203,30 +203,30 @@ Fence::~Fence() { _handle = nullptr; } -CommandQueueFactory::CommandQueueFactory() : _log("CommandQueue", Logger::global().level()) {} -const std::shared_ptr& CommandQueueFactory::getCommandQueue( +CommandQueueManager::CommandQueueManager() : _log("CommandQueue", Logger::global().level()) {} +const std::shared_ptr& CommandQueueManager::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, - const std::optional& workloadType, + const std::optional& workload_type, const uint32_t& group_ordinal, bool turbo) { if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] == nullptr) { + [zeroUtils::toWorkloadEnum(workload_type)] == nullptr) { _log.debug("Create new command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] = + [zeroUtils::toWorkloadEnum(workload_type)] = std::make_shared(init_structs, priority, group_ordinal, turbo); - if (zeroUtils::toWorkloadEnum(workloadType) != workload::NOT_SET) { + if (zeroUtils::toWorkloadEnum(workload_type) != workload::NOT_SET) { try { _log.debug("Set workload type"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] - ->setWorkloadType(*workloadType); + [zeroUtils::toWorkloadEnum(workload_type)] + ->setWorkloadType(*workload_type); } catch (const std::exception& ex) { _log.debug("Destroy pipeline if workload type is not supported!"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] + [zeroUtils::toWorkloadEnum(workload_type)] .reset(); OPENVINO_THROW(ex.what()); } @@ -234,18 +234,18 @@ const std::shared_ptr& CommandQueueFactory::getCommandQueue( } return _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)]; + [zeroUtils::toWorkloadEnum(workload_type)]; } -void CommandQueueFactory::freeCommandQueue(const ze_command_queue_priority_t& priority, - const std::optional& workloadType, +void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& priority, + const std::optional& 
workload_type, bool turbo) { if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] + [zeroUtils::toWorkloadEnum(workload_type)] .use_count() == 1) { _log.debug("Destroy command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] + [zeroUtils::toWorkloadEnum(workload_type)] .reset(); } } From 3cbf804bd5cc853a204fa0c51d20d486c248772f Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Tue, 28 Jan 2025 14:13:22 +0200 Subject: [PATCH 06/13] Create a static instance for CommandQueueManager class and lock get and free methods Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_pipeline.hpp | 1 - .../src/backend/src/zero_pipeline.cpp | 56 ++++++++++--------- .../intel_npu/utils/zero/zero_wrappers.hpp | 6 ++ .../src/utils/src/zero/zero_wrappers.cpp | 9 ++- 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 85a190891a1532..29069f0a0cf8cc 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -57,7 +57,6 @@ struct Pipeline { */ size_t _number_of_command_lists; - CommandQueueManager _command_queue_manager; std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 0c79bc089f59be..7ae3db67433a0c 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -156,38 +156,40 @@ Pipeline::Pipeline(const Config& config, void Pipeline::getCommandQueue() { _logger.debug("Pipeline - getCommandQueue() started"); - std::lock_guard lock(_mutex); - - _command_queue = _command_queue_manager.getCommandQueue(_init_structs, - _ze_queue_priority, - _graph->get_ze_workload_type(), - _group_ordinal, - _turbo); - - if (_ze_workload_type != _graph->get_ze_workload_type()) { - if (_ze_workload_type.has_value()) { - // fences created for the old command queue shall be destroyed and make new ones - if (_sync_output_with_fences) { - _logger.debug("Pipeline - getCommandQueue() - destroy old fences"); - for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] != nullptr) { - _fences[i].reset(); + + _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + { + std::lock_guard lock(_mutex); + + if (_ze_workload_type != _graph->get_ze_workload_type()) { + if (_ze_workload_type.has_value()) { + // fences created for the old command queue shall be destroyed and make new ones + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] != nullptr) { + _logger.debug("Pipeline - getCommandQueue() - destroy old fence"); + _fences[i].reset(); + } } } + + _logger.debug("Pipeline - getCommandQueue() - free command queue"); + CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } - _logger.debug("Pipeline - getCommandQueue() - free command queue"); - _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + _ze_workload_type = 
_graph->get_ze_workload_type(); } - _ze_workload_type = _graph->get_ze_workload_type(); - } - - if (_sync_output_with_fences) { - _logger.debug("Pipeline - getCommandQueue() - create new fences"); - for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] == nullptr) { - _fences[i] = std::make_unique(*_command_queue); + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] == nullptr) { + _logger.debug("Pipeline - getCommandQueue() - create new fence"); + _fences[i] = std::make_unique(*_command_queue); + } } } } @@ -319,7 +321,7 @@ Pipeline::~Pipeline() { } _command_queue.reset(); - _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 327ab6ce553809..623bfd701546ed 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -6,6 +6,8 @@ #include +#include + #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_types.hpp" @@ -155,6 +157,8 @@ class CommandQueueManager { void operator=(const CommandQueueManager&) = delete; void operator=(CommandQueueManager&&) = delete; + static CommandQueueManager& getInstance(); + const std::shared_ptr& getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, @@ -168,6 +172,8 @@ class CommandQueueManager { private: Logger _log; + + std::mutex _mutex; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 79e6d3da85d015..fa83e89bd95e56 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -204,12 +204,18 @@ Fence::~Fence() { } CommandQueueManager::CommandQueueManager() : _log("CommandQueue", Logger::global().level()) {} +CommandQueueManager& CommandQueueManager::getInstance() { + static CommandQueueManager instance; + return instance; +} const std::shared_ptr& CommandQueueManager::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const std::optional& workload_type, const uint32_t& group_ordinal, bool turbo) { + std::lock_guard lock(_mutex); + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] == nullptr) { _log.debug("Create new command queue"); @@ -239,11 +245,12 @@ const std::shared_ptr& CommandQueueManager::getCommandQueue( void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& priority, const std::optional& workload_type, bool turbo) { + std::lock_guard lock(_mutex); + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] .use_count() == 1) { _log.debug("Destroy command queue"); - _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] .reset(); From d9002d0948a1efc7b691208b62793f408ebfe900 Mon Sep 17 00:00:00 2001 From: Bogdan 
Pereanu Date: Tue, 28 Jan 2025 14:13:33 +0200 Subject: [PATCH 07/13] Add new func test Signed-off-by: Bogdan Pereanu --- .../functional/behavior/infer_request_run.hpp | 30 +++++++++++++++++++ .../internal/overload/compile_and_infer.hpp | 12 ++++++++ 2 files changed, 42 insertions(+) diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index ab53a442c16cda..54590a7abe513f 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -170,6 +170,36 @@ TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsSyncInfers) { } } +TEST_P(InferRequestRunTests, MultipleCompiledModelsTestsSyncInfers) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + // Load CNNNetwork to target plugins + const int no_of_iterations = 256; + std::array compiled_models; + + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); + } + + // Create InferRequests + std::array infer_reqs; + std::array infer_reqs_threads; + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i] = std::thread([&infer_reqs, i]() -> void { + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + infer_reqs[i] = {}; + }); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i].join(); + } +} + TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsAsyncInfers) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index e3775ab13385bc..877eb6628f7645 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -250,6 +250,18 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeUpdateAfterCompilation OV_ASSERT_NO_THROW(req2.start_async()); OV_ASSERT_NO_THROW(req2.wait()); ASSERT_TRUE(isCalled); + + req1 = {}; + req2 = {}; + req3 = {}; + + OV_ASSERT_NO_THROW(req1 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req2 = secondCompiledModel.create_infer_request()); + OV_ASSERT_NO_THROW(req1.infer()); + OV_ASSERT_NO_THROW(req3 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req2.infer()); + OV_ASSERT_NO_THROW(req3.infer()); + OV_ASSERT_NO_THROW(req3.infer()); } } From 20e261c025d2419a88e5e49386bf8ad92b0115ee Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 10:59:25 +0200 Subject: [PATCH 08/13] Print correct error message Signed-off-by: Bogdan Pereanu --- .../src/utils/src/zero/zero_wrappers.cpp | 82 ++++++++++--------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index fa83e89bd95e56..7f3d5b46976526 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -14,8 +14,8 @@ EventPool::EventPool(ze_device_handle_t device_handle, const 
ze_context_handle_t nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, event_count}; - THROW_ON_FAIL_FOR_LEVELZERO("zeEventPoolCreate", - zeEventPoolCreate(context, &event_pool_desc, 1, &device_handle, &_handle)); + auto result = zeEventPoolCreate(context, &event_pool_desc, 1, &device_handle, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventPoolCreate", result); } EventPool::~EventPool() { auto result = zeEventPoolDestroy(_handle); @@ -30,25 +30,28 @@ Event::Event(const std::shared_ptr& event_pool, uint32_t event_index) : _event_pool(event_pool), _log("Event", Logger::global().level()) { ze_event_desc_t event_desc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, event_index, 0, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", zeEventCreate(_event_pool->handle(), &event_desc, &_handle)); + auto result = zeEventCreate(_event_pool->handle(), &event_desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", result); } void Event::AppendSignalEvent(CommandList& command_list) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendSignalEvent", - zeCommandListAppendSignalEvent(command_list.handle(), _handle)); + auto result = zeCommandListAppendSignalEvent(command_list.handle(), _handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendSignalEvent", result); } void Event::AppendWaitOnEvent(CommandList& command_list) { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWaitOnEvents", - zeCommandListAppendWaitOnEvents(command_list.handle(), 1, &_handle)); + auto result = zeCommandListAppendWaitOnEvents(command_list.handle(), 1, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWaitOnEvents", result); } void Event::AppendEventReset(CommandList& command_list) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendEventReset", - zeCommandListAppendEventReset(command_list.handle(), _handle)); + auto result = zeCommandListAppendEventReset(command_list.handle(), _handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendEventReset", result); } void Event::hostSynchronize() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostSynchronize", zeEventHostSynchronize(_handle, UINT64_MAX)); + auto result = zeEventHostSynchronize(_handle, UINT64_MAX); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostSynchronize", result); } void Event::reset() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostReset", zeEventHostReset(_handle)); + auto result = zeEventHostReset(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostReset", result); } Event::~Event() { auto result = zeEventDestroy(_handle); @@ -66,24 +69,24 @@ CommandList::CommandList(const std::shared_ptr& init_stru _log("CommandList", Logger::global().level()) { ze_mutable_command_list_exp_desc_t mutable_desc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC, nullptr, 0}; ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, &mutable_desc, group_ordinal, 0}; - THROW_ON_FAIL_FOR_LEVELZERO( - "zeCommandListCreate", - zeCommandListCreate(_init_structs->getContext(), _init_structs->getDevice(), &desc, &_handle)); + auto result = zeCommandListCreate(_init_structs->getContext(), _init_structs->getDevice(), &desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListCreate", result); if (mtci_is_supported) { ze_mutable_command_id_exp_desc_t mutableCmdIdDesc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC, nullptr, ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENT}; - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListGetNextCommandIdExp", - zeCommandListGetNextCommandIdExp(_handle, &mutableCmdIdDesc, &_command_id)); + result = zeCommandListGetNextCommandIdExp(_handle, 
&mutableCmdIdDesc, &_command_id); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListGetNextCommandIdExp", result); } } void CommandList::reset() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListReset", zeCommandListReset(_handle)); + auto result = zeCommandListReset(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListReset", result); } void CommandList::appendMemoryCopy(void* dst, const void* src, const std::size_t size) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendMemoryCopy", - zeCommandListAppendMemoryCopy(_handle, dst, src, size, nullptr, 0, nullptr)); + auto result = zeCommandListAppendMemoryCopy(_handle, dst, src, size, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendMemoryCopy", result); } void CommandList::appendGraphInitialize(const ze_graph_handle_t& graph_handle) const { ze_result_t result = @@ -97,14 +100,16 @@ void CommandList::appendGraphExecute(const ze_graph_handle_t& graph_handle, THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphExecute", result, _init_structs->getGraphDdiTable()); } void CommandList::appendNpuTimestamp(uint64_t* timestamp_buff) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWriteGlobalTimestamp", - zeCommandListAppendWriteGlobalTimestamp(_handle, timestamp_buff, nullptr, 0, nullptr)); + auto result = zeCommandListAppendWriteGlobalTimestamp(_handle, timestamp_buff, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWriteGlobalTimestamp", result); } void CommandList::appendBarrier() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendBarrier", zeCommandListAppendBarrier(_handle, nullptr, 0, nullptr)); + auto result = zeCommandListAppendBarrier(_handle, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendBarrier", result); } void CommandList::close() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListClose", zeCommandListClose(_handle)); + auto result = zeCommandListClose(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListClose", result); } CommandList::~CommandList() { auto result = zeCommandListDestroy(_handle); @@ -130,8 +135,8 @@ void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_v &desc, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListUpdateMutableCommandsExp", - zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t)); + auto result = zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListUpdateMutableCommandsExp", result); } CommandQueue::CommandQueue(const std::shared_ptr& init_structs, @@ -152,24 +157,22 @@ CommandQueue::CommandQueue(const std::shared_ptr& init_st } } - THROW_ON_FAIL_FOR_LEVELZERO( - "zeCommandQueueCreate", - zeCommandQueueCreate(_init_structs->getContext(), _init_structs->getDevice(), &queue_desc, &_handle)); + auto result = zeCommandQueueCreate(_init_structs->getContext(), _init_structs->getDevice(), &queue_desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueCreate", result); } void CommandQueue::executeCommandList(CommandList& command_list) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", - zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, nullptr)); + auto result = zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", result); } void CommandQueue::executeCommandList(CommandList& command_list, Fence& fence) const { - 
THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", - zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle())); + auto result = zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle()); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", result); } void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workload_type) const { if (_init_structs->getCommandQueueDdiTable().version()) { - THROW_ON_FAIL_FOR_LEVELZERO( - "zeSetWorkloadType", - _init_structs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workload_type)); + auto result = _init_structs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workload_type); + THROW_ON_FAIL_FOR_LEVELZERO("zeSetWorkloadType", result); } else { OPENVINO_THROW("The WorkloadType property is not supported by the current Driver Version!"); } @@ -186,13 +189,16 @@ CommandQueue::~CommandQueue() { Fence::Fence(const CommandQueue& command_queue) : _log("Fence", Logger::global().level()) { ze_fence_desc_t fence_desc = {ZE_STRUCTURE_TYPE_FENCE_DESC, nullptr, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeFenceCreate", zeFenceCreate(command_queue.handle(), &fence_desc, &_handle)); + auto result = zeFenceCreate(command_queue.handle(), &fence_desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeFenceCreate", result); } void Fence::reset() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeFenceReset", zeFenceReset(_handle)); + auto result = zeFenceReset(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeFenceReset", result); } void Fence::hostSynchronize() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeFenceHostSynchronize", zeFenceHostSynchronize(_handle, UINT64_MAX)); + auto result = zeFenceHostSynchronize(_handle, UINT64_MAX); + THROW_ON_FAIL_FOR_LEVELZERO("zeFenceHostSynchronize", result); } Fence::~Fence() { auto result = zeFenceDestroy(_handle); From f20f391ac37b9ddcb7a18c486f0afc736d42db80 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 14:16:40 +0200 Subject: [PATCH 09/13] Run test only on newer drivers Signed-off-by: Bogdan Pereanu --- .../functional/behavior/infer_request_run.cpp | 6 ++ .../functional/behavior/infer_request_run.hpp | 62 ++++++++++--------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp index f30fa2bb1416a3..c369b4b6eafa23 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp @@ -37,6 +37,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, ::testing::ValuesIn(configsInferRequestRunTests)), InferRequestRunTests::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + InferRunTestsOnNewerDrivers, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + InferRequestRunTests::getTestCaseName); + const std::vector batchingConfigs = { {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::PLUGIN)}, {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::COMPILER)}, diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 54590a7abe513f..5bf1c6522bb32e 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ 
b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -170,36 +170,6 @@ TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsSyncInfers) { } } -TEST_P(InferRequestRunTests, MultipleCompiledModelsTestsSyncInfers) { - // Skip test according to plugin specific disabledTestPatterns() (if any) - SKIP_IF_CURRENT_TEST_IS_DISABLED() - // Load CNNNetwork to target plugins - const int no_of_iterations = 256; - std::array compiled_models; - - for (int i = 0; i < no_of_iterations; ++i) { - OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); - } - - // Create InferRequests - std::array infer_reqs; - std::array infer_reqs_threads; - for (int i = 0; i < no_of_iterations; ++i) { - OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); - } - - for (int i = 0; i < no_of_iterations; ++i) { - infer_reqs_threads[i] = std::thread([&infer_reqs, i]() -> void { - OV_ASSERT_NO_THROW(infer_reqs[i].infer()); - infer_reqs[i] = {}; - }); - } - - for (int i = 0; i < no_of_iterations; ++i) { - infer_reqs_threads[i].join(); - } -} - TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsAsyncInfers) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() @@ -1091,6 +1061,38 @@ TEST_P(SetShapeInferRunTests, checkResultsAfterStateTensorsReallocation) { } } +using InferRunTestsOnNewerDrivers = InferRequestRunTests; + +TEST_P(InferRunTestsOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + // Load CNNNetwork to target plugins + const int no_of_iterations = 256; + std::array compiled_models; + + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); + } + + // Create InferRequests + std::array infer_reqs; + std::array infer_reqs_threads; + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i] = std::thread([&infer_reqs, i]() -> void { + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + infer_reqs[i] = {}; + }); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i].join(); + } +} + } // namespace behavior } // namespace test } // namespace ov From 4ad00282e4dce7c240294b0c81fb44764ff3aecd Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 16:00:00 +0200 Subject: [PATCH 10/13] Create event pool and events only if they are used Signed-off-by: Bogdan Pereanu --- .../src/backend/src/zero_pipeline.cpp | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 7ae3db67433a0c..c6286709704947 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -31,10 +31,6 @@ Pipeline::Pipeline(const Config& config, _config(config), _id(_graph->get_unique_id()), _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), - _event_pool{ - std::make_shared(_init_structs->getDevice(), - _init_structs->getContext(), - _number_of_command_lists ? 
static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), _logger("Pipeline", _config.get()), _group_ordinal(group_ordinal) { @@ -51,16 +47,31 @@ Pipeline::Pipeline(const Config& config, _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); + OPENVINO_ASSERT(_sync_output_with_fences || !_config.get(), + "In-order execution doesn't work in case synchronization of the inferences is done using events"); + + if (!_sync_output_with_fences || _config.get()) { + _event_pool = + std::make_shared(_init_structs->getDevice(), + _init_structs->getContext(), + _number_of_command_lists ? static_cast(_number_of_command_lists) : 1); + + _events.reserve(_number_of_command_lists); + for (size_t i = 0; i < _number_of_command_lists; i++) { + _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); + } + } + _command_lists.reserve(_number_of_command_lists); - _events.reserve(_number_of_command_lists); - _fences.resize(_number_of_command_lists); - _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( std::make_unique(_init_structs, group_ordinal, _init_structs->getMutableCommandListVersion() ? true : false)); - _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); + } + + if (_sync_output_with_fences) { + _fences.resize(_number_of_command_lists); } for (size_t i = 0; i < _number_of_command_lists; i++) { From 135da9aa66e83cf75216206db5866c3f86d73cba Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 17:27:35 +0200 Subject: [PATCH 11/13] Add new test case for changing priority, turbo and workload type Signed-off-by: Bogdan Pereanu --- .../functional/behavior/infer_request_run.hpp | 2 - .../ov_infer_request/compile_and_infer.cpp | 7 ++ .../internal/overload/compile_and_infer.hpp | 82 +++++++++++++++++++ 3 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 5bf1c6522bb32e..2889bf04f1dc2f 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -1066,7 +1066,6 @@ using InferRunTestsOnNewerDrivers = InferRequestRunTests; TEST_P(InferRunTestsOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() - // Load CNNNetwork to target plugins const int no_of_iterations = 256; std::array compiled_models; @@ -1074,7 +1073,6 @@ TEST_P(InferRunTestsOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); } - // Create InferRequests std::array infer_reqs; std::array infer_reqs_threads; for (int i = 0; i < no_of_iterations; ++i) { diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp index 5a77908adabd0c..b0318d9b8f25f7 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp @@ -31,4 +31,11 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, {ov::intel_npu::defer_weights_load(false)}})), 
ov::test::utils::appendPlatformTypeTestName); +INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, + OVCompileAndInferRequesOnNewerDrivers, + ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)), + ::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configs)), + ov::test::utils::appendPlatformTypeTestName); + } // namespace diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index 877eb6628f7645..aa555cdf97dd55 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -5,8 +5,11 @@ #include #include +#include #include +#include #include +#include #include "base/ov_behavior_test_utils.hpp" #include "intel_npu/config/common.hpp" @@ -299,6 +302,85 @@ TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { } } +using OVCompileAndInferRequesOnNewerDrivers = OVCompileAndInferRequest; + +TEST_P(OVCompileAndInferRequesOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto supportedProperties = core->get_property("NPU", supported_properties.name()).as>(); + bool isTurboSupported = + std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) { + return property == intel_npu::turbo.name(); + }); + + if (isCommandQueueExtSupported()) { + ASSERT_TRUE(isTurboSupported); + + const int no_of_iterations = 256; + std::array compiled_models; + + for (int i = 0; i < no_of_iterations; ++i) { + if (i % 4) { + configuration[intel_npu::turbo.name()] = false; + } else { + configuration[intel_npu::turbo.name()] = true; + } + + if (i % 5 == 1) { + configuration[workload_type.name()] = WorkloadType::DEFAULT; + } else if (i % 5 == 2) { + configuration[workload_type.name()] = WorkloadType::EFFICIENT; + } + + if (i % 3 == 0) { + configuration[ov::hint::model_priority.name()] = ov::hint::Priority::LOW; + } else if (i % 3 == 1) { + configuration[ov::hint::model_priority.name()] = ov::hint::Priority::MEDIUM; + } else if (i % 3 == 2) { + configuration[ov::hint::model_priority.name()] = ov::hint::Priority::HIGH; + } + + OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(function, target_device, configuration)); + } + + std::array infer_reqs; + std::array infer_reqs_threads; + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i] = std::thread([&compiled_models, &infer_reqs, i]() -> void { + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + + ov::AnyMap modelConfiguration; + if (i % 5 == 0) { + modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; + OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } else if (i % 5 == 1) { + modelConfiguration[workload_type.name()] = WorkloadType::EFFICIENT; + OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } else if (i % 5 == 2) { + modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; + OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } else if (i % 5 == 3) { + modelConfiguration[workload_type.name()] = WorkloadType::EFFICIENT; + 
OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } + + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + + infer_reqs[i] = {}; + }); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i].join(); + } + } +} + } // namespace behavior } // namespace test } // namespace ov From ad75cf1a839c137f65da8fc49b1096e6f9bb97ee Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 18:28:44 +0200 Subject: [PATCH 12/13] Destroy pipeline even when use count is 0 Signed-off-by: Bogdan Pereanu --- .../include/intel_npu/utils/zero/zero_wrappers.hpp | 11 +++++------ .../intel_npu/src/utils/src/zero/zero_wrappers.cpp | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 623bfd701546ed..1f5ad106b53530 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -159,12 +159,11 @@ class CommandQueueManager { static CommandQueueManager& getInstance(); - const std::shared_ptr& getCommandQueue( - const std::shared_ptr& init_structs, - const ze_command_queue_priority_t& priority, - const std::optional& workload_type, - const uint32_t& group_ordinal, - bool turbo); + std::shared_ptr getCommandQueue(const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workload_type, + const uint32_t& group_ordinal, + bool turbo); void freeCommandQueue(const ze_command_queue_priority_t& priority, const std::optional& workload_type, diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 7f3d5b46976526..522144ff0d97ea 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -214,7 +214,7 @@ CommandQueueManager& CommandQueueManager::getInstance() { static CommandQueueManager instance; return instance; } -const std::shared_ptr& CommandQueueManager::getCommandQueue( +std::shared_ptr CommandQueueManager::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const std::optional& workload_type, @@ -255,7 +255,7 @@ void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& pr if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] - .use_count() == 1) { + .use_count() <= 1) { _log.debug("Destroy command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] From 9c9cc88e21646e919b3d11fe2d6b946649100ad4 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Thu, 30 Jan 2025 07:57:06 +0200 Subject: [PATCH 13/13] Make sure that the pipeline is still alive when fences are destroyed Signed-off-by: Bogdan Pereanu --- .../src/backend/src/zero_pipeline.cpp | 76 +++++++++++-------- .../intel_npu/utils/zero/zero_wrappers.hpp | 8 +- .../src/utils/src/zero/zero_wrappers.cpp | 4 +- 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index c6286709704947..3cf9b205df2abd 100644 --- 
a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -41,12 +41,6 @@ Pipeline::Pipeline(const Config& config, profiling_query.create(profiling_pool._handle); } - if (_config.has()) { - _turbo = _config.get(); - } - - _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); - OPENVINO_ASSERT(_sync_output_with_fences || !_config.get(), "In-order execution doesn't work in case synchronization of the inferences is done using events"); @@ -70,8 +64,29 @@ Pipeline::Pipeline(const Config& config, _init_structs->getMutableCommandListVersion() ? true : false)); } + _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); + + if (_config.has()) { + _turbo = _config.get(); + } + + if (config.has()) { + _ze_workload_type = zeroUtils::toZeQueueWorkloadType(config.get()); + } + + _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + if (_sync_output_with_fences) { _fences.resize(_number_of_command_lists); + + for (size_t i = 0; i < _number_of_command_lists; i++) { + _logger.debug("Pipeline - getCommandQueue() - create new fence"); + _fences[i] = std::make_unique(*_command_queue); + } } for (size_t i = 0; i < _number_of_command_lists; i++) { @@ -168,41 +183,36 @@ Pipeline::Pipeline(const Config& config, void Pipeline::getCommandQueue() { _logger.debug("Pipeline - getCommandQueue() started"); - _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, - _ze_queue_priority, - _graph->get_ze_workload_type(), - _group_ordinal, - _turbo); - { - std::lock_guard lock(_mutex); - - if (_ze_workload_type != _graph->get_ze_workload_type()) { - if (_ze_workload_type.has_value()) { - // fences created for the old command queue shall be destroyed and make new ones - if (_sync_output_with_fences) { - for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] != nullptr) { - _logger.debug("Pipeline - getCommandQueue() - destroy old fence"); - _fences[i].reset(); - } - } - } + std::lock_guard lock(_mutex); - _logger.debug("Pipeline - getCommandQueue() - free command queue"); - CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + if (_ze_workload_type != _graph->get_ze_workload_type()) { + // fences created for the old command queue shall be destroyed and make new ones + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] != nullptr) { + _logger.debug("Pipeline - getCommandQueue() - destroy old fence"); + _fences[i].reset(); + } } - - _ze_workload_type = _graph->get_ze_workload_type(); } + _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + if (_sync_output_with_fences) { for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] == nullptr) { - _logger.debug("Pipeline - getCommandQueue() - create new fence"); - _fences[i] = std::make_unique(*_command_queue); - } + _logger.debug("Pipeline - getCommandQueue() - create new fence"); + _fences[i] = std::make_unique(*_command_queue); } } + + _logger.debug("Pipeline - getCommandQueue() - free previous command queue"); + CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + + _ze_workload_type = _graph->get_ze_workload_type(); } _logger.debug("Pipeline - 
getCommandQueue() completed"); diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 1f5ad106b53530..d85725c530fb14 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -145,10 +145,6 @@ class CommandQueue { ze_command_queue_handle_t _handle = nullptr; }; -static std::array, workload::WORKLOAD_COUNT>, turbo::TURBO_COUNT>, - priority::PRIORITY_COUNT> - _gloabal_command_queues; - class CommandQueueManager { public: CommandQueueManager(); @@ -173,6 +169,10 @@ class CommandQueueManager { Logger _log; std::mutex _mutex; + + std::array, workload::WORKLOAD_COUNT>, turbo::TURBO_COUNT>, + priority::PRIORITY_COUNT> + _gloabal_command_queues; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 522144ff0d97ea..a6d7fe812c4169 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -236,7 +236,7 @@ std::shared_ptr CommandQueueManager::getCommandQueue( [zeroUtils::toWorkloadEnum(workload_type)] ->setWorkloadType(*workload_type); } catch (const std::exception& ex) { - _log.debug("Destroy pipeline if workload type is not supported!"); + _log.error("Destroy pipeline if workload type is not supported!"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] .reset(); @@ -255,7 +255,7 @@ void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& pr if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] - .use_count() <= 1) { + .use_count() == 1) { _log.debug("Destroy command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)]
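
---

Illustrative note (not part of the patch series): the sketch below reproduces, in isolation, the sharing pattern this series converges on — a process-wide, mutex-guarded singleton that caches shared_ptr command queues per configuration slot and releases a cached queue once only the cache itself still holds a reference. All names here (QueueCache, DummyCommandQueue, the integer priority key) are invented for illustration and are not part of the plugin or of the Level Zero API; the real manager additionally keys on workload type and wraps zeCommandQueueCreate/Destroy.

#include <array>
#include <cassert>
#include <iostream>
#include <memory>
#include <mutex>

// Stand-in for the real Level Zero-backed command queue; simplified for illustration.
struct DummyCommandQueue {
    DummyCommandQueue(int priority, bool turbo) : priority(priority), turbo(turbo) {}
    int priority;
    bool turbo;
};

// Minimal sketch of a mutex-guarded Meyers singleton that caches shared command queues
// per (priority, turbo) slot and destroys a cached queue once only the cache holds it.
class QueueCache {
public:
    static QueueCache& getInstance() {
        static QueueCache instance;  // constructed once; thread-safe since C++11
        return instance;
    }

    QueueCache(const QueueCache&) = delete;
    QueueCache& operator=(const QueueCache&) = delete;

    std::shared_ptr<DummyCommandQueue> get(int priority, bool turbo) {
        assert(priority >= 0 && priority < 3);  // illustration only, no real validation
        std::lock_guard<std::mutex> lock(_mutex);
        auto& slot = _queues[priority][turbo ? 1 : 0];
        if (slot == nullptr) {
            slot = std::make_shared<DummyCommandQueue>(priority, turbo);
        }
        return slot;  // copy of the shared_ptr, so the cache keeps one reference
    }

    void free(int priority, bool turbo) {
        assert(priority >= 0 && priority < 3);
        std::lock_guard<std::mutex> lock(_mutex);
        auto& slot = _queues[priority][turbo ? 1 : 0];
        // Release only when the cache is the last owner, mirroring the use_count() == 1 check.
        if (slot && slot.use_count() == 1) {
            slot.reset();
        }
    }

private:
    QueueCache() = default;

    std::mutex _mutex;
    // 3 priority levels x turbo on/off; the real cache adds a workload-type dimension.
    std::array<std::array<std::shared_ptr<DummyCommandQueue>, 2>, 3> _queues{};
};

int main() {
    auto q = QueueCache::getInstance().get(/*priority=*/1, /*turbo=*/true);
    std::cout << "priority=" << q->priority << " turbo=" << q->turbo << "\n";
    q.reset();                                // the caller drops its reference first
    QueueCache::getInstance().free(1, true);  // the cache now releases the queue
    return 0;
}

Usage follows the same order the pipeline code uses: callers drop their own shared_ptr before calling free(), otherwise the use_count() check keeps the queue alive for the remaining owners.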