From 6b718077ddba0a61b3ec7084e64e1c186fcbd4bd Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Fri, 24 Jan 2025 14:05:56 +0200 Subject: [PATCH 01/13] Use global command queues Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_pipeline.hpp | 15 +++- .../src/backend/src/zero_pipeline.cpp | 88 ++++++++++++++++--- .../include/intel_npu/common/igraph.hpp | 7 +- .../intel_npu/src/common/src/igraph.cpp | 34 +++---- .../include/ze_graph_ext_wrappers.hpp | 4 +- .../src/compiler_adapter/src/driver_graph.cpp | 17 +--- .../src/compiler_adapter/src/plugin_graph.cpp | 17 +--- .../src/ze_graph_ext_wrappers.cpp | 9 +- .../intel_npu/utils/zero/zero_utils.hpp | 72 +++++++++++++++ .../intel_npu/utils/zero/zero_wrappers.hpp | 32 ++++++- .../src/utils/src/zero/zero_wrappers.cpp | 66 ++++++++++++-- 11 files changed, 271 insertions(+), 90 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 6baabc55b435ce..ae5783d281f3d5 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -27,7 +27,7 @@ struct Pipeline { Pipeline(const Pipeline&) = delete; Pipeline& operator=(const Pipeline&) = delete; - virtual ~Pipeline() = default; + ~Pipeline(); void push(); void pull(); @@ -40,6 +40,9 @@ struct Pipeline { void closeCommandListIndex(size_t command_list_index); protected: + void getCommandQueue(); + + std::shared_ptr _init_structs; std::shared_ptr _graph; const Config _config; const uint32_t _id; @@ -54,14 +57,22 @@ struct Pipeline { */ size_t _number_of_command_lists; + CommandQueueFactory _command_queue_factory; std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; std::shared_ptr _event_pool; std::vector> _events; - bool sync_output_with_fences_ = true; + bool _sync_output_with_fences = true; std::shared_ptr _npu_profiling; Logger _logger; + + uint32_t _group_ordinal; + bool _fences_are_created = false; + std::mutex _mutex; + bool _turbo = false; + ze_command_queue_priority_t _ze_queue_priority; + std::optional _ze_workload_type = std::nullopt; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 9f55897193aeeb..7ce0d636d255a3 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -26,7 +26,8 @@ Pipeline::Pipeline(const Config& config, const std::vector>>& input_tensors, const std::vector>& output_tensors, uint32_t group_ordinal) - : _graph(graph), + : _init_structs(init_structs), + _graph(graph), _config(config), _id(_graph->get_unique_id()), _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), @@ -35,7 +36,8 @@ Pipeline::Pipeline(const Config& config, init_structs->getContext(), _number_of_command_lists ? 
static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), - _logger("Pipeline", _config.get()) { + _logger("Pipeline", _config.get()), + _group_ordinal(group_ordinal) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); _logger.debug("Pipeline - initialize started"); @@ -43,9 +45,15 @@ Pipeline::Pipeline(const Config& config, profiling_query.create(profiling_pool._handle); } + if (_config.has()) { + _turbo = _config.get(); + } + + _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); + _command_lists.reserve(_number_of_command_lists); _events.reserve(_number_of_command_lists); - _fences.reserve(_number_of_command_lists); + _fences.resize(_number_of_command_lists); _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( @@ -53,7 +61,6 @@ Pipeline::Pipeline(const Config& config, group_ordinal, init_structs->getMutableCommandListVersion() ? true : false)); _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); - _fences.emplace_back(std::make_unique(*_graph->get_command_queue())); } for (size_t i = 0; i < _number_of_command_lists; i++) { @@ -138,7 +145,7 @@ Pipeline::Pipeline(const Config& config, } // appendBarrier used in L0 as well - if (!sync_output_with_fences_) { + if (!_sync_output_with_fences) { _command_lists.at(i)->appendBarrier(); _events.at(i)->AppendSignalEvent(*_command_lists.at(i)); } @@ -147,9 +154,54 @@ Pipeline::Pipeline(const Config& config, _logger.debug("Pipeline - initialize completed"); } +void Pipeline::getCommandQueue() { + _logger.debug("Pipeline - getCommandQueue() started"); + std::lock_guard lock(_mutex); + + _command_queue = _command_queue_factory.getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + + if (_ze_workload_type != _graph->get_ze_workload_type()) { + if (_ze_workload_type.has_value()) { + // fences created for the old command queue shall be destroyed and make new ones + if (_sync_output_with_fences) { + _logger.debug("Pipeline - getCommandQueue() - destroy old fences"); + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i].reset(); + } + } + + _logger.debug("Pipeline - getCommandQueue() - free command queue"); + _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + + _fences_are_created = false; + } + + _ze_workload_type = _graph->get_ze_workload_type(); + } + + if (!_fences_are_created) { + if (_sync_output_with_fences) { + _logger.debug("Pipeline - getCommandQueue() - create new fences"); + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i] = std::make_unique(*_command_queue); + } + } + + _fences_are_created = true; + } + + _logger.debug("Pipeline - getCommandQueue() completed"); +} + void Pipeline::push() { _logger.debug("Pipeline - push() started"); + getCommandQueue(); + if (_config.get()) { if (_id) { auto previousIndex = _graph->get_last_submitted_id(); @@ -164,10 +216,10 @@ void Pipeline::push() { for (size_t i = 0; i < _command_lists.size(); ++i) { OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); - if (sync_output_with_fences_) { - _graph->get_command_queue()->executeCommandList(*_command_lists.at(i), *_fences.at(i)); + if (_sync_output_with_fences) { + _command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { - 
_graph->get_command_queue()->executeCommandList(*_command_lists.at(i)); + _command_queue->executeCommandList(*_command_lists.at(i)); } } @@ -179,7 +231,7 @@ void Pipeline::pull() { OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); for (size_t i = 0; i < _command_lists.size(); ++i) { - if (sync_output_with_fences_) { + if (_sync_output_with_fences) { _fences.at(i)->hostSynchronize(); } else { _events.at(i)->hostSynchronize(); @@ -194,17 +246,17 @@ void Pipeline::pull() { }; void Pipeline::reset() const { - _logger.debug("Pipeline - rest() started"); + _logger.debug("Pipeline - reset() started"); for (size_t i = 0; i < _command_lists.size(); ++i) { - if (sync_output_with_fences_) { + if (_sync_output_with_fences) { _fences.at(i)->reset(); } else { _events.at(i)->reset(); } } - _logger.debug("Pipeline - rest() completed"); + _logger.debug("Pipeline - reset() completed"); }; void Pipeline::updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size) { @@ -257,4 +309,16 @@ void Pipeline::closeCommandListIndex(size_t command_list_index) { _command_lists.at(command_list_index)->close(); }; +Pipeline::~Pipeline() { + // fences shall be destroyed before the command queue is destroyed + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i].reset(); + } + } + + _command_queue.reset(); + _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index ec4d7091ac6345..8b4124bcfc6154 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -42,9 +42,8 @@ class IGraph : public std::enable_shared_from_this { const std::vector& get_input_descriptors() const; const std::vector& get_output_descriptors() const; - const std::shared_ptr& get_command_queue() const; - void set_workload_type(const ov::WorkloadType workloadType) const; + void set_workload_type(const ov::WorkloadType workloadType); std::mutex& get_mutex(); @@ -56,6 +55,7 @@ class IGraph : public std::enable_shared_from_this { uint32_t get_last_submitted_id() const; const std::optional get_batch_size() const; + const std::optional get_ze_workload_type() const; protected: /** @@ -83,7 +83,6 @@ class IGraph : public std::enable_shared_from_this { std::vector _input_descriptors; std::vector _output_descriptors; - std::shared_ptr _command_queue; std::vector> _last_submitted_event; // Used to protect zero pipeline creation in the graph. 
The pipeline should be created only once per graph when the @@ -101,6 +100,8 @@ class IGraph : public std::enable_shared_from_this { */ std::optional _batch_size = std::nullopt; + std::optional _ze_workload_type = std::nullopt; + Logger _logger; }; diff --git a/src/plugins/intel_npu/src/common/src/igraph.cpp b/src/plugins/intel_npu/src/common/src/igraph.cpp index f641813e44c0e7..ce54b53ea20432 100644 --- a/src/plugins/intel_npu/src/common/src/igraph.cpp +++ b/src/plugins/intel_npu/src/common/src/igraph.cpp @@ -21,7 +21,11 @@ IGraph::IGraph(ze_graph_handle_t handle, : _handle(handle), _metadata(std::move(metadata)), _blobPtr(std::move(blobPtr)), - _logger("IGraph", config.get()) {} + _logger("IGraph", config.get()) { + if (config.has()) { + set_workload_type(config.get()); + } +} const NetworkMetadata& IGraph::get_metadata() const { return _metadata; @@ -43,28 +47,8 @@ const std::vector& IGraph::get_output_descriptors() const { return _output_descriptors; } -const std::shared_ptr& IGraph::get_command_queue() const { - return _command_queue; -} - -void IGraph::set_workload_type(const ov::WorkloadType workloadType) const { - if (_command_queue == nullptr) { - return; - } - - ze_command_queue_workload_type_t zeWorkloadType; - switch (workloadType) { - case ov::WorkloadType::DEFAULT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; - break; - case ov::WorkloadType::EFFICIENT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; - break; - default: - OPENVINO_THROW("Unknown value for WorkloadType!"); - } - - _command_queue->setWorkloadType(zeWorkloadType); +void IGraph::set_workload_type(const ov::WorkloadType workloadType) { + _ze_workload_type = zeroUtils::toZeQueueWorkloadType(workloadType); } std::mutex& IGraph::get_mutex() { @@ -153,4 +137,8 @@ const std::optional IGraph::get_batch_size() const { return _batch_size; } +const std::optional IGraph::get_ze_workload_type() const { + return _ze_workload_type; +} + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index df538521d856f1..a9290d296822c7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -48,7 +48,7 @@ class ZeGraphExtWrappers { void setGraphArgumentValue(ze_graph_handle_t graphHandle, uint32_t argi_, const void* argv) const; - void initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const; + void initializeGraph(ze_graph_handle_t graphHandle) const; private: std::unordered_set getQueryResultFromSupportedLayers( @@ -60,7 +60,7 @@ class ZeGraphExtWrappers { std::vector& inputs, std::vector& outputs) const; - void initialize_graph_through_command_list(ze_graph_handle_t graphHandle, const Config& config) const; + void initialize_graph_through_command_list(ze_graph_handle_t graphHandle) const; std::shared_ptr _zeroInitStruct; uint32_t _graphExtVersion; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 48ae84a6c841ea..97f77ca644dc08 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -103,23 +103,8 @@ void DriverGraph::initialize(const Config& config) { deviceProperties.stype = 
ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", zeDeviceGetProperties(_zeroInitStruct->getDevice(), &deviceProperties)); - auto groupOrdinal = zeroUtils::findGroupOrdinal(_zeroInitStruct->getDevice(), deviceProperties); - bool turbo = false; - if (config.has()) { - turbo = config.get(); - } - - _command_queue = std::make_shared(_zeroInitStruct, - zeroUtils::toZeQueuePriority(config.get()), - groupOrdinal, - turbo); - - if (config.has()) { - set_workload_type(config.get()); - } - - _zeGraphExt->initializeGraph(_handle, config); + _zeGraphExt->initializeGraph(_handle); _logger.debug("Graph initialize finish"); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index 726a1196b7c88b..3c491fd81fb1c8 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -103,23 +103,8 @@ void PluginGraph::initialize(const Config& config) { deviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", zeDeviceGetProperties(_zeroInitStruct->getDevice(), &deviceProperties)); - auto groupOrdinal = zeroUtils::findGroupOrdinal(_zeroInitStruct->getDevice(), deviceProperties); - bool turbo = false; - if (config.has()) { - turbo = config.get(); - } - - _command_queue = std::make_shared(_zeroInitStruct, - zeroUtils::toZeQueuePriority(config.get()), - groupOrdinal, - turbo); - - if (config.has()) { - set_workload_type(config.get()); - } - - _zeGraphExt->initializeGraph(_handle, config); + _zeGraphExt->initializeGraph(_handle); if (config.get() != ov::intel_npu::BatchMode::COMPILER) { _batch_size = get_batch_size(_metadata); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index d5e793d4fff9fe..0a13cc075be601 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -160,10 +160,10 @@ void ZeGraphExtWrappers::setGraphArgumentValue(ze_graph_handle_t graphHandle, ui THROW_ON_FAIL_FOR_LEVELZERO_EXT("zeGraphSetArgumentValue", result, _zeroInitStruct->getGraphDdiTable()); } -void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle, const Config& config) const { +void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle) const { if (_zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8) { _logger.debug("Use initialize_graph_through_command_list for ext version smaller than 1.8"); - initialize_graph_through_command_list(graphHandle, config); + initialize_graph_through_command_list(graphHandle); } else { _logger.debug("Initialize graph based on graph properties for ext version larger than 1.8"); ze_graph_properties_2_t properties = {}; @@ -177,13 +177,12 @@ void ZeGraphExtWrappers::initializeGraph(ze_graph_handle_t graphHandle, const Co } if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) { - initialize_graph_through_command_list(graphHandle, config); + initialize_graph_through_command_list(graphHandle); } } } -void ZeGraphExtWrappers::initialize_graph_through_command_list(ze_graph_handle_t graphHandle, - const Config& config) const { +void ZeGraphExtWrappers::initialize_graph_through_command_list(ze_graph_handle_t graphHandle) const { ze_device_properties_t 
deviceProperties = {}; deviceProperties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index e68eb0200a09ce..8af0dcd2e1d9a3 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -8,6 +8,8 @@ #include #include +#include + #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_api.hpp" #include "intel_npu/utils/zero/zero_result.hpp" @@ -15,6 +17,29 @@ namespace intel_npu { +enum priority { + NORMAL, + LOW, + HIGH, + + PRIORITY_COUNT +}; + +enum turbo { + DISABLED, + ENABLED, + + TURBO_COUNT +}; + +enum workload { + NOT_SET, + DEFAULT, + EFFICIENT, + + WORKLOAD_COUNT +}; + struct ArgumentDescriptor { ze_graph_argument_properties_3_t info; uint32_t idx; @@ -50,6 +75,42 @@ namespace zeroUtils { ze_result_to_description(result)); \ } +static inline priority toPriorityEnum(const ze_command_queue_priority_t& val) { + switch (val) { + case ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW: + return priority::LOW; + case ZE_COMMAND_QUEUE_PRIORITY_NORMAL: + return priority::NORMAL; + case ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH: + return priority::HIGH; + default: + OPENVINO_THROW("Incorrect queue priority."); + } +} + +static inline turbo toTurboEnum(bool val) { + if (val) { + return turbo::ENABLED; + } + + return turbo::DISABLED; +} + +static inline workload toWorkloadEnum(const std::optional& val) { + if (!val.has_value()) { + return workload::NOT_SET; + } + + switch (*val) { + case ZE_WORKLOAD_TYPE_DEFAULT: + return workload::DEFAULT; + case ZE_WORKLOAD_TYPE_BACKGROUND: + return workload::EFFICIENT; + default: + OPENVINO_THROW("Incorrect workload type."); + } +} + static inline ze_command_queue_priority_t toZeQueuePriority(const ov::hint::Priority& val) { switch (val) { case ov::hint::Priority::LOW: @@ -63,6 +124,17 @@ static inline ze_command_queue_priority_t toZeQueuePriority(const ov::hint::Prio } } +static inline ze_command_queue_workload_type_t toZeQueueWorkloadType(const ov::WorkloadType& val) { + switch (val) { + case ov::WorkloadType::DEFAULT: + return ZE_WORKLOAD_TYPE_DEFAULT; + case ov::WorkloadType::EFFICIENT: + return ZE_WORKLOAD_TYPE_BACKGROUND; + default: + OPENVINO_THROW("Unknown value for WorkloadType."); + } +} + static inline std::size_t precisionToSize(const ze_graph_argument_precision_t val) { switch (val) { case ZE_GRAPH_ARGUMENT_PRECISION_INT4: diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index c2041d678b0c42..a515f14c40b882 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -118,7 +118,7 @@ class Fence { class CommandQueue { public: CommandQueue() = delete; - CommandQueue(const std::shared_ptr& initStructs, + CommandQueue(const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const uint32_t& group_ordinal, bool turbo = false); @@ -129,18 +129,44 @@ class CommandQueue { void executeCommandList(CommandList& command_list) const; void executeCommandList(CommandList& command_list, Fence& fence) const; - void 
setWorkloadType(ze_command_queue_workload_type_t workloadType) const; + void setWorkloadType(ze_command_queue_workload_type_t workload_type) const; ~CommandQueue(); inline ze_command_queue_handle_t handle() const { return _handle; } private: - std::shared_ptr _initStructs; + std::shared_ptr _init_structs; Logger _log; ze_command_queue_handle_t _handle = nullptr; }; +static std::array, workload::WORKLOAD_COUNT>, turbo::TURBO_COUNT>, + priority::PRIORITY_COUNT> + _gloabal_command_queues; + +class CommandQueueFactory { +public: + CommandQueueFactory(); + CommandQueueFactory(const CommandQueueFactory& other) = delete; + CommandQueueFactory(CommandQueueFactory&& other) = delete; + void operator=(const CommandQueueFactory&) = delete; + void operator=(CommandQueueFactory&&) = delete; + + std::shared_ptr& getCommandQueue(const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + const uint32_t& group_ordinal, + bool turbo); + + void freeCommandQueue(const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + bool turbo); + +private: + Logger _log; +}; + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 4868d6326c5fe4..ac998975bcb96c 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -22,6 +22,8 @@ EventPool::~EventPool() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeEventPoolDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } Event::Event(const std::shared_ptr& event_pool, uint32_t event_index) @@ -53,6 +55,8 @@ Event::~Event() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeEventDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } CommandList::CommandList(const std::shared_ptr& initStructs, @@ -107,6 +111,8 @@ CommandList::~CommandList() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeCommandListDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_value) const { ze_mutable_graph_argument_exp_desc_t desc = { @@ -128,17 +134,17 @@ void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_v zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t)); } -CommandQueue::CommandQueue(const std::shared_ptr& initStructs, +CommandQueue::CommandQueue(const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const uint32_t& group_ordinal, bool turbo) - : _initStructs(initStructs), + : _init_structs(init_structs), _log("CommandQueue", Logger::global().level()) { ze_command_queue_desc_t queue_desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, group_ordinal, 0, 0, ZE_COMMAND_QUEUE_MODE_DEFAULT, priority}; if (turbo) { - if (_initStructs->getCommandQueueDdiTable().version()) { + if (_init_structs->getCommandQueueDdiTable().version()) { ze_command_queue_desc_npu_ext_t turbo_cfg = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC_NPU_EXT, nullptr, turbo}; queue_desc.pNext = &turbo_cfg; } else { @@ -148,7 +154,7 @@ CommandQueue::CommandQueue(const std::shared_ptr& initStr THROW_ON_FAIL_FOR_LEVELZERO( "zeCommandQueueCreate", - zeCommandQueueCreate(_initStructs->getContext(), _initStructs->getDevice(), &queue_desc, &_handle)); + zeCommandQueueCreate(_init_structs->getContext(), _init_structs->getDevice(), &queue_desc, &_handle)); } void 
CommandQueue::executeCommandList(CommandList& command_list) const { THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", @@ -159,10 +165,11 @@ void CommandQueue::executeCommandList(CommandList& command_list, Fence& fence) c zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle())); } -void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workloadType) const { - if (_initStructs->getCommandQueueDdiTable().version()) { - THROW_ON_FAIL_FOR_LEVELZERO("zeSetWorkloadType", - _initStructs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workloadType)); +void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workload_type) const { + if (_init_structs->getCommandQueueDdiTable().version()) { + THROW_ON_FAIL_FOR_LEVELZERO( + "zeSetWorkloadType", + _init_structs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workload_type)); } else { OPENVINO_THROW("The WorkloadType property is not supported by the current Driver Version!"); } @@ -173,6 +180,8 @@ CommandQueue::~CommandQueue() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeCommandQueueDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; } Fence::Fence(const CommandQueue& command_queue) : _log("Fence", Logger::global().level()) { @@ -190,6 +199,47 @@ Fence::~Fence() { if (ZE_RESULT_SUCCESS != result) { _log.error("zeFenceDestroy failed %#X", uint64_t(result)); } + + _handle = nullptr; +} + +CommandQueueFactory::CommandQueueFactory() : _log("CommandQueue", Logger::global().level()) {} +std::shared_ptr& CommandQueueFactory::getCommandQueue( + const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + const uint32_t& group_ordinal, + bool turbo) { + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] == nullptr) { + _log.debug("Create new command queue"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] = + std::make_shared(init_structs, priority, group_ordinal, turbo); + + if (zeroUtils::toWorkloadEnum(workloadType) != workload::NOT_SET) { + _log.debug("Set workload type"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + ->setWorkloadType(*workloadType); + } + } + + return _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)]; +} +void CommandQueueFactory::freeCommandQueue(const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + bool turbo) { + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + .use_count() == 1) { + _log.debug("Destroy command queue"); + + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + .reset(); + } } } // namespace intel_npu From 629f02f89ed65e45698eb7a3430cbf32638d5c51 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 11:26:33 +0200 Subject: [PATCH 02/13] Adding test case Signed-off-by: Bogdan Pereanu --- .../internal/overload/compile_and_infer.hpp | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp 
b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index e44329c5de56c8..c4388978b730e0 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -206,6 +206,47 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeUpdateAfterCompilation } } +TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeUpdateAfterCompilationWithMultipleInfers) { + if (isCommandQueueExtSupported()) { + OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); + + auto secondCompiledModel = core->compile_model(function, target_device, configuration); + + ov::InferRequest req1, req2, req3; + OV_ASSERT_NO_THROW(req1 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req3 = secondCompiledModel.create_infer_request()); + bool isCalled = false; + OV_ASSERT_NO_THROW(req1.set_callback([&](std::exception_ptr exception_ptr) { + ASSERT_EQ(exception_ptr, nullptr); + isCalled = true; + })); + OV_ASSERT_NO_THROW(req1.start_async()); + OV_ASSERT_NO_THROW(req1.wait()); + ASSERT_TRUE(isCalled); + + OV_ASSERT_NO_THROW(req3.infer()); + + ov::AnyMap modelConfiguration; + modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; + OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + ASSERT_EQ(execNet.get_property(workload_type.name()).as(), WorkloadType::DEFAULT); + OV_ASSERT_NO_THROW(req2 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req2.infer()); + + modelConfiguration[workload_type.name()] = WorkloadType::EFFICIENT; + OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + ASSERT_EQ(execNet.get_property(workload_type.name()).as(), WorkloadType::EFFICIENT); + isCalled = false; + OV_ASSERT_NO_THROW(req2.set_callback([&](std::exception_ptr exception_ptr) { + ASSERT_EQ(exception_ptr, nullptr); + isCalled = true; + })); + OV_ASSERT_NO_THROW(req2.start_async()); + OV_ASSERT_NO_THROW(req2.wait()); + ASSERT_TRUE(isCalled); + } +} + using OVCompileAndInferRequestTurbo = OVCompileAndInferRequest; TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { From 458de270db8e85196b06090c6e4f994e0f53aa1d Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 14:47:42 +0200 Subject: [PATCH 03/13] Destroy pipeline if it was created but workload type is not supported Signed-off-by: Bogdan Pereanu --- .../src/backend/src/zero_pipeline.cpp | 14 ++++++++------ .../intel_npu/utils/zero/zero_wrappers.hpp | 11 ++++++----- .../src/utils/src/zero/zero_wrappers.cpp | 18 +++++++++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 7ce0d636d255a3..6f6c64f1b89605 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -311,14 +311,16 @@ void Pipeline::closeCommandListIndex(size_t command_list_index) { Pipeline::~Pipeline() { // fences shall be destroyed before the command queue is destroyed - if (_sync_output_with_fences) { - for (size_t i = 0; i < _number_of_command_lists; i++) { - _fences[i].reset(); + if (_command_queue) { + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + _fences[i].reset(); + } } - } - _command_queue.reset(); - _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + _command_queue.reset(); + 
_command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + } } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index a515f14c40b882..b5a7a92faf37cd 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -155,11 +155,12 @@ class CommandQueueFactory { void operator=(const CommandQueueFactory&) = delete; void operator=(CommandQueueFactory&&) = delete; - std::shared_ptr& getCommandQueue(const std::shared_ptr& init_structs, - const ze_command_queue_priority_t& priority, - const std::optional& workloadType, - const uint32_t& group_ordinal, - bool turbo); + const std::shared_ptr& getCommandQueue( + const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workloadType, + const uint32_t& group_ordinal, + bool turbo); void freeCommandQueue(const ze_command_queue_priority_t& priority, const std::optional& workloadType, diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index ac998975bcb96c..acaf9aad037233 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -204,7 +204,7 @@ Fence::~Fence() { } CommandQueueFactory::CommandQueueFactory() : _log("CommandQueue", Logger::global().level()) {} -std::shared_ptr& CommandQueueFactory::getCommandQueue( +const std::shared_ptr& CommandQueueFactory::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const std::optional& workloadType, @@ -218,10 +218,18 @@ std::shared_ptr& CommandQueueFactory::getCommandQueue( std::make_shared(init_structs, priority, group_ordinal, turbo); if (zeroUtils::toWorkloadEnum(workloadType) != workload::NOT_SET) { - _log.debug("Set workload type"); - _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] - ->setWorkloadType(*workloadType); + try { + _log.debug("Set workload type"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + ->setWorkloadType(*workloadType); + } catch (const std::exception& ex) { + _log.debug("Destroy pipeline if workload type is not supported!"); + _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] + [zeroUtils::toWorkloadEnum(workloadType)] + .reset(); + OPENVINO_THROW(ex.what()); + } } } From f7c0aa2c80c51e2c788350dc14b469b29ee0d277 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 14:48:31 +0200 Subject: [PATCH 04/13] Update tests, command queue is created and set at the first infer Signed-off-by: Bogdan Pereanu --- .../internal/overload/compile_and_infer.hpp | 39 +++++++++---------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index c4388978b730e0..e3775ab13385bc 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -100,11 
+100,12 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) { return property == workload_type.name(); }); + OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); + + ov::InferRequest req; + if (isCommandQueueExtSupported()) { ASSERT_TRUE(workloadTypeSupported); - ov::InferRequest req; - OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); - const auto properties = execNet.get_property(supported_properties.name()).as>(); ASSERT_TRUE(std::any_of(properties.begin(), properties.end(), [](const PropertyName& property) { return property == workload_type.name(); @@ -120,8 +121,9 @@ TEST_P(OVCompileAndInferRequest, PluginWorkloadType) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); ASSERT_FALSE(workloadTypeSupported); - OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration), + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "WorkloadType property is not supported by the current Driver Version!"); } @@ -137,10 +139,11 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) { return property == workload_type.name(); }); + ov::InferRequest req; + if (isCommandQueueExtSupported()) { ASSERT_TRUE(workloadTypeSupported); OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); - ov::InferRequest req; OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); bool is_called = false; OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) { @@ -151,8 +154,10 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadType) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); ASSERT_FALSE(workloadTypeSupported); - OV_EXPECT_THROW_HAS_SUBSTRING(execNet.set_property(modelConfiguration), + OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "WorkloadType property is not supported by the current Driver Version!"); } @@ -164,9 +169,9 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) { ov::AnyMap modelConfiguration; modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; OV_ASSERT_NO_THROW(execNet.set_property(modelConfiguration)); + ov::InferRequest req; if (isCommandQueueExtSupported()) { - ov::InferRequest req; OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); bool is_called = false; OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) { @@ -177,7 +182,8 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeDelayedExecutor) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { - OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(), + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "WorkloadType property is not supported by the current Driver Version!"); } @@ -258,12 +264,13 @@ TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { return property == intel_npu::turbo.name(); }); + ov::InferRequest req; + if (isCommandQueueExtSupported()) { ASSERT_TRUE(isTurboSupported); OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); auto turbosetting_compiled_model = execNet.get_property(intel_npu::turbo.name()); OV_ASSERT_NO_THROW(turbosetting_compiled_model = true); - ov::InferRequest req; OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); 
bool is_called = false; OV_ASSERT_NO_THROW(req.set_callback([&](std::exception_ptr exception_ptr) { @@ -274,17 +281,9 @@ TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { OV_ASSERT_NO_THROW(req.wait()); ASSERT_TRUE(is_called); } else { - auto cr_ex = configuration.find(intel_npu::defer_weights_load.name()); - if (cr_ex->second.as() == false) { - OV_EXPECT_THROW_HAS_SUBSTRING(core->compile_model(function, target_device, configuration), - ov::Exception, - "Turbo is not supported by the current driver"); - } else { - OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); - OV_EXPECT_THROW_HAS_SUBSTRING(execNet.create_infer_request(), - ov::Exception, - "Turbo is not supported by the current driver"); - } + OV_ASSERT_NO_THROW(execNet = core->compile_model(function, target_device, configuration)); + OV_ASSERT_NO_THROW(req = execNet.create_infer_request()); + OV_EXPECT_THROW_HAS_SUBSTRING(req.infer(), ov::Exception, "Turbo is not supported by the current driver"); } } From 619016b465a79fe2eee192c5f8ef24c2f706d65e Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Mon, 27 Jan 2025 15:21:42 +0200 Subject: [PATCH 05/13] Update the names of the variables, methods, classes Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_pipeline.hpp | 3 +- .../src/backend/src/zero_pipeline.cpp | 36 +++++++------- .../include/intel_npu/common/igraph.hpp | 2 +- .../intel_npu/utils/zero/zero_wrappers.hpp | 20 ++++---- .../src/utils/src/zero/zero_wrappers.cpp | 48 +++++++++---------- 5 files changed, 54 insertions(+), 55 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index ae5783d281f3d5..85a190891a1532 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -57,7 +57,7 @@ struct Pipeline { */ size_t _number_of_command_lists; - CommandQueueFactory _command_queue_factory; + CommandQueueManager _command_queue_manager; std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; @@ -68,7 +68,6 @@ struct Pipeline { Logger _logger; uint32_t _group_ordinal; - bool _fences_are_created = false; std::mutex _mutex; bool _turbo = false; ze_command_queue_priority_t _ze_queue_priority; diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 6f6c64f1b89605..0c79bc089f59be 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -32,8 +32,8 @@ Pipeline::Pipeline(const Config& config, _id(_graph->get_unique_id()), _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), _event_pool{ - std::make_shared(init_structs->getDevice(), - init_structs->getContext(), + std::make_shared(_init_structs->getDevice(), + _init_structs->getContext(), _number_of_command_lists ? static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), _logger("Pipeline", _config.get()), @@ -57,9 +57,9 @@ Pipeline::Pipeline(const Config& config, _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( - std::make_unique(init_structs, + std::make_unique(_init_structs, group_ordinal, - init_structs->getMutableCommandListVersion() ? true : false)); + _init_structs->getMutableCommandListVersion() ? 
true : false)); _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); } @@ -158,7 +158,7 @@ void Pipeline::getCommandQueue() { _logger.debug("Pipeline - getCommandQueue() started"); std::lock_guard lock(_mutex); - _command_queue = _command_queue_factory.getCommandQueue(_init_structs, + _command_queue = _command_queue_manager.getCommandQueue(_init_structs, _ze_queue_priority, _graph->get_ze_workload_type(), _group_ordinal, @@ -170,28 +170,26 @@ void Pipeline::getCommandQueue() { if (_sync_output_with_fences) { _logger.debug("Pipeline - getCommandQueue() - destroy old fences"); for (size_t i = 0; i < _number_of_command_lists; i++) { - _fences[i].reset(); + if (_fences[i] != nullptr) { + _fences[i].reset(); + } } } _logger.debug("Pipeline - getCommandQueue() - free command queue"); - _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); - - _fences_are_created = false; + _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } _ze_workload_type = _graph->get_ze_workload_type(); } - if (!_fences_are_created) { - if (_sync_output_with_fences) { - _logger.debug("Pipeline - getCommandQueue() - create new fences"); - for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_sync_output_with_fences) { + _logger.debug("Pipeline - getCommandQueue() - create new fences"); + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] == nullptr) { _fences[i] = std::make_unique(*_command_queue); } } - - _fences_are_created = true; } _logger.debug("Pipeline - getCommandQueue() completed"); @@ -310,16 +308,18 @@ void Pipeline::closeCommandListIndex(size_t command_list_index) { }; Pipeline::~Pipeline() { - // fences shall be destroyed before the command queue is destroyed if (_command_queue) { if (_sync_output_with_fences) { + // fences shall be destroyed before the command queue is destroyed for (size_t i = 0; i < _number_of_command_lists; i++) { - _fences[i].reset(); + if (_fences[i] != nullptr) { + _fences[i].reset(); + } } } _command_queue.reset(); - _command_queue_factory.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } } diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 8b4124bcfc6154..efb5b6b8978cfc 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -44,6 +44,7 @@ class IGraph : public std::enable_shared_from_this { const std::vector& get_output_descriptors() const; void set_workload_type(const ov::WorkloadType workloadType); + const std::optional get_ze_workload_type() const; std::mutex& get_mutex(); @@ -55,7 +56,6 @@ class IGraph : public std::enable_shared_from_this { uint32_t get_last_submitted_id() const; const std::optional get_batch_size() const; - const std::optional get_ze_workload_type() const; protected: /** diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index b5a7a92faf37cd..327ab6ce553809 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -61,7 +61,7 @@ class CommandList { public: friend class CommandQueue; 
CommandList() = delete; - CommandList(const std::shared_ptr& initStructs, + CommandList(const std::shared_ptr& init_structs, const uint32_t& group_ordinal, bool mtci_is_supported = false); CommandList(const CommandList&) = delete; @@ -85,7 +85,7 @@ class CommandList { } private: - std::shared_ptr _initStructs; + std::shared_ptr _init_structs; Logger _log; @@ -147,23 +147,23 @@ static std::array, workload: priority::PRIORITY_COUNT> _gloabal_command_queues; -class CommandQueueFactory { +class CommandQueueManager { public: - CommandQueueFactory(); - CommandQueueFactory(const CommandQueueFactory& other) = delete; - CommandQueueFactory(CommandQueueFactory&& other) = delete; - void operator=(const CommandQueueFactory&) = delete; - void operator=(CommandQueueFactory&&) = delete; + CommandQueueManager(); + CommandQueueManager(const CommandQueueManager& other) = delete; + CommandQueueManager(CommandQueueManager&& other) = delete; + void operator=(const CommandQueueManager&) = delete; + void operator=(CommandQueueManager&&) = delete; const std::shared_ptr& getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, - const std::optional& workloadType, + const std::optional& workload_type, const uint32_t& group_ordinal, bool turbo); void freeCommandQueue(const ze_command_queue_priority_t& priority, - const std::optional& workloadType, + const std::optional& workload_type, bool turbo); private: diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index acaf9aad037233..79e6d3da85d015 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -59,16 +59,16 @@ Event::~Event() { _handle = nullptr; } -CommandList::CommandList(const std::shared_ptr& initStructs, +CommandList::CommandList(const std::shared_ptr& init_structs, const uint32_t& group_ordinal, bool mtci_is_supported) - : _initStructs(initStructs), + : _init_structs(init_structs), _log("CommandList", Logger::global().level()) { ze_mutable_command_list_exp_desc_t mutable_desc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC, nullptr, 0}; ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, &mutable_desc, group_ordinal, 0}; THROW_ON_FAIL_FOR_LEVELZERO( "zeCommandListCreate", - zeCommandListCreate(_initStructs->getContext(), _initStructs->getDevice(), &desc, &_handle)); + zeCommandListCreate(_init_structs->getContext(), _init_structs->getDevice(), &desc, &_handle)); if (mtci_is_supported) { ze_mutable_command_id_exp_desc_t mutableCmdIdDesc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC, @@ -87,14 +87,14 @@ void CommandList::appendMemoryCopy(void* dst, const void* src, const std::size_t } void CommandList::appendGraphInitialize(const ze_graph_handle_t& graph_handle) const { ze_result_t result = - _initStructs->getGraphDdiTable().pfnAppendGraphInitialize(_handle, graph_handle, nullptr, 0, nullptr); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphInitialize", result, _initStructs->getGraphDdiTable()); + _init_structs->getGraphDdiTable().pfnAppendGraphInitialize(_handle, graph_handle, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphInitialize", result, _init_structs->getGraphDdiTable()); } void CommandList::appendGraphExecute(const ze_graph_handle_t& graph_handle, const ze_graph_profiling_query_handle_t& profiling_query_handle) const { - ze_result_t result = _initStructs->getGraphDdiTable() + ze_result_t result = 
_init_structs->getGraphDdiTable() .pfnAppendGraphExecute(_handle, graph_handle, profiling_query_handle, nullptr, 0, nullptr); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphExecute", result, _initStructs->getGraphDdiTable()); + THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphExecute", result, _init_structs->getGraphDdiTable()); } void CommandList::appendNpuTimestamp(uint64_t* timestamp_buff) const { THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWriteGlobalTimestamp", @@ -116,9 +116,9 @@ CommandList::~CommandList() { } void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_value) const { ze_mutable_graph_argument_exp_desc_t desc = { - (ZE_MAJOR_VERSION(_initStructs->getZeDrvApiVersion()) > 1 || - (ZE_MAJOR_VERSION(_initStructs->getZeDrvApiVersion()) == 1 && - ZE_MINOR_VERSION(_initStructs->getZeDrvApiVersion()) >= 11)) + (ZE_MAJOR_VERSION(_init_structs->getZeDrvApiVersion()) > 1 || + (ZE_MAJOR_VERSION(_init_structs->getZeDrvApiVersion()) == 1 && + ZE_MINOR_VERSION(_init_structs->getZeDrvApiVersion()) >= 11)) ? ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC : static_cast(ZE_STRUCTURE_TYPE_MUTABLE_GRAPH_ARGUMENT_EXP_DESC_DEPRECATED), nullptr, @@ -203,30 +203,30 @@ Fence::~Fence() { _handle = nullptr; } -CommandQueueFactory::CommandQueueFactory() : _log("CommandQueue", Logger::global().level()) {} -const std::shared_ptr& CommandQueueFactory::getCommandQueue( +CommandQueueManager::CommandQueueManager() : _log("CommandQueue", Logger::global().level()) {} +const std::shared_ptr& CommandQueueManager::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, - const std::optional& workloadType, + const std::optional& workload_type, const uint32_t& group_ordinal, bool turbo) { if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] == nullptr) { + [zeroUtils::toWorkloadEnum(workload_type)] == nullptr) { _log.debug("Create new command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] = + [zeroUtils::toWorkloadEnum(workload_type)] = std::make_shared(init_structs, priority, group_ordinal, turbo); - if (zeroUtils::toWorkloadEnum(workloadType) != workload::NOT_SET) { + if (zeroUtils::toWorkloadEnum(workload_type) != workload::NOT_SET) { try { _log.debug("Set workload type"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] - ->setWorkloadType(*workloadType); + [zeroUtils::toWorkloadEnum(workload_type)] + ->setWorkloadType(*workload_type); } catch (const std::exception& ex) { _log.debug("Destroy pipeline if workload type is not supported!"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] + [zeroUtils::toWorkloadEnum(workload_type)] .reset(); OPENVINO_THROW(ex.what()); } @@ -234,18 +234,18 @@ const std::shared_ptr& CommandQueueFactory::getCommandQueue( } return _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)]; + [zeroUtils::toWorkloadEnum(workload_type)]; } -void CommandQueueFactory::freeCommandQueue(const ze_command_queue_priority_t& priority, - const std::optional& workloadType, +void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& priority, + const std::optional& 
workload_type, bool turbo) { if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] + [zeroUtils::toWorkloadEnum(workload_type)] .use_count() == 1) { _log.debug("Destroy command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] - [zeroUtils::toWorkloadEnum(workloadType)] + [zeroUtils::toWorkloadEnum(workload_type)] .reset(); } } From 3cbf804bd5cc853a204fa0c51d20d486c248772f Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Tue, 28 Jan 2025 14:13:22 +0200 Subject: [PATCH 06/13] Create a static instance for CommandQueueManager class and lock get and free methods Signed-off-by: Bogdan Pereanu --- .../src/backend/include/zero_pipeline.hpp | 1 - .../src/backend/src/zero_pipeline.cpp | 56 ++++++++++--------- .../intel_npu/utils/zero/zero_wrappers.hpp | 6 ++ .../src/utils/src/zero/zero_wrappers.cpp | 9 ++- 4 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 85a190891a1532..29069f0a0cf8cc 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -57,7 +57,6 @@ struct Pipeline { */ size_t _number_of_command_lists; - CommandQueueManager _command_queue_manager; std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 0c79bc089f59be..7ae3db67433a0c 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -156,38 +156,40 @@ Pipeline::Pipeline(const Config& config, void Pipeline::getCommandQueue() { _logger.debug("Pipeline - getCommandQueue() started"); - std::lock_guard lock(_mutex); - - _command_queue = _command_queue_manager.getCommandQueue(_init_structs, - _ze_queue_priority, - _graph->get_ze_workload_type(), - _group_ordinal, - _turbo); - - if (_ze_workload_type != _graph->get_ze_workload_type()) { - if (_ze_workload_type.has_value()) { - // fences created for the old command queue shall be destroyed and make new ones - if (_sync_output_with_fences) { - _logger.debug("Pipeline - getCommandQueue() - destroy old fences"); - for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] != nullptr) { - _fences[i].reset(); + + _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + { + std::lock_guard lock(_mutex); + + if (_ze_workload_type != _graph->get_ze_workload_type()) { + if (_ze_workload_type.has_value()) { + // fences created for the old command queue shall be destroyed and make new ones + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] != nullptr) { + _logger.debug("Pipeline - getCommandQueue() - destroy old fence"); + _fences[i].reset(); + } } } + + _logger.debug("Pipeline - getCommandQueue() - free command queue"); + CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } - _logger.debug("Pipeline - getCommandQueue() - free command queue"); - _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + _ze_workload_type = 
_graph->get_ze_workload_type(); } - _ze_workload_type = _graph->get_ze_workload_type(); - } - - if (_sync_output_with_fences) { - _logger.debug("Pipeline - getCommandQueue() - create new fences"); - for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] == nullptr) { - _fences[i] = std::make_unique(*_command_queue); + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] == nullptr) { + _logger.debug("Pipeline - getCommandQueue() - create new fence"); + _fences[i] = std::make_unique(*_command_queue); + } } } } @@ -319,7 +321,7 @@ Pipeline::~Pipeline() { } _command_queue.reset(); - _command_queue_manager.freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); } } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 327ab6ce553809..623bfd701546ed 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -6,6 +6,8 @@ #include +#include + #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "intel_npu/utils/zero/zero_types.hpp" @@ -155,6 +157,8 @@ class CommandQueueManager { void operator=(const CommandQueueManager&) = delete; void operator=(CommandQueueManager&&) = delete; + static CommandQueueManager& getInstance(); + const std::shared_ptr& getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, @@ -168,6 +172,8 @@ class CommandQueueManager { private: Logger _log; + + std::mutex _mutex; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 79e6d3da85d015..fa83e89bd95e56 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -204,12 +204,18 @@ Fence::~Fence() { } CommandQueueManager::CommandQueueManager() : _log("CommandQueue", Logger::global().level()) {} +CommandQueueManager& CommandQueueManager::getInstance() { + static CommandQueueManager instance; + return instance; +} const std::shared_ptr& CommandQueueManager::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const std::optional& workload_type, const uint32_t& group_ordinal, bool turbo) { + std::lock_guard lock(_mutex); + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] == nullptr) { _log.debug("Create new command queue"); @@ -239,11 +245,12 @@ const std::shared_ptr& CommandQueueManager::getCommandQueue( void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& priority, const std::optional& workload_type, bool turbo) { + std::lock_guard lock(_mutex); + if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] .use_count() == 1) { _log.debug("Destroy command queue"); - _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] .reset(); From d9002d0948a1efc7b691208b62793f408ebfe900 Mon Sep 17 00:00:00 2001 From: Bogdan 
Pereanu Date: Tue, 28 Jan 2025 14:13:33 +0200 Subject: [PATCH 07/13] Add new func test Signed-off-by: Bogdan Pereanu --- .../functional/behavior/infer_request_run.hpp | 30 +++++++++++++++++++ .../internal/overload/compile_and_infer.hpp | 12 ++++++++ 2 files changed, 42 insertions(+) diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index ab53a442c16cda..54590a7abe513f 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -170,6 +170,36 @@ TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsSyncInfers) { } } +TEST_P(InferRequestRunTests, MultipleCompiledModelsTestsSyncInfers) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + // Load CNNNetwork to target plugins + const int no_of_iterations = 256; + std::array compiled_models; + + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); + } + + // Create InferRequests + std::array infer_reqs; + std::array infer_reqs_threads; + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i] = std::thread([&infer_reqs, i]() -> void { + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + infer_reqs[i] = {}; + }); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i].join(); + } +} + TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsAsyncInfers) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index e3775ab13385bc..877eb6628f7645 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -250,6 +250,18 @@ TEST_P(OVCompileAndInferRequest, CompiledModelWorkloadTypeUpdateAfterCompilation OV_ASSERT_NO_THROW(req2.start_async()); OV_ASSERT_NO_THROW(req2.wait()); ASSERT_TRUE(isCalled); + + req1 = {}; + req2 = {}; + req3 = {}; + + OV_ASSERT_NO_THROW(req1 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req2 = secondCompiledModel.create_infer_request()); + OV_ASSERT_NO_THROW(req1.infer()); + OV_ASSERT_NO_THROW(req3 = execNet.create_infer_request()); + OV_ASSERT_NO_THROW(req2.infer()); + OV_ASSERT_NO_THROW(req3.infer()); + OV_ASSERT_NO_THROW(req3.infer()); } } From 20e261c025d2419a88e5e49386bf8ad92b0115ee Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 10:59:25 +0200 Subject: [PATCH 08/13] Print correct error message Signed-off-by: Bogdan Pereanu --- .../src/utils/src/zero/zero_wrappers.cpp | 82 ++++++++++--------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index fa83e89bd95e56..7f3d5b46976526 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -14,8 +14,8 @@ EventPool::EventPool(ze_device_handle_t device_handle, const 
ze_context_handle_t nullptr, ZE_EVENT_POOL_FLAG_HOST_VISIBLE, event_count}; - THROW_ON_FAIL_FOR_LEVELZERO("zeEventPoolCreate", - zeEventPoolCreate(context, &event_pool_desc, 1, &device_handle, &_handle)); + auto result = zeEventPoolCreate(context, &event_pool_desc, 1, &device_handle, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventPoolCreate", result); } EventPool::~EventPool() { auto result = zeEventPoolDestroy(_handle); @@ -30,25 +30,28 @@ Event::Event(const std::shared_ptr& event_pool, uint32_t event_index) : _event_pool(event_pool), _log("Event", Logger::global().level()) { ze_event_desc_t event_desc = {ZE_STRUCTURE_TYPE_EVENT_DESC, nullptr, event_index, 0, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", zeEventCreate(_event_pool->handle(), &event_desc, &_handle)); + auto result = zeEventCreate(_event_pool->handle(), &event_desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventCreate", result); } void Event::AppendSignalEvent(CommandList& command_list) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendSignalEvent", - zeCommandListAppendSignalEvent(command_list.handle(), _handle)); + auto result = zeCommandListAppendSignalEvent(command_list.handle(), _handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendSignalEvent", result); } void Event::AppendWaitOnEvent(CommandList& command_list) { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWaitOnEvents", - zeCommandListAppendWaitOnEvents(command_list.handle(), 1, &_handle)); + auto result = zeCommandListAppendWaitOnEvents(command_list.handle(), 1, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWaitOnEvents", result); } void Event::AppendEventReset(CommandList& command_list) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendEventReset", - zeCommandListAppendEventReset(command_list.handle(), _handle)); + auto result = zeCommandListAppendEventReset(command_list.handle(), _handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendEventReset", result); } void Event::hostSynchronize() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostSynchronize", zeEventHostSynchronize(_handle, UINT64_MAX)); + auto result = zeEventHostSynchronize(_handle, UINT64_MAX); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostSynchronize", result); } void Event::reset() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostReset", zeEventHostReset(_handle)); + auto result = zeEventHostReset(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeEventHostReset", result); } Event::~Event() { auto result = zeEventDestroy(_handle); @@ -66,24 +69,24 @@ CommandList::CommandList(const std::shared_ptr& init_stru _log("CommandList", Logger::global().level()) { ze_mutable_command_list_exp_desc_t mutable_desc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_LIST_EXP_DESC, nullptr, 0}; ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, &mutable_desc, group_ordinal, 0}; - THROW_ON_FAIL_FOR_LEVELZERO( - "zeCommandListCreate", - zeCommandListCreate(_init_structs->getContext(), _init_structs->getDevice(), &desc, &_handle)); + auto result = zeCommandListCreate(_init_structs->getContext(), _init_structs->getDevice(), &desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListCreate", result); if (mtci_is_supported) { ze_mutable_command_id_exp_desc_t mutableCmdIdDesc = {ZE_STRUCTURE_TYPE_MUTABLE_COMMAND_ID_EXP_DESC, nullptr, ZE_MUTABLE_COMMAND_EXP_FLAG_GRAPH_ARGUMENT}; - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListGetNextCommandIdExp", - zeCommandListGetNextCommandIdExp(_handle, &mutableCmdIdDesc, &_command_id)); + result = zeCommandListGetNextCommandIdExp(_handle, 
&mutableCmdIdDesc, &_command_id); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListGetNextCommandIdExp", result); } } void CommandList::reset() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListReset", zeCommandListReset(_handle)); + auto result = zeCommandListReset(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListReset", result); } void CommandList::appendMemoryCopy(void* dst, const void* src, const std::size_t size) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendMemoryCopy", - zeCommandListAppendMemoryCopy(_handle, dst, src, size, nullptr, 0, nullptr)); + auto result = zeCommandListAppendMemoryCopy(_handle, dst, src, size, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendMemoryCopy", result); } void CommandList::appendGraphInitialize(const ze_graph_handle_t& graph_handle) const { ze_result_t result = @@ -97,14 +100,16 @@ void CommandList::appendGraphExecute(const ze_graph_handle_t& graph_handle, THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnAppendGraphExecute", result, _init_structs->getGraphDdiTable()); } void CommandList::appendNpuTimestamp(uint64_t* timestamp_buff) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWriteGlobalTimestamp", - zeCommandListAppendWriteGlobalTimestamp(_handle, timestamp_buff, nullptr, 0, nullptr)); + auto result = zeCommandListAppendWriteGlobalTimestamp(_handle, timestamp_buff, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendWriteGlobalTimestamp", result); } void CommandList::appendBarrier() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendBarrier", zeCommandListAppendBarrier(_handle, nullptr, 0, nullptr)); + auto result = zeCommandListAppendBarrier(_handle, nullptr, 0, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListAppendBarrier", result); } void CommandList::close() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListClose", zeCommandListClose(_handle)); + auto result = zeCommandListClose(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListClose", result); } CommandList::~CommandList() { auto result = zeCommandListDestroy(_handle); @@ -130,8 +135,8 @@ void CommandList::updateMutableCommandList(uint32_t arg_index, const void* arg_v &desc, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListUpdateMutableCommandsExp", - zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t)); + auto result = zeCommandListUpdateMutableCommandsExp(_handle, &mutable_commands_exp_desc_t); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandListUpdateMutableCommandsExp", result); } CommandQueue::CommandQueue(const std::shared_ptr& init_structs, @@ -152,24 +157,22 @@ CommandQueue::CommandQueue(const std::shared_ptr& init_st } } - THROW_ON_FAIL_FOR_LEVELZERO( - "zeCommandQueueCreate", - zeCommandQueueCreate(_init_structs->getContext(), _init_structs->getDevice(), &queue_desc, &_handle)); + auto result = zeCommandQueueCreate(_init_structs->getContext(), _init_structs->getDevice(), &queue_desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueCreate", result); } void CommandQueue::executeCommandList(CommandList& command_list) const { - THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", - zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, nullptr)); + auto result = zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, nullptr); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", result); } void CommandQueue::executeCommandList(CommandList& command_list, Fence& fence) const { - 
THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", - zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle())); + auto result = zeCommandQueueExecuteCommandLists(_handle, 1, &command_list._handle, fence.handle()); + THROW_ON_FAIL_FOR_LEVELZERO("zeCommandQueueExecuteCommandLists", result); } void CommandQueue::setWorkloadType(ze_command_queue_workload_type_t workload_type) const { if (_init_structs->getCommandQueueDdiTable().version()) { - THROW_ON_FAIL_FOR_LEVELZERO( - "zeSetWorkloadType", - _init_structs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workload_type)); + auto result = _init_structs->getCommandQueueDdiTable().pfnSetWorkloadType(_handle, workload_type); + THROW_ON_FAIL_FOR_LEVELZERO("zeSetWorkloadType", result); } else { OPENVINO_THROW("The WorkloadType property is not supported by the current Driver Version!"); } @@ -186,13 +189,16 @@ CommandQueue::~CommandQueue() { Fence::Fence(const CommandQueue& command_queue) : _log("Fence", Logger::global().level()) { ze_fence_desc_t fence_desc = {ZE_STRUCTURE_TYPE_FENCE_DESC, nullptr, 0}; - THROW_ON_FAIL_FOR_LEVELZERO("zeFenceCreate", zeFenceCreate(command_queue.handle(), &fence_desc, &_handle)); + auto result = zeFenceCreate(command_queue.handle(), &fence_desc, &_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeFenceCreate", result); } void Fence::reset() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeFenceReset", zeFenceReset(_handle)); + auto result = zeFenceReset(_handle); + THROW_ON_FAIL_FOR_LEVELZERO("zeFenceReset", result); } void Fence::hostSynchronize() const { - THROW_ON_FAIL_FOR_LEVELZERO("zeFenceHostSynchronize", zeFenceHostSynchronize(_handle, UINT64_MAX)); + auto result = zeFenceHostSynchronize(_handle, UINT64_MAX); + THROW_ON_FAIL_FOR_LEVELZERO("zeFenceHostSynchronize", result); } Fence::~Fence() { auto result = zeFenceDestroy(_handle); From f20f391ac37b9ddcb7a18c486f0afc736d42db80 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 14:16:40 +0200 Subject: [PATCH 09/13] Run test only on newer drivers Signed-off-by: Bogdan Pereanu --- .../functional/behavior/infer_request_run.cpp | 6 ++ .../functional/behavior/infer_request_run.hpp | 62 ++++++++++--------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp index f30fa2bb1416a3..c369b4b6eafa23 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp @@ -37,6 +37,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, ::testing::ValuesIn(configsInferRequestRunTests)), InferRequestRunTests::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, + InferRunTestsOnNewerDrivers, + ::testing::Combine(::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configsInferRequestRunTests)), + InferRequestRunTests::getTestCaseName); + const std::vector batchingConfigs = { {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::PLUGIN)}, {ov::log::level(ov::log::Level::WARNING), ov::intel_npu::batch_mode(ov::intel_npu::BatchMode::COMPILER)}, diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 54590a7abe513f..5bf1c6522bb32e 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ 
b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -170,36 +170,6 @@ TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsSyncInfers) { } } -TEST_P(InferRequestRunTests, MultipleCompiledModelsTestsSyncInfers) { - // Skip test according to plugin specific disabledTestPatterns() (if any) - SKIP_IF_CURRENT_TEST_IS_DISABLED() - // Load CNNNetwork to target plugins - const int no_of_iterations = 256; - std::array compiled_models; - - for (int i = 0; i < no_of_iterations; ++i) { - OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); - } - - // Create InferRequests - std::array infer_reqs; - std::array infer_reqs_threads; - for (int i = 0; i < no_of_iterations; ++i) { - OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); - } - - for (int i = 0; i < no_of_iterations; ++i) { - infer_reqs_threads[i] = std::thread([&infer_reqs, i]() -> void { - OV_ASSERT_NO_THROW(infer_reqs[i].infer()); - infer_reqs[i] = {}; - }); - } - - for (int i = 0; i < no_of_iterations; ++i) { - infer_reqs_threads[i].join(); - } -} - TEST_P(InferRequestRunTests, MultipleExecutorStreamsTestsAsyncInfers) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() @@ -1091,6 +1061,38 @@ TEST_P(SetShapeInferRunTests, checkResultsAfterStateTensorsReallocation) { } } +using InferRunTestsOnNewerDrivers = InferRequestRunTests; + +TEST_P(InferRunTestsOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + // Load CNNNetwork to target plugins + const int no_of_iterations = 256; + std::array compiled_models; + + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); + } + + // Create InferRequests + std::array infer_reqs; + std::array infer_reqs_threads; + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i] = std::thread([&infer_reqs, i]() -> void { + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + infer_reqs[i] = {}; + }); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i].join(); + } +} + } // namespace behavior } // namespace test } // namespace ov From 4ad00282e4dce7c240294b0c81fb44764ff3aecd Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 16:00:00 +0200 Subject: [PATCH 10/13] Create event pool and events only if they are used Signed-off-by: Bogdan Pereanu --- .../src/backend/src/zero_pipeline.cpp | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 7ae3db67433a0c..c6286709704947 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -31,10 +31,6 @@ Pipeline::Pipeline(const Config& config, _config(config), _id(_graph->get_unique_id()), _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1), - _event_pool{ - std::make_shared(_init_structs->getDevice(), - _init_structs->getContext(), - _number_of_command_lists ? 
static_cast(_number_of_command_lists) : 1)}, _npu_profiling(npu_profiling), _logger("Pipeline", _config.get()), _group_ordinal(group_ordinal) { @@ -51,16 +47,31 @@ Pipeline::Pipeline(const Config& config, _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); + OPENVINO_ASSERT(_sync_output_with_fences || !_config.get(), + "In-order execution doesn't work in case synchronization of the inferences is done using events"); + + if (!_sync_output_with_fences || _config.get()) { + _event_pool = + std::make_shared(_init_structs->getDevice(), + _init_structs->getContext(), + _number_of_command_lists ? static_cast(_number_of_command_lists) : 1); + + _events.reserve(_number_of_command_lists); + for (size_t i = 0; i < _number_of_command_lists; i++) { + _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); + } + } + _command_lists.reserve(_number_of_command_lists); - _events.reserve(_number_of_command_lists); - _fences.resize(_number_of_command_lists); - _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < _number_of_command_lists; i++) { _command_lists.emplace_back( std::make_unique(_init_structs, group_ordinal, _init_structs->getMutableCommandListVersion() ? true : false)); - _events.emplace_back(std::make_shared(_event_pool, static_cast(i))); + } + + if (_sync_output_with_fences) { + _fences.resize(_number_of_command_lists); } for (size_t i = 0; i < _number_of_command_lists; i++) { From 135da9aa66e83cf75216206db5866c3f86d73cba Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 17:27:35 +0200 Subject: [PATCH 11/13] Add new test case for changing priority, turbo and workload type Signed-off-by: Bogdan Pereanu --- .../functional/behavior/infer_request_run.hpp | 2 - .../ov_infer_request/compile_and_infer.cpp | 7 ++ .../internal/overload/compile_and_infer.hpp | 82 +++++++++++++++++++ 3 files changed, 89 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 5bf1c6522bb32e..2889bf04f1dc2f 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -1066,7 +1066,6 @@ using InferRunTestsOnNewerDrivers = InferRequestRunTests; TEST_P(InferRunTestsOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { // Skip test according to plugin specific disabledTestPatterns() (if any) SKIP_IF_CURRENT_TEST_IS_DISABLED() - // Load CNNNetwork to target plugins const int no_of_iterations = 256; std::array compiled_models; @@ -1074,7 +1073,6 @@ TEST_P(InferRunTestsOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(ov_model, target_device, configuration)); } - // Create InferRequests std::array infer_reqs; std::array infer_reqs_threads; for (int i = 0; i < no_of_iterations; ++i) { diff --git a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp index 5a77908adabd0c..b0318d9b8f25f7 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/ov_infer_request/compile_and_infer.cpp @@ -31,4 +31,11 @@ INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, {ov::intel_npu::defer_weights_load(false)}})), 
ov::test::utils::appendPlatformTypeTestName); +INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTests, + OVCompileAndInferRequesOnNewerDrivers, + ::testing::Combine(::testing::Values(getConstantGraph(ov::element::f32)), + ::testing::Values(ov::test::utils::DEVICE_NPU), + ::testing::ValuesIn(configs)), + ov::test::utils::appendPlatformTypeTestName); + } // namespace diff --git a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp index 877eb6628f7645..aa555cdf97dd55 100644 --- a/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp +++ b/src/plugins/intel_npu/tests/functional/internal/overload/compile_and_infer.hpp @@ -5,8 +5,11 @@ #include #include +#include #include +#include #include +#include #include "base/ov_behavior_test_utils.hpp" #include "intel_npu/config/common.hpp" @@ -299,6 +302,85 @@ TEST_P(OVCompileAndInferRequestTurbo, CompiledModelTurbo) { } } +using OVCompileAndInferRequesOnNewerDrivers = OVCompileAndInferRequest; + +TEST_P(OVCompileAndInferRequesOnNewerDrivers, MultipleCompiledModelsTestsSyncInfers) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + auto supportedProperties = core->get_property("NPU", supported_properties.name()).as>(); + bool isTurboSupported = + std::any_of(supportedProperties.begin(), supportedProperties.end(), [](const PropertyName& property) { + return property == intel_npu::turbo.name(); + }); + + if (isCommandQueueExtSupported()) { + ASSERT_TRUE(isTurboSupported); + + const int no_of_iterations = 256; + std::array compiled_models; + + for (int i = 0; i < no_of_iterations; ++i) { + if (i % 4) { + configuration[intel_npu::turbo.name()] = false; + } else { + configuration[intel_npu::turbo.name()] = true; + } + + if (i % 5 == 1) { + configuration[workload_type.name()] = WorkloadType::DEFAULT; + } else if (i % 5 == 2) { + configuration[workload_type.name()] = WorkloadType::EFFICIENT; + } + + if (i % 3 == 0) { + configuration[ov::hint::model_priority.name()] = ov::hint::Priority::LOW; + } else if (i % 3 == 1) { + configuration[ov::hint::model_priority.name()] = ov::hint::Priority::MEDIUM; + } else if (i % 3 == 2) { + configuration[ov::hint::model_priority.name()] = ov::hint::Priority::HIGH; + } + + OV_ASSERT_NO_THROW(compiled_models[i] = core->compile_model(function, target_device, configuration)); + } + + std::array infer_reqs; + std::array infer_reqs_threads; + for (int i = 0; i < no_of_iterations; ++i) { + OV_ASSERT_NO_THROW(infer_reqs[i] = compiled_models[i].create_infer_request()); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i] = std::thread([&compiled_models, &infer_reqs, i]() -> void { + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + + ov::AnyMap modelConfiguration; + if (i % 5 == 0) { + modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; + OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } else if (i % 5 == 1) { + modelConfiguration[workload_type.name()] = WorkloadType::EFFICIENT; + OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } else if (i % 5 == 2) { + modelConfiguration[workload_type.name()] = WorkloadType::DEFAULT; + OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } else if (i % 5 == 3) { + modelConfiguration[workload_type.name()] = WorkloadType::EFFICIENT; + 
OV_ASSERT_NO_THROW(compiled_models[i].set_property(modelConfiguration)); + } + + OV_ASSERT_NO_THROW(infer_reqs[i].infer()); + + infer_reqs[i] = {}; + }); + } + + for (int i = 0; i < no_of_iterations; ++i) { + infer_reqs_threads[i].join(); + } + } +} + } // namespace behavior } // namespace test } // namespace ov From ad75cf1a839c137f65da8fc49b1096e6f9bb97ee Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Wed, 29 Jan 2025 18:28:44 +0200 Subject: [PATCH 12/13] Destroy pipeline even when use count is 0 Signed-off-by: Bogdan Pereanu --- .../include/intel_npu/utils/zero/zero_wrappers.hpp | 11 +++++------ .../intel_npu/src/utils/src/zero/zero_wrappers.cpp | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 623bfd701546ed..1f5ad106b53530 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -159,12 +159,11 @@ class CommandQueueManager { static CommandQueueManager& getInstance(); - const std::shared_ptr& getCommandQueue( - const std::shared_ptr& init_structs, - const ze_command_queue_priority_t& priority, - const std::optional& workload_type, - const uint32_t& group_ordinal, - bool turbo); + std::shared_ptr getCommandQueue(const std::shared_ptr& init_structs, + const ze_command_queue_priority_t& priority, + const std::optional& workload_type, + const uint32_t& group_ordinal, + bool turbo); void freeCommandQueue(const ze_command_queue_priority_t& priority, const std::optional& workload_type, diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 7f3d5b46976526..522144ff0d97ea 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -214,7 +214,7 @@ CommandQueueManager& CommandQueueManager::getInstance() { static CommandQueueManager instance; return instance; } -const std::shared_ptr& CommandQueueManager::getCommandQueue( +std::shared_ptr CommandQueueManager::getCommandQueue( const std::shared_ptr& init_structs, const ze_command_queue_priority_t& priority, const std::optional& workload_type, @@ -255,7 +255,7 @@ void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& pr if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] - .use_count() == 1) { + .use_count() <= 1) { _log.debug("Destroy command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] From 9c9cc88e21646e919b3d11fe2d6b946649100ad4 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Thu, 30 Jan 2025 07:57:06 +0200 Subject: [PATCH 13/13] Make sure that the pipeline is still alive when fences are destroyed Signed-off-by: Bogdan Pereanu --- .../src/backend/src/zero_pipeline.cpp | 76 +++++++++++-------- .../intel_npu/utils/zero/zero_wrappers.hpp | 8 +- .../src/utils/src/zero/zero_wrappers.cpp | 4 +- 3 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index c6286709704947..3cf9b205df2abd 100644 --- 
a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -41,12 +41,6 @@ Pipeline::Pipeline(const Config& config, profiling_query.create(profiling_pool._handle); } - if (_config.has()) { - _turbo = _config.get(); - } - - _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); - OPENVINO_ASSERT(_sync_output_with_fences || !_config.get(), "In-order execution doesn't work in case synchronization of the inferences is done using events"); @@ -70,8 +64,29 @@ Pipeline::Pipeline(const Config& config, _init_structs->getMutableCommandListVersion() ? true : false)); } + _ze_queue_priority = zeroUtils::toZeQueuePriority(_config.get()); + + if (_config.has()) { + _turbo = _config.get(); + } + + if (config.has()) { + _ze_workload_type = zeroUtils::toZeQueueWorkloadType(config.get()); + } + + _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + if (_sync_output_with_fences) { _fences.resize(_number_of_command_lists); + + for (size_t i = 0; i < _number_of_command_lists; i++) { + _logger.debug("Pipeline - getCommandQueue() - create new fence"); + _fences[i] = std::make_unique(*_command_queue); + } } for (size_t i = 0; i < _number_of_command_lists; i++) { @@ -168,41 +183,36 @@ Pipeline::Pipeline(const Config& config, void Pipeline::getCommandQueue() { _logger.debug("Pipeline - getCommandQueue() started"); - _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, - _ze_queue_priority, - _graph->get_ze_workload_type(), - _group_ordinal, - _turbo); - { - std::lock_guard lock(_mutex); - - if (_ze_workload_type != _graph->get_ze_workload_type()) { - if (_ze_workload_type.has_value()) { - // fences created for the old command queue shall be destroyed and make new ones - if (_sync_output_with_fences) { - for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] != nullptr) { - _logger.debug("Pipeline - getCommandQueue() - destroy old fence"); - _fences[i].reset(); - } - } - } + std::lock_guard lock(_mutex); - _logger.debug("Pipeline - getCommandQueue() - free command queue"); - CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + if (_ze_workload_type != _graph->get_ze_workload_type()) { + // fences created for the old command queue shall be destroyed and make new ones + if (_sync_output_with_fences) { + for (size_t i = 0; i < _number_of_command_lists; i++) { + if (_fences[i] != nullptr) { + _logger.debug("Pipeline - getCommandQueue() - destroy old fence"); + _fences[i].reset(); + } } - - _ze_workload_type = _graph->get_ze_workload_type(); } + _command_queue = CommandQueueManager::getInstance().getCommandQueue(_init_structs, + _ze_queue_priority, + _graph->get_ze_workload_type(), + _group_ordinal, + _turbo); + if (_sync_output_with_fences) { for (size_t i = 0; i < _number_of_command_lists; i++) { - if (_fences[i] == nullptr) { - _logger.debug("Pipeline - getCommandQueue() - create new fence"); - _fences[i] = std::make_unique(*_command_queue); - } + _logger.debug("Pipeline - getCommandQueue() - create new fence"); + _fences[i] = std::make_unique(*_command_queue); } } + + _logger.debug("Pipeline - getCommandQueue() - free previous command queue"); + CommandQueueManager::getInstance().freeCommandQueue(_ze_queue_priority, _ze_workload_type, _turbo); + + _ze_workload_type = _graph->get_ze_workload_type(); } _logger.debug("Pipeline - 
getCommandQueue() completed"); diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp index 1f5ad106b53530..d85725c530fb14 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_wrappers.hpp @@ -145,10 +145,6 @@ class CommandQueue { ze_command_queue_handle_t _handle = nullptr; }; -static std::array, workload::WORKLOAD_COUNT>, turbo::TURBO_COUNT>, - priority::PRIORITY_COUNT> - _gloabal_command_queues; - class CommandQueueManager { public: CommandQueueManager(); @@ -173,6 +169,10 @@ class CommandQueueManager { Logger _log; std::mutex _mutex; + + std::array, workload::WORKLOAD_COUNT>, turbo::TURBO_COUNT>, + priority::PRIORITY_COUNT> + _gloabal_command_queues; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp index 522144ff0d97ea..a6d7fe812c4169 100644 --- a/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp +++ b/src/plugins/intel_npu/src/utils/src/zero/zero_wrappers.cpp @@ -236,7 +236,7 @@ std::shared_ptr CommandQueueManager::getCommandQueue( [zeroUtils::toWorkloadEnum(workload_type)] ->setWorkloadType(*workload_type); } catch (const std::exception& ex) { - _log.debug("Destroy pipeline if workload type is not supported!"); + _log.error("Destroy pipeline if workload type is not supported!"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] .reset(); @@ -255,7 +255,7 @@ void CommandQueueManager::freeCommandQueue(const ze_command_queue_priority_t& pr if (_gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)] - .use_count() <= 1) { + .use_count() == 1) { _log.debug("Destroy command queue"); _gloabal_command_queues[zeroUtils::toPriorityEnum(priority)][zeroUtils::toTurboEnum(turbo)] [zeroUtils::toWorkloadEnum(workload_type)]
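
---

Illustrative note (not part of the patch series): the sketch below reproduces, in isolation, the sharing pattern this series converges on — a process-wide, mutex-guarded singleton that caches shared_ptr command queues per configuration slot and releases a cached queue once only the cache itself still holds a reference. All names here (QueueCache, DummyCommandQueue, the integer priority key) are invented for illustration and are not part of the plugin or of the Level Zero API; the real manager additionally keys on workload type and wraps zeCommandQueueCreate/Destroy.

#include <array>
#include <cassert>
#include <iostream>
#include <memory>
#include <mutex>

// Stand-in for the real Level Zero-backed command queue; simplified for illustration.
struct DummyCommandQueue {
    DummyCommandQueue(int priority, bool turbo) : priority(priority), turbo(turbo) {}
    int priority;
    bool turbo;
};

// Minimal sketch of a mutex-guarded Meyers singleton that caches shared command queues
// per (priority, turbo) slot and destroys a cached queue once only the cache holds it.
class QueueCache {
public:
    static QueueCache& getInstance() {
        static QueueCache instance;  // constructed once; thread-safe since C++11
        return instance;
    }

    QueueCache(const QueueCache&) = delete;
    QueueCache& operator=(const QueueCache&) = delete;

    std::shared_ptr<DummyCommandQueue> get(int priority, bool turbo) {
        assert(priority >= 0 && priority < 3);  // illustration only, no real validation
        std::lock_guard<std::mutex> lock(_mutex);
        auto& slot = _queues[priority][turbo ? 1 : 0];
        if (slot == nullptr) {
            slot = std::make_shared<DummyCommandQueue>(priority, turbo);
        }
        return slot;  // copy of the shared_ptr, so the cache keeps one reference
    }

    void free(int priority, bool turbo) {
        assert(priority >= 0 && priority < 3);
        std::lock_guard<std::mutex> lock(_mutex);
        auto& slot = _queues[priority][turbo ? 1 : 0];
        // Release only when the cache is the last owner, mirroring the use_count() == 1 check.
        if (slot && slot.use_count() == 1) {
            slot.reset();
        }
    }

private:
    QueueCache() = default;

    std::mutex _mutex;
    // 3 priority levels x turbo on/off; the real cache adds a workload-type dimension.
    std::array<std::array<std::shared_ptr<DummyCommandQueue>, 2>, 3> _queues{};
};

int main() {
    auto q = QueueCache::getInstance().get(/*priority=*/1, /*turbo=*/true);
    std::cout << "priority=" << q->priority << " turbo=" << q->turbo << "\n";
    q.reset();                                // the caller drops its reference first
    QueueCache::getInstance().free(1, true);  // the cache now releases the queue
    return 0;
}

Usage follows the same order the pipeline code uses: callers drop their own shared_ptr before calling free(), otherwise the use_count() check keeps the queue alive for the remaining owners.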