From 8c1fa24f5f8f65481d32c25103350868dddc875d Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Thu, 14 Nov 2024 17:52:03 +0800
Subject: [PATCH] Fix issues due to https://github.com/ClickHouse/ClickHouse/pull/71539.

Issue 1

BuildQueryPipelineSettings is created manually instead of by calling
BuildQueryPipelineSettings::fromContext(), so even though
https://github.com/ClickHouse/ClickHouse/pull/71890 disables
'query_plan_merge_filters', the UTs still fail. To fix this, we need to set
the correct default parameters in CHUtil.cpp.

Issue 2

If we set query_plan_merge_filters to true, then
https://github.com/ClickHouse/ClickHouse/pull/71539 will split the left-most
AND atom into a separate DAG and hence create a FilterTransformer for each
AND atom, which makes metrics collection fail. I am not sure what the benefit
of setting it to true would be, so let's keep it false.
---
 cpp-ch/local-engine/Common/CHUtil.cpp               | 11 +++++++++++
 cpp-ch/local-engine/Parser/SerializedPlanParser.cpp |  9 +++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp
index d253de82938f..380435e1fa6d 100644
--- a/cpp-ch/local-engine/Common/CHUtil.cpp
+++ b/cpp-ch/local-engine/Common/CHUtil.cpp
@@ -79,6 +79,9 @@ namespace Setting
 {
 extern const SettingsUInt64 prefer_external_sort_block_bytes;
 extern const SettingsUInt64 max_bytes_before_external_sort;
+extern const SettingsBool query_plan_merge_filters;
+extern const SettingsBool compile_expressions;
+extern const SettingsShortCircuitFunctionEvaluation short_circuit_function_evaluation;
 }
 namespace ErrorCodes
 {
@@ -713,6 +716,14 @@ void BackendInitializerUtil::initSettings(const SparkConfigs::ConfigMap & spark_
     settings.set("max_download_threads", 1);
     settings.set("input_format_parquet_enable_row_group_prefetch", false);
 
+    /// update per https://github.com/ClickHouse/ClickHouse/pull/71539
+    /// if true, we can't get correct metrics for the query
+    settings[Setting::query_plan_merge_filters] = false;
+    /// We now set BuildQueryPipelineSettings according to config.
+    settings[Setting::compile_expressions] = true;
+    settings[Setting::short_circuit_function_evaluation] = ShortCircuitFunctionEvaluation::DISABLE;
+    ///
+
     for (const auto & [key, value] : spark_conf_map)
     {
         // Firstly apply spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.* to settings
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
index 748ff88acbd1..68143e99f8f7 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp
@@ -297,12 +297,9 @@ DB::QueryPipelineBuilderPtr SerializedPlanParser::buildQueryPipeline(DB::QueryPl
         settings,
         0);
     const QueryPlanOptimizationSettings optimization_settings{.optimize_plan = settings[Setting::query_plan_enable_optimizations]};
-    return query_plan.buildQueryPipeline(
-        optimization_settings,
-        BuildQueryPipelineSettings{
-            .actions_settings
-            = ExpressionActionsSettings{.can_compile_expressions = true, .min_count_to_compile_expression = 3, .compile_expressions = CompileExpressions::yes},
-            .process_list_element = query_status});
+    BuildQueryPipelineSettings build_settings = BuildQueryPipelineSettings::fromContext(context);
+    build_settings.process_list_element = query_status;
+    return query_plan.buildQueryPipeline(optimization_settings, build_settings);
 }
 
 std::unique_ptr SerializedPlanParser::createExecutor(const std::string_view plan)
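
Note: as a standalone illustration of Issue 1 (hypothetical types and names,
not the real ClickHouse/Gluten API), the sketch below shows why deriving the
pipeline-build settings from the query context matters: a hand-built settings
struct keeps its hard-coded defaults no matter what the config sets, while a
fromContext()-style factory picks up whatever the session configured, e.g.
query_plan_merge_filters = false.

// toy_from_context.cpp -- hypothetical analogue of the fix, not ClickHouse code
#include <iostream>
#include <map>
#include <string>

// Stand-in for the query context carrying session settings.
struct Context
{
    std::map<std::string, bool> settings{
        {"query_plan_merge_filters", false},   // disabled by config (as in CHUtil.cpp)
        {"compile_expressions", true}};
};

// Stand-in for BuildQueryPipelineSettings.
struct BuildSettings
{
    bool merge_filters = true;        // hard-coded default; ignores the context
    bool compile_expressions = true;

    // Analogue of BuildQueryPipelineSettings::fromContext(): read from the context.
    static BuildSettings fromContext(const Context & ctx)
    {
        BuildSettings s;
        s.merge_filters = ctx.settings.at("query_plan_merge_filters");
        s.compile_expressions = ctx.settings.at("compile_expressions");
        return s;
    }
};

int main()
{
    Context ctx;                                               // config disables filter merging
    BuildSettings manual;                                      // old code path: manual construction
    BuildSettings derived = BuildSettings::fromContext(ctx);   // new code path

    std::cout << "manual.merge_filters  = " << manual.merge_filters << '\n';   // 1: config ignored
    std::cout << "derived.merge_filters = " << derived.merge_filters << '\n';  // 0: config respected
    return 0;
}

Built with a plain `g++ -std=c++17 toy_from_context.cpp`, this should print 1
for the manual path and 0 for the derived path, which is the gap the patch
closes by switching SerializedPlanParser to fromContext().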