diff --git a/.gitmodules b/.gitmodules
index e4d63a341183..ed61ddb96ba1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -253,9 +253,6 @@
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl
-[submodule "contrib/idxd-config"]
- path = contrib/idxd-config
- url = https://github.com/intel/idxd-config
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash
@@ -296,6 +293,9 @@
[submodule "contrib/libdivide"]
path = contrib/libdivide
url = https://github.com/ridiculousfish/libdivide
+[submodule "contrib/libbcrypt"]
+ path = contrib/libbcrypt
+ url = https://github.com/rg3/libbcrypt.git
[submodule "contrib/ulid-c"]
path = contrib/ulid-c
url = https://github.com/ClickHouse/ulid-c.git
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e2505856d0c8..1ccd4f9846d9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@
# 2023 Changelog
-### ClickHouse release 23.4 LTS, 2023-04-26
+### ClickHouse release 23.4, 2023-04-26
#### Backward Incompatible Change
* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)).
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0554403cce51..263b202049b3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -170,12 +170,6 @@ else ()
set(NO_WHOLE_ARCHIVE --no-whole-archive)
endif ()
-option(ENABLE_CURL_BUILD "Enable curl, azure, sentry build on by default except MacOS." ON)
-if (OS_DARWIN)
- # Disable the curl, azure, senry build on MacOS
- set (ENABLE_CURL_BUILD OFF)
-endif ()
-
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# Can be lld or ld-lld or lld-13 or /path/to/lld.
if (LINKER_NAME MATCHES "lld")
@@ -393,9 +387,9 @@ else()
endif ()
option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON)
-# We use mmap for allocations more heavily in debug builds,
-# but GWP-ASan also wants to use mmap frequently,
-# and due to a large number of memory mappings,
+# We use mmap for allocations more heavily in debug builds,
+# but GWP-ASan also wants to use mmap frequently,
+# and due to a large number of memory mappings,
# it does not work together well.
if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
set(ENABLE_GWP_ASAN OFF)
diff --git a/SECURITY.md b/SECURITY.md
index 44a122956b45..75c1a9d7d6aa 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -22,13 +22,7 @@ The following versions of ClickHouse server are currently being supported with s
| 22.10 | ❌ |
| 22.9 | ❌ |
| 22.8 | ✔️ |
-| 22.7 | ❌ |
-| 22.6 | ❌ |
-| 22.5 | ❌ |
-| 22.4 | ❌ |
-| 22.3 | ❌ |
-| 22.2 | ❌ |
-| 22.1 | ❌ |
+| 22.* | ❌ |
| 21.* | ❌ |
| 20.* | ❌ |
| 19.* | ❌ |
diff --git a/base/harmful/harmful.c b/base/harmful/harmful.c
index 6112f9a339c0..78796ca0c054 100644
--- a/base/harmful/harmful.c
+++ b/base/harmful/harmful.c
@@ -31,7 +31,8 @@ TRAP(argp_state_help)
TRAP(argp_usage)
TRAP(asctime)
TRAP(clearenv)
-TRAP(crypt)
+// Redefined at contrib/libbcrypt/crypt_blowfish/wrapper.c:186
+// TRAP(crypt)
TRAP(ctime)
TRAP(cuserid)
TRAP(drand48)
diff --git a/cmake/fuzzer.cmake b/cmake/fuzzer.cmake
index 578a97572701..52f301ab8ad4 100644
--- a/cmake/fuzzer.cmake
+++ b/cmake/fuzzer.cmake
@@ -7,10 +7,6 @@ if (FUZZER)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link")
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=fuzzer-no-link")
- endif()
-
# NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable
if (NOT LIB_FUZZING_ENGINE)
set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer")
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index fc9793d8f356..bf5eddf09f5d 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -16,49 +16,24 @@ if (SANITIZE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ASAN_FLAGS}")
- endif()
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan")
- endif ()
-
elseif (SANITIZE STREQUAL "memory")
# MemorySanitizer flags are set according to the official documentation:
# https://clang.llvm.org/docs/MemorySanitizer.html#usage
- #
- # For now, it compiles with `cmake -DSANITIZE=memory -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_CXX_FLAGS_ADD="-O1" -DCMAKE_C_FLAGS_ADD="-O1"`
- # Compiling with -DCMAKE_BUILD_TYPE=Debug leads to ld.lld failures because
- # of large files (was not tested with ld.gold). This is why we compile with
- # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to
- # keep the binary size down.
- # TODO: try compiling with -Og and with ld.gold.
+
+ # Linking can fail due to relocation overflows (see #49145), caused by object files / libraries that are too large.
+ # Work around this with position-independent builds (-fPIC and -fpie); this is slightly slower than non-PIC/PIE but that's okay.
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory")
- endif()
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
- endif ()
-
elseif (SANITIZE STREQUAL "thread")
set (TSAN_FLAGS "-fsanitize=thread")
if (COMPILER_CLANG)
set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt")
endif()
-
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${TSAN_FLAGS}")
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread")
- endif()
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan")
- endif ()
elseif (SANITIZE STREQUAL "undefined")
set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero")
@@ -77,12 +52,6 @@ if (SANITIZE)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}")
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
- endif()
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan")
- endif ()
# llvm-tblgen, that is used during LLVM build, doesn't work with UBSan.
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 0ff8b550a982..0c92ff17f115 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -141,20 +141,19 @@ add_contrib (libuv-cmake libuv)
add_contrib (liburing-cmake liburing)
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
add_contrib (cassandra-cmake cassandra) # requires: libuv
-
-if (ENABLE_CURL_BUILD)
+if (NOT OS_DARWIN)
add_contrib (curl-cmake curl)
add_contrib (azure-cmake azure)
add_contrib (sentry-native-cmake sentry-native) # requires: curl
endif()
-
add_contrib (fmtlib-cmake fmtlib)
add_contrib (krb5-cmake krb5)
add_contrib (cyrus-sasl-cmake cyrus-sasl) # for krb5
add_contrib (libgsasl-cmake libgsasl) # requires krb5
add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl
add_contrib (nats-io-cmake nats-io)
-add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5
+add_contrib (isa-l-cmake isa-l)
+add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l
add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)
@@ -178,21 +177,17 @@ add_contrib (s2geometry-cmake s2geometry)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
add_contrib (morton-nd-cmake morton-nd)
-
if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
-
add_contrib (annoy-cmake annoy)
-
add_contrib (xxHash-cmake xxHash)
-add_contrib (google-benchmark-cmake google-benchmark)
+add_contrib (libbcrypt-cmake libbcrypt)
+add_contrib (google-benchmark-cmake google-benchmark)
add_contrib (ulid-c-cmake ulid-c)
-add_contrib (isa-l-cmake isa-l)
-
# Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
diff --git a/contrib/curl b/contrib/curl
index c12fb3ddaf48..b0edf0b7dae4 160000
--- a/contrib/curl
+++ b/contrib/curl
@@ -1 +1 @@
-Subproject commit c12fb3ddaf48e709a7a4deaa55ec485e4df163ee
+Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d
diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt
index 8a570bd267c7..70d9c2816dc1 100644
--- a/contrib/curl-cmake/CMakeLists.txt
+++ b/contrib/curl-cmake/CMakeLists.txt
@@ -12,6 +12,9 @@ set (SRCS
"${LIBRARY_DIR}/lib/noproxy.c"
"${LIBRARY_DIR}/lib/idn.c"
"${LIBRARY_DIR}/lib/cfilters.c"
+ "${LIBRARY_DIR}/lib/cf-socket.c"
+ "${LIBRARY_DIR}/lib/cf-haproxy.c"
+ "${LIBRARY_DIR}/lib/cf-https-connect.c"
"${LIBRARY_DIR}/lib/file.c"
"${LIBRARY_DIR}/lib/timeval.c"
"${LIBRARY_DIR}/lib/base64.c"
@@ -37,8 +40,8 @@ set (SRCS
"${LIBRARY_DIR}/lib/strcase.c"
"${LIBRARY_DIR}/lib/easy.c"
"${LIBRARY_DIR}/lib/curl_fnmatch.c"
+ "${LIBRARY_DIR}/lib/curl_log.c"
"${LIBRARY_DIR}/lib/fileinfo.c"
- "${LIBRARY_DIR}/lib/wildcard.c"
"${LIBRARY_DIR}/lib/krb5.c"
"${LIBRARY_DIR}/lib/memdebug.c"
"${LIBRARY_DIR}/lib/http_chunks.c"
@@ -96,6 +99,7 @@ set (SRCS
"${LIBRARY_DIR}/lib/rand.c"
"${LIBRARY_DIR}/lib/curl_multibyte.c"
"${LIBRARY_DIR}/lib/conncache.c"
+ "${LIBRARY_DIR}/lib/cf-h1-proxy.c"
"${LIBRARY_DIR}/lib/http2.c"
"${LIBRARY_DIR}/lib/smb.c"
"${LIBRARY_DIR}/lib/curl_endian.c"
@@ -113,12 +117,13 @@ set (SRCS
"${LIBRARY_DIR}/lib/altsvc.c"
"${LIBRARY_DIR}/lib/socketpair.c"
"${LIBRARY_DIR}/lib/bufref.c"
+ "${LIBRARY_DIR}/lib/bufq.c"
"${LIBRARY_DIR}/lib/dynbuf.c"
+ "${LIBRARY_DIR}/lib/dynhds.c"
"${LIBRARY_DIR}/lib/hsts.c"
"${LIBRARY_DIR}/lib/http_aws_sigv4.c"
"${LIBRARY_DIR}/lib/mqtt.c"
"${LIBRARY_DIR}/lib/rename.c"
- "${LIBRARY_DIR}/lib/h2h3.c"
"${LIBRARY_DIR}/lib/headers.c"
"${LIBRARY_DIR}/lib/timediff.c"
"${LIBRARY_DIR}/lib/vauth/vauth.c"
@@ -133,6 +138,7 @@ set (SRCS
"${LIBRARY_DIR}/lib/vauth/oauth2.c"
"${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c"
"${LIBRARY_DIR}/lib/vauth/spnego_sspi.c"
+ "${LIBRARY_DIR}/lib/vquic/vquic.c"
"${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vtls/gtls.c"
"${LIBRARY_DIR}/lib/vtls/vtls.c"
@@ -147,9 +153,6 @@ set (SRCS
"${LIBRARY_DIR}/lib/vtls/keylog.c"
"${LIBRARY_DIR}/lib/vtls/x509asn1.c"
"${LIBRARY_DIR}/lib/vtls/hostcheck.c"
- "${LIBRARY_DIR}/lib/vquic/ngtcp2.c"
- "${LIBRARY_DIR}/lib/vquic/quiche.c"
- "${LIBRARY_DIR}/lib/vquic/msh3.c"
"${LIBRARY_DIR}/lib/vssh/libssh2.c"
"${LIBRARY_DIR}/lib/vssh/libssh.c"
)
diff --git a/contrib/idxd-config b/contrib/idxd-config
deleted file mode 160000
index f6605c41a735..000000000000
--- a/contrib/idxd-config
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt
index fd0218a7b801..d4d6d648268b 100644
--- a/contrib/isa-l-cmake/CMakeLists.txt
+++ b/contrib/isa-l-cmake/CMakeLists.txt
@@ -1,6 +1,23 @@
+option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES})
+if (ARCH_AARCH64)
+ # Disable the ISA-L library on aarch64.
+ set (ENABLE_ISAL_LIBRARY OFF)
+endif ()
+
+if (NOT ENABLE_ISAL_LIBRARY)
+ message(STATUS "Not using isa-l")
+ return()
+endif()
+
set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l")
-# check nasm compiler
+# The YASM and NASM assemblers are somewhat mutually compatible. ISA-L specifically needs NASM. If only YASM is installed, then check_language(ASM_NASM)
+# below happily finds YASM, leading to weird errors at build time. Therefore, do an explicit check for NASM here.
+find_program(NASM_PATH NAMES nasm)
+if (NOT NASM_PATH)
+ message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because the NASM compiler cannot be found!")
+endif ()
+
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)
diff --git a/contrib/libbcrypt b/contrib/libbcrypt
new file mode 160000
index 000000000000..8aa32ad94ebe
--- /dev/null
+++ b/contrib/libbcrypt
@@ -0,0 +1 @@
+Subproject commit 8aa32ad94ebe06b76853b0767c910c9fbf7ccef4
diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt
new file mode 100644
index 000000000000..d40d7f9195ee
--- /dev/null
+++ b/contrib/libbcrypt-cmake/CMakeLists.txt
@@ -0,0 +1,19 @@
+option(ENABLE_BCRYPT "Enable bcrypt" ${ENABLE_LIBRARIES})
+
+if (NOT ENABLE_BCRYPT)
+ message(STATUS "Not using bcrypt")
+ return()
+endif()
+
+set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt")
+
+set(SRCS
+ "${LIBRARY_DIR}/bcrypt.c"
+ "${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c"
+ "${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c"
+ "${LIBRARY_DIR}/crypt_blowfish/wrapper.c"
+)
+
+add_library(_bcrypt ${SRCS})
+target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}")
+add_library(ch_contrib::bcrypt ALIAS _bcrypt)
diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt
index d9f7009c1bd4..fd9ed7dc182c 100644
--- a/contrib/libhdfs3-cmake/CMakeLists.txt
+++ b/contrib/libhdfs3-cmake/CMakeLists.txt
@@ -172,8 +172,10 @@ if (TARGET OpenSSL::SSL)
target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
endif()
-target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
-add_definitions(-DHADOOP_ISAL_LIBRARY)
+if (TARGET ch_contrib::isal)
+ target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
+ add_definitions(-DHADOOP_ISAL_LIBRARY)
+endif()
add_library(ch_contrib::hdfs ALIAS _hdfs3)
diff --git a/docker/images.json b/docker/images.json
index 9150abe1f1cf..b4f3e755bd1f 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -123,7 +123,8 @@
"docker/test/stateless",
"docker/test/integration/base",
"docker/test/fuzzer",
- "docker/test/keeper-jepsen"
+ "docker/test/keeper-jepsen",
+ "docker/test/server-jepsen"
]
},
"docker/test/integration/kerberized_hadoop": {
@@ -139,6 +140,10 @@
"name": "clickhouse/keeper-jepsen-test",
"dependent": []
},
+ "docker/test/server-jepsen": {
+ "name": "clickhouse/server-jepsen-test",
+ "dependent": []
+ },
"docker/test/install/deb": {
"name": "clickhouse/install-deb-test",
"dependent": []
diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile
index 59e8d2ed3d87..73da4515ff4b 100644
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
-ARG VERSION="23.4.1.1943"
+ARG VERSION="23.4.2.11"
ARG PACKAGES="clickhouse-keeper"
# user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine
index d59a08c28052..1a5d2071f6b7 100644
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.4.1.1943"
+ARG VERSION="23.4.2.11"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu
index 390f347d549c..8792d419a165 100644
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.4.1.1943"
+ARG VERSION="23.4.2.11"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
diff --git a/docker/test/server-jepsen/Dockerfile b/docker/test/server-jepsen/Dockerfile
index 958dbfa066af..125b187aa5b9 100644
--- a/docker/test/server-jepsen/Dockerfile
+++ b/docker/test/server-jepsen/Dockerfile
@@ -16,6 +16,8 @@ ENV TESTS_TO_RUN="8"
ENV TIME_LIMIT="30"
ENV KEEPER_NODE=""
+ENV NEMESIS=""
+ENV WORKLOAD=""
# volumes
diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh
index 4a966d50f74b..4e90a74e7055 100644
--- a/docker/test/server-jepsen/run.sh
+++ b/docker/test/server-jepsen/run.sh
@@ -15,8 +15,38 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
ls -lath ||:
fi
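+
+# Use the locally mounted ClickHouse binary when WITH_LOCAL_BINARY is set;
+# otherwise run against the package referenced by CLICKHOUSE_PACKAGE.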
+clickhouse_source="--clickhouse-source \'$CLICKHOUSE_PACKAGE\'"
+if [ -n "$WITH_LOCAL_BINARY" ]; then
+ clickhouse_source="--clickhouse-source /clickhouse"
+fi
+
+tests_count="--test-count \"$TESTS_TO_RUN\""
+tests_to_run="test-all"
+workload=""
+if [ -n "$WORKLOAD" ]; then
+ tests_to_run="test"
+ workload="--workload $WORKLOAD"
+ tests_count=""
+fi
+
+nemesis=""
+if [ -n "$NEMESIS" ]; then
+ nemesis="--nemesis $NEMESIS"
+fi
+
+rate=""
+if [ -n "$RATE" ]; then
+ rate="--rate $RATE"
+fi
+
+concurrency=""
+if [ -n "$CONCURRENCY" ]; then
+ concurrency="--concurrency $CONCURRENCY"
+fi
+
+
cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse"
-(lein run server test-all --keeper "$KEEPER_NODE" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 -r 50 --clickhouse-source "$CLICKHOUSE_PACKAGE" --test-count "$TESTS_TO_RUN" || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
+(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
mv store "$TEST_OUTPUT/"
diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py
index 3c1c6e2a795a..470eb61b3fad 100755
--- a/docker/test/util/process_functional_tests_result.py
+++ b/docker/test/util/process_functional_tests_result.py
@@ -80,11 +80,9 @@ def process_test_log(log_path, broken_tests):
test_results.append(
(
test_name,
- "FAIL",
+ "SKIPPED",
test_time,
- [
- "Test is expected to fail! Please, update broken_tests.txt!\n"
- ],
+ ["This test passed. Update broken_tests.txt.\n"],
)
)
else:
diff --git a/docs/changelogs/v23.4.2.11-stable.md b/docs/changelogs/v23.4.2.11-stable.md
new file mode 100644
index 000000000000..3c572b9c1cb2
--- /dev/null
+++ b/docs/changelogs/v23.4.2.11-stable.md
@@ -0,0 +1,20 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v23.4.2.11-stable (b6442320f9d) FIXME as compared to v23.4.1.1943-stable (3920eb987f7)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Revert "Fix GCS native copy ([#48981](https://github.com/ClickHouse/ClickHouse/issues/48981))" [#49194](https://github.com/ClickHouse/ClickHouse/pull/49194) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix race on Outdated parts loading [#49223](https://github.com/ClickHouse/ClickHouse/pull/49223) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Implement status comment [#48468](https://github.com/ClickHouse/ClickHouse/pull/48468) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Update curl to 8.0.1 (for CVEs) [#48765](https://github.com/ClickHouse/ClickHouse/pull/48765) ([Boris Kuschel](https://github.com/bkuschel)).
+* Fallback auth gh api [#49314](https://github.com/ClickHouse/ClickHouse/pull/49314) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
diff --git a/docs/en/development/build.md b/docs/en/development/build.md
index e3a63da6a3e1..a55d44bdf939 100644
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@@ -22,7 +22,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
### Install Prerequisites {#install-prerequisites}
``` bash
-sudo apt-get install git cmake ccache python3 ninja-build yasm gawk
+sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk
```
### Install and Use the Clang compiler
@@ -72,7 +72,7 @@ cmake -S . -B build
cmake --build build # or: `cd build; ninja`
```
-To create an executable, run `cmake --build --target clickhouse` (or: `cd build; ninja clickhouse`).
+To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`).
This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments.
## Building on Any Linux {#how-to-build-clickhouse-on-any-linux}
@@ -92,7 +92,7 @@ If all the components are installed, you may build in the same way as the steps
Example for OpenSUSE Tumbleweed:
``` bash
-sudo zypper install git cmake ninja clang-c++ python lld yasm gawk
+sudo zypper install git cmake ninja clang-c++ python lld nasm yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build
cmake -S . -B build
@@ -103,7 +103,7 @@ Example for Fedora Rawhide:
``` bash
sudo yum update
-sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk
+sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build
cmake -S . -B build
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index d5189d4b9d99..7780dee41360 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -439,6 +439,50 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions,
- `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
- `random_seed` — The seed for Bloom filter hash functions.
+Users can create [UDFs](/docs/en/sql-reference/statements/create/function.md) to estimate the parameter set of `ngrambf_v1`. The query statements are as follows:
+
+```sql
+CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
+AS
+(total_number_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_number_of_all_grams) * log(2));
+
+CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
+AS
+(total_number_of_all_grams, probability_of_false_positives) -> ceil((total_number_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
+
+CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
+AS
+(total_number_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions / (size_of_bloom_filter_in_bytes / total_number_of_all_grams)), number_of_hash_functions);
+
+CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
+AS
+(number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions))));
+
+```
+To use those functions, we need to specify at least two parameters.
+For example, if there are 4300 ngrams in the granule and we expect false positives to be less than 0.0001, the other parameters can be estimated by executing the following queries:
+
+
+```sql
+--- estimate number of bits in the filter
+SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;
+
+┌─size_of_bloom_filter_in_bytes─┐
+│ 10304 │
+└───────────────────────────────┘
+
+--- estimate number of hash functions
+SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions
+
+┌─number_of_hash_functions─┐
+│ 13 │
+└──────────────────────────┘
+
+```
+You can also use those functions to estimate the parameters under other conditions.
+The formulas follow the Bloom filter calculator [here](https://hur.st/bloomfilter).
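+
+For example, to sanity-check the chosen parameters, the expected false-positive rate can be recomputed from them. Note that, as the formulas above are written, the last argument of `bfEstimateFalsePositive` takes the filter size in bits, as returned by `bfEstimateBmSize`:
+
+```sql
+--- estimate the false-positive rate for 4300 grams with 13 hash functions and the filter size estimated above
+SELECT bfEstimateFalsePositive(4300, 13, bfEstimateBmSize(4300, 0.0001)) AS false_positive;
+```
+
+The result should be close to the target probability of 0.0001.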
+
+
#### Token Bloom Filter
The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters.
@@ -731,7 +775,13 @@ The names given to the described entities can be found in the system tables, [sy
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
-Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory.
+Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag in a file in the `config.d` directory.
+
+:::tip
+Disks can also be declared in the `SETTINGS` section of a query. This is useful
+for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
+See [dynamic storage](#dynamic-storage) for more details.
+:::
Configuration structure:
@@ -876,6 +926,87 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.
+### Dynamic Storage
+
+This example query shows how to attach a table stored at a URL and configure the
+remote storage within the query. The web storage is not configured in the ClickHouse
+configuration files; all the settings are in the CREATE/ATTACH query.
+
+:::note
+The example uses `type=web`, but any disk type can be configured as dynamic, even a local disk. Local disks require the path argument to be located inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that too when using a local disk.
+:::
+
+```sql
+ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
+(
+ price UInt32,
+ date Date,
+ postcode1 LowCardinality(String),
+ postcode2 LowCardinality(String),
+ type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
+ is_new UInt8,
+ duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
+ addr1 String,
+ addr2 String,
+ street LowCardinality(String),
+ locality LowCardinality(String),
+ town LowCardinality(String),
+ district LowCardinality(String),
+ county LowCardinality(String)
+)
+ENGINE = MergeTree
+ORDER BY (postcode1, postcode2, addr1, addr2)
+ # highlight-start
+ SETTINGS disk = disk(
+ type=web,
+ endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
+ );
+ # highlight-end
+```
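+
+Once attached, the table can be queried like any other table; for example (the first query may be slow while data is fetched from the web disk):
+
+```sql
+SELECT count(), max(price) FROM uk_price_paid;
+```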
+
+### Nested Dynamic Storage
+
+This example query builds on the above dynamic disk configuration and shows how to
+use a local disk to cache data from a table stored at a URL. Neither the cache disk
+nor the web storage is configured in the ClickHouse configuration files; both are
+configured in the CREATE/ATTACH query settings.
+
+In the settings highlighted below notice that the disk of `type=web` is nested within
+the disk of `type=cache`.
+
+```sql
+ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
+(
+ price UInt32,
+ date Date,
+ postcode1 LowCardinality(String),
+ postcode2 LowCardinality(String),
+ type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
+ is_new UInt8,
+ duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
+ addr1 String,
+ addr2 String,
+ street LowCardinality(String),
+ locality LowCardinality(String),
+ town LowCardinality(String),
+ district LowCardinality(String),
+ county LowCardinality(String)
+)
+ENGINE = MergeTree
+ORDER BY (postcode1, postcode2, addr1, addr2)
+ # highlight-start
+ SETTINGS disk = disk(
+ type=cache,
+ max_size='1Gi',
+ path='/var/lib/clickhouse/custom_disk_cache/',
+ disk=disk(
+ type=web,
+ endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
+ )
+ );
+ # highlight-end
+```
+
### Details {#details}
In the case of `MergeTree` tables, data is getting to disk in different ways:
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 02145a2fb6c9..113e42499fe7 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -1324,7 +1324,7 @@ The trailing slash is mandatory.
 <path>/var/lib/clickhouse/</path>
```
-## prometheus {#server_configuration_parameters-prometheus}
+## Prometheus {#server_configuration_parameters-prometheus}
Exposing metrics data for scraping from [Prometheus](https://prometheus.io).
@@ -1339,13 +1339,25 @@ Settings:
**Example**
``` xml
-<prometheus>
-    <endpoint>/metrics</endpoint>
-    <port>9363</port>
-    <metrics>true</metrics>
-    <events>true</events>
-    <asynchronous_metrics>true</asynchronous_metrics>
-</prometheus>
+<clickhouse>
+    <listen_host>0.0.0.0</listen_host>
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+
+    <prometheus>
+        <endpoint>/metrics</endpoint>
+        <port>9363</port>
+        <metrics>true</metrics>
+        <events>true</events>
+        <asynchronous_metrics>true</asynchronous_metrics>
+    </prometheus>
+
+</clickhouse>
+```
+
+Check (replace `127.0.0.1` with the IP address or hostname of your ClickHouse server):
+```bash
+curl 127.0.0.1:9363/metrics
```
## query_log {#server_configuration_parameters-query-log}
diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md
index 4b1e75c25a1c..deb9a0aaeb37 100644
--- a/docs/en/operations/system-tables/clusters.md
+++ b/docs/en/operations/system-tables/clusters.md
@@ -20,6 +20,9 @@ Columns:
- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach replica.
- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing replica when establishing a connection with hedged requests.
- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal.
+- `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database).
+- `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database).
+- `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown".
**Example**
@@ -47,6 +50,9 @@ default_database:
errors_count: 0
slowdowns_count: 0
estimated_recovery_time: 0
+database_shard_name:
+database_replica_name:
+is_active: NULL
Row 2:
──────
@@ -63,6 +69,9 @@ default_database:
errors_count: 0
slowdowns_count: 0
estimated_recovery_time: 0
+database_shard_name:
+database_replica_name:
+is_active: NULL
```
**See Also**
diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md
index a90fa01a45db..58cdb82d31f8 100644
--- a/docs/en/operations/system-tables/users.md
+++ b/docs/en/operations/system-tables/users.md
@@ -12,7 +12,7 @@ Columns:
- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter.
-- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password.
+- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0, 'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6, 'bcrypt_password' = 7)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://en.wikipedia.org/wiki/SHA-2)-encoded password, with [double SHA-1](https://en.wikipedia.org/wiki/SHA-1)-encoded password or with [bcrypt](https://en.wikipedia.org/wiki/Bcrypt)-encoded password.
- `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md
new file mode 100644
index 000000000000..3da9645181ee
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md
@@ -0,0 +1,118 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest
+sidebar_position: 300
+sidebar_label: kolmogorovSmirnovTest
+---
+
+# kolmogorovSmirnovTest
+
+Applies the Kolmogorov-Smirnov test to samples from two populations.
+
+**Syntax**
+
+``` sql
+kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index)
+```
+
+Values of both samples are in the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population.
+Samples must belong to continuous, one-dimensional probability distributions.
+
+**Arguments**
+
+- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Parameters**
+
+- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
+    Let F(x) and G(x) be the CDFs of the first and second distributions respectively.
+    - `'two-sided'`
+        The null hypothesis is that samples come from the same distribution, i.e. F(x) = G(x) for all x,
+        and the alternative is that the distributions are not identical.
+    - `'greater'`
+        The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one,
+        i.e. the CDF of the first distribution lies above, and hence to the left of, that of the second one.
+        This means that F(x) >= G(x) for all x, and the alternative in this case is that F(x) < G(x) for at least one x.
+    - `'less'`
+        The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one,
+        i.e. the CDF of the first distribution lies below, and hence to the right of, that of the second one.
+        This means that F(x) <= G(x) for all x, and the alternative in this case is that F(x) > G(x) for at least one x.
+- `computation_method` — the method used to compute the p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
+    - `'exact'` - calculation is performed using the exact probability distribution of the test statistic. Computationally intensive and wasteful except for small samples.
+    - `'asymp'` (`'asymptotic'`) - calculation is performed using an approximation. For large sample sizes, the exact and asymptotic p-values are very similar.
+    - `'auto'` - the `'exact'` method is used when the maximum number of samples is less than 10'000.
+
+
+**Returned values**
+
+[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
+- calculated statistic. [Float64](../../../sql-reference/data-types/float.md).
+- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+
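+For reference, the statistic for the `'two-sided'` alternative is the classical Kolmogorov-Smirnov distance between the two empirical CDFs:
+
+``` text
+D = sup_x |F_n(x) - G_m(x)|
+```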
+
+**Example**
+
+Query:
+
+``` sql
+SELECT kolmogorovSmirnovTest('less', 'exact')(value, num)
+FROM
+(
+ SELECT
+ randNormal(0, 10) AS value,
+ 0 AS num
+ FROM numbers(10000)
+ UNION ALL
+ SELECT
+ randNormal(0, 10) AS value,
+ 1 AS num
+ FROM numbers(10000)
+)
+```
+
+Result:
+
+``` text
+┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐
+│ (0.009899999999999996,0.37528595205132287) │
+└────────────────────────────────────────────────────┘
+```
+
+Note:
+The p-value is greater than 0.05 (for a confidence level of 95%), so the null hypothesis is not rejected.
+
+
+Query:
+
+``` sql
+SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num)
+FROM
+(
+ SELECT
+ randStudentT(10) AS value,
+ 0 AS num
+ FROM numbers(100)
+ UNION ALL
+ SELECT
+ randNormal(0, 10) AS value,
+ 1 AS num
+ FROM numbers(100)
+)
+```
+
+Result:
+
+``` text
+┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐
+│ (0.4100000000000002,6.61735760482795e-8) │
+└─────────────────────────────────────────────────────────┘
+```
+
+Note:
+The p-value is less than 0.05 (for a confidence level of 95%), so the null hypothesis is rejected.
+
+
+**See Also**
+
+- [Kolmogorov-Smirnov test](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test)
diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md
index c61a3069db66..2ad8ac4bb239 100644
--- a/docs/en/sql-reference/data-types/index.md
+++ b/docs/en/sql-reference/data-types/index.md
@@ -27,7 +27,7 @@ ClickHouse data types include:
- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results
- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell
- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type.
-- **Nullable**: [`Nullbale`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type)
+- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type)
- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses
- **Geo types**: for[ geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon`
- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md)
\ No newline at end of file
diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md
index 230b4af7960b..28180f7f9919 100644
--- a/docs/en/sql-reference/data-types/nullable.md
+++ b/docs/en/sql-reference/data-types/nullable.md
@@ -8,7 +8,7 @@ sidebar_label: Nullable
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
-For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`.
+For a `TypeName`, you can’t use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`.
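+
+For example (a minimal illustration of the rule above; the second statement would be rejected):
+
+``` sql
+CREATE TABLE nullable_ok (arr Array(Nullable(Int8))) ENGINE = Memory;   -- allowed
+CREATE TABLE nullable_bad (arr Nullable(Array(Int8))) ENGINE = Memory;  -- error: Array cannot be inside Nullable
+```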
A `Nullable` type field can’t be included in table indexes.
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md
index 48a8ce45d332..189673cdae75 100644
--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@@ -1658,6 +1658,7 @@ Example of settings:
     <db>test</db>
     <collection>dictionary_source</collection>
+    <options>ssl=true</options>
```
@@ -1672,6 +1673,7 @@ SOURCE(MONGODB(
password ''
db 'test'
collection 'dictionary_source'
+ options 'ssl=true'
))
```
@@ -1683,6 +1685,8 @@ Setting fields:
- `password` – Password of the MongoDB user.
- `db` – Name of the database.
- `collection` – Name of the collection.
+- `options` - MongoDB connection string options (optional parameter).
+
### Redis
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md
index 3548ef7cc071..d168be63c36c 100644
--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@@ -32,9 +32,12 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`
- `IDENTIFIED WITH double_sha1_password BY 'qwerty'`
- `IDENTIFIED WITH double_sha1_hash BY 'hash'`
+- `IDENTIFIED WITH bcrypt_password BY 'qwerty'`
+- `IDENTIFIED WITH bcrypt_hash BY 'hash'`
- `IDENTIFIED WITH ldap SERVER 'server_name'`
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
+- `IDENTIFIED BY 'qwerty'`
## Examples
@@ -54,21 +57,12 @@ There are multiple ways of user identification:
The password is stored in a SQL text file in `/var/lib/clickhouse/access`, so it's not a good idea to use `plaintext_password`. Try `sha256_password` instead, as demonstrated next...
:::
-3. The best option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example:
+3. The most common option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example:
```sql
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
```
- Notice ClickHouse generates and runs the following command for you:
-
- ```response
- CREATE USER name3
- IDENTIFIED WITH sha256_hash
- BY '8B3404953FCAA509540617F082DB13B3E0734F90FF6365C19300CC6A6EA818D6'
- SALT 'D6489D8B5692D82FF944EA6415785A8A8A1AF33825456AFC554487725A74A609'
- ```
-
 The `name3` user can now log in using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
```bash
@@ -92,6 +86,34 @@ There are multiple ways of user identification:
CREATE USER name4 IDENTIFIED WITH double_sha1_hash BY 'CCD3A959D6A004B9C3807B728BC2E55B67E10518'
```
+5. The `bcrypt_password` is the most secure option for storing passwords. It uses the [bcrypt](https://en.wikipedia.org/wiki/Bcrypt) algorithm, which is resilient against brute force attacks even if the password hash is compromised.
+
+ ```sql
+ CREATE USER name5 IDENTIFIED WITH bcrypt_password BY 'my_password'
+ ```
+
+ The length of the password is limited to 72 characters with this method. The bcrypt work factor parameter, which defines the amount of computations and time needed to compute the hash and verify the password, can be modified in the server configuration:
+
+ ```xml
+ <bcrypt_workfactor>12</bcrypt_workfactor>
+ ```
+
+ The work factor must be between 4 and 31, with a default value of 12.
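+
+ A pre-computed hash can be supplied instead with `bcrypt_hash` (the value below is a placeholder, not a real hash):
+
+ ```sql
+ CREATE USER name5 IDENTIFIED WITH bcrypt_hash BY '<bcrypt hash>'
+ ```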
+
+6. The type of the password can also be omitted:
+
+ ```sql
+ CREATE USER name6 IDENTIFIED BY 'my_password'
+ ```
+
+ In this case, ClickHouse will use the default password type specified in the server configuration:
+
+ ```xml
+ <default_password_type>sha256_password</default_password_type>
+ ```
+
+ The available password types are: `plaintext_password`, `sha256_password`, `double_sha1_password`.
+
## User Host
User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways:
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index 5a5a771f2393..c5596b7ba5f6 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -76,7 +76,7 @@ Resets the mark cache.
## DROP REPLICA
-Dead replicas can be dropped using following syntax:
+Dead replicas of `ReplicatedMergeTree` tables can be dropped using the following syntax:
``` sql
SYSTEM DROP REPLICA 'replica_name' FROM TABLE database.table;
@@ -85,13 +85,25 @@ SYSTEM DROP REPLICA 'replica_name';
SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
```
-Queries will remove the replica path in ZooKeeper. It is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it cannot drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk.
+Queries will remove the `ReplicatedMergeTree` replica path in ZooKeeper. This is useful when the replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica; it cannot drop the local replica (use `DROP TABLE` for that). `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk.
The first one removes metadata of `'replica_name'` replica of `database.table` table.
The second one does the same for all replicated tables in the database.
The third one does the same for all replicated tables on the local server.
The fourth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation.
+## DROP DATABASE REPLICA
+
+Dead replicas of `Replicated` databases can be dropped using the following syntax:
+
+``` sql
+SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'] FROM DATABASE database;
+SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'];
+SYSTEM DROP DATABASE REPLICA 'replica_name' [FROM SHARD 'shard_name'] FROM ZKPATH '/path/to/table/in/zk';
+```
+
+Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica path from ZooKeeper when there is no database left to run `DROP DATABASE` on. Please note that it does not remove `ReplicatedMergeTree` replicas (so you may need `SYSTEM DROP REPLICA` as well). Shard and replica names are the names that were specified in the `Replicated` engine arguments when creating the database. These names can also be obtained from the `database_shard_name` and `database_replica_name` columns in `system.clusters`. If the `FROM SHARD` clause is missing, then `replica_name` must be a full replica name in the `shard_name|replica_name` format.
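+
+For example (assuming a `Replicated` database named `my_db`; the shard and replica names below are placeholders):
+
+``` sql
+-- find replicas of the database and their status
+SELECT database_shard_name, database_replica_name, is_active
+FROM system.clusters
+WHERE cluster = 'my_db';
+
+-- drop a dead replica by its full name
+SYSTEM DROP DATABASE REPLICA 'shard1|replica2' FROM DATABASE my_db;
+```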
+
## DROP UNCOMPRESSED CACHE
Reset the uncompressed data cache.
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md
new file mode 100644
index 000000000000..2f8c6bb6760c
--- /dev/null
+++ b/docs/ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md
@@ -0,0 +1,117 @@
+---
+slug: /ru/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest
+sidebar_position: 300
+sidebar_label: kolmogorovSmirnovTest
+---
+
+# kolmogorovSmirnovTest {#kolmogorovSmirnovTest}
+
+Applies the Kolmogorov-Smirnov test to two independent samples.
+
+**Syntax**
+
+``` sql
+kolmogorovSmirnovTest([alternative, computation_method])(sample_data, sample_index)
+```
+
+Sample values are taken from the `sample_data` column. If `sample_index` equals 0, the value in that row belongs to the first sample. Otherwise the value belongs to the second sample.
+Samples must belong to continuous, one-dimensional probability distributions.
+
+**Arguments**
+
+- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md).
+- `sample_index` — sample indexes. [Integer](../../../sql-reference/data-types/int-uint.md).
+
+**Parameters**
+
+- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md).
+    Let F(x) and G(x) be the CDFs of the first and second samples respectively.
+    - `'two-sided'`
+        The null hypothesis is that the samples come from the same distribution, i.e. F(x) = G(x) for all x.
+        The alternative is that the samples belong to different distributions.
+    - `'greater'`
+        The null hypothesis is that elements of the first sample are stochastically almost surely smaller than elements of the second sample,
+        i.e. the CDF of the first sample lies above, and hence to the left of, the CDF of the second sample.
+        This means that F(x) >= G(x) for all x, and the alternative in this case is that F(x) < G(x) for at least one x.
+    - `'less'`
+        The null hypothesis is that elements of the first sample are stochastically almost surely greater than elements of the second sample,
+        i.e. the CDF of the first sample lies below, and hence to the right of, the CDF of the second sample.
+        This means that F(x) <= G(x) for all x, and the alternative in this case is that F(x) > G(x) for at least one x.
+- `computation_method` — the method used to compute the p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md).
+    - `'exact'` - the exact probability distribution of the test statistic is computed. Computationally expensive and wasteful for large samples.
+    - `'asymp'` (`'asymptotic'`) - an approximation is used. For large samples, the approximate and exact results are nearly identical.
+    - `'auto'` - the exact value is computed (with the `'exact'` method) when the maximum size of the two samples does not exceed 10'000.
+
+**Returned values**
+
+[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
+- calculated statistic. [Float64](../../../sql-reference/data-types/float.md).
+- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Example**
+
+Query:
+
+``` sql
+SELECT kolmogorovSmirnovTest('less', 'exact')(value, num)
+FROM
+(
+    SELECT
+        randNormal(0, 10) AS value,
+        0 AS num
+    FROM numbers(10000)
+    UNION ALL
+    SELECT
+        randNormal(0, 10) AS value,
+        1 AS num
+    FROM numbers(10000)
+)
+```
+
+Result:
+
+``` text
+┌─kolmogorovSmirnovTest('less', 'exact')(value, num)─┐
+│ (0.009899999999999996,0.37528595205132287) │
+└────────────────────────────────────────────────────┘
+```
+
+Note:
+The p-value is greater than 0.05 (for a confidence level of 95%), so the null hypothesis is not rejected.
+
+
+Query:
+
+``` sql
+SELECT kolmogorovSmirnovTest('two-sided', 'exact')(value, num)
+FROM
+(
+    SELECT
+        randStudentT(10) AS value,
+        0 AS num
+    FROM numbers(100)
+    UNION ALL
+    SELECT
+        randNormal(0, 10) AS value,
+        1 AS num
+    FROM numbers(100)
+)
+```
+
+Result:
+
+``` text
+┌─kolmogorovSmirnovTest('two-sided', 'exact')(value, num)─┐
+│ (0.4100000000000002,6.61735760482795e-8) │
+└─────────────────────────────────────────────────────────┘
+```
+
+Note:
+The p-value is less than 0.05 (for a confidence level of 95%), so the null hypothesis is rejected.
+
+
+**See Also**
+
+- [Kolmogorov-Smirnov test](https://ru.wikipedia.org/wiki/%D0%9A%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D1%81%D0%BE%D0%B3%D0%BB%D0%B0%D1%81%D0%B8%D1%8F_%D0%9A%D0%BE%D0%BB%D0%BC%D0%BE%D0%B3%D0%BE%D1%80%D0%BE%D0%B2%D0%B0)
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 5768e744f94e..8925f50fe973 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -26,12 +26,13 @@
#include
#include
#include
+#include
#include
#include
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -133,6 +134,11 @@ void LocalServer::initialize(Poco::Util::Application & self)
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
+
+ OutdatedPartsLoadingThreadPool::initialize(
+ config().getUInt("max_outdated_parts_loading_thread_pool_size", 16),
+ 0, // We don't need any threads once all the parts are loaded
+ config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000));
}
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 8c0d50bae55c..bbd536d93004 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -41,10 +41,9 @@
#include
#include
#include
-#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -778,6 +777,11 @@ try
server_settings.max_backups_io_thread_pool_free_size,
server_settings.backups_io_thread_pool_queue_size);
+ OutdatedPartsLoadingThreadPool::initialize(
+ server_settings.max_outdated_parts_loading_thread_pool_size,
+ 0, // We don't need any threads once all the parts are loaded
+ server_settings.outdated_part_loading_thread_pool_queue_size);
+
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))
{
@@ -1852,7 +1856,7 @@ try
LOG_INFO(log, "Closed all listening sockets.");
/// Killing remaining queries.
- if (server_settings.shutdown_wait_unfinished_queries)
+ if (!server_settings.shutdown_wait_unfinished_queries)
global_context->getProcessList().killAllQueries();
if (current_connections)
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 1aeda624db2d..51aa04ba0e5d 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -476,6 +476,14 @@
     <allow_no_password>1</allow_no_password>
     <allow_implicit_no_password>1</allow_implicit_no_password>
+
+    <default_password_type>sha256_password</default_password_type>
+
+
+    <bcrypt_workfactor>12</bcrypt_workfactor>
+
\n"
+ f"This is an automated comment for commit {pr_info.sha} with "
+ f"description of existing statuses. It's updated for the latest CI running\n"
+ f"The full report is available [here]({report_url})\n"
+ f"{worst_state}\n\n
"
+ "
Check name
Description
Status
\n"
+ ""
+ )
+    # group checks by the name to get the worst one per each
+    grouped_statuses = {}  # type: Dict[CheckDescription, CommitStatuses]
+    for status in statuses:
+        cd = None
+        for c in CHECK_DESCRIPTIONS:
+            if c.match_func(status.context):
+                cd = c
+                break
+
+        if cd is None or cd == CHECK_DESCRIPTIONS[-1]:
+            # This is the case for either non-found description or a fallback
+            cd = CheckDescription(
+                status.context,
+                CHECK_DESCRIPTIONS[-1].description,
+                CHECK_DESCRIPTIONS[-1].match_func,
+            )
+
+        if cd in grouped_statuses:
+            grouped_statuses[cd].append(status)
+        else:
+            grouped_statuses[cd] = [status]
+
+    table_rows = []  # type: List[str]
+    for desc, gs in grouped_statuses.items():
+        table_rows.append(
+ f"